diff options
Diffstat (limited to '')
179 files changed, 46675 insertions, 0 deletions
diff --git a/src/tools/rbd/ArgumentTypes.cc b/src/tools/rbd/ArgumentTypes.cc new file mode 100644 index 00000000..ae5f9fd7 --- /dev/null +++ b/src/tools/rbd/ArgumentTypes.cc @@ -0,0 +1,515 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd/features.h" +#include "common/config_proxy.h" +#include "common/strtol.h" +#include "common/Formatter.h" +#include "global/global_context.h" +#include <iostream> +#include <boost/tokenizer.hpp> + +namespace rbd { +namespace argument_types { + +namespace po = boost::program_options; + +const std::map<uint64_t, std::string> ImageFeatures::FEATURE_MAPPING = { + {RBD_FEATURE_LAYERING, RBD_FEATURE_NAME_LAYERING}, + {RBD_FEATURE_STRIPINGV2, RBD_FEATURE_NAME_STRIPINGV2}, + {RBD_FEATURE_EXCLUSIVE_LOCK, RBD_FEATURE_NAME_EXCLUSIVE_LOCK}, + {RBD_FEATURE_OBJECT_MAP, RBD_FEATURE_NAME_OBJECT_MAP}, + {RBD_FEATURE_FAST_DIFF, RBD_FEATURE_NAME_FAST_DIFF}, + {RBD_FEATURE_DEEP_FLATTEN, RBD_FEATURE_NAME_DEEP_FLATTEN}, + {RBD_FEATURE_JOURNALING, RBD_FEATURE_NAME_JOURNALING}, + {RBD_FEATURE_DATA_POOL, RBD_FEATURE_NAME_DATA_POOL}, + {RBD_FEATURE_OPERATIONS, RBD_FEATURE_NAME_OPERATIONS}, + {RBD_FEATURE_MIGRATING, RBD_FEATURE_NAME_MIGRATING}, +}; + +Format::Formatter Format::create_formatter(bool pretty) const { + if (value == "json") { + return Formatter(new JSONFormatter(pretty)); + } else if (value == "xml") { + return Formatter(new XMLFormatter(pretty)); + } + return Formatter(); +} + +std::string get_name_prefix(ArgumentModifier modifier) { + switch (modifier) { + case ARGUMENT_MODIFIER_SOURCE: + return SOURCE_PREFIX; + case ARGUMENT_MODIFIER_DEST: + return DEST_PREFIX; + default: + return ""; + } +} + +std::string get_description_prefix(ArgumentModifier modifier) { + switch (modifier) { + case ARGUMENT_MODIFIER_SOURCE: + return "source "; + case ARGUMENT_MODIFIER_DEST: + return "destination "; + default: + return ""; + } +} + +void add_pool_option(po::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix) { + std::string name = POOL_NAME + ",p"; + std::string description = "pool name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_POOL_NAME; + description = "destination " + description; + break; + } + description += desc_suffix; + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_namespace_option(boost::program_options::options_description *opt, + ArgumentModifier modifier) { + std::string name = NAMESPACE_NAME; + std::string description = "namespace name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_NAMESPACE_NAME; + description = "destination " + description; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_image_option(po::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix) { + std::string name = IMAGE_NAME; + std::string description = "image name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_IMAGE_NAME; + description = "destination " + description; + break; + } + description += desc_suffix; + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_image_id_option(po::options_description *opt, + const std::string &desc_suffix) { + std::string name = IMAGE_ID; + std::string description = "image id"; + description += desc_suffix; + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_snap_option(po::options_description *opt, + ArgumentModifier modifier) { + + std::string name = SNAPSHOT_NAME; + std::string description = "snapshot name"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + break; + case ARGUMENT_MODIFIER_DEST: + name = DEST_SNAPSHOT_NAME; + description = "destination " + description; + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_snap_id_option(po::options_description *opt) { + opt->add_options() + (SNAPSHOT_ID.c_str(), po::value<uint64_t>(), "snapshot id"); +} + +void add_pool_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + bool namespaces_supported) { + opt->add_options() + ((POOL_NAME + ",p").c_str(), po::value<std::string>(), "pool name"); + if (namespaces_supported) { + add_namespace_option(opt, ARGUMENT_MODIFIER_NONE); + pos->add_options() + ("pool-spec", "pool specification\n" + "(example: <pool-name>[/<namespace>]"); + } else { + pos->add_options() + ("pool-name", "pool name"); + } +} + +void add_image_spec_options(po::options_description *pos, + po::options_description *opt, + ArgumentModifier modifier) { + pos->add_options() + ((get_name_prefix(modifier) + IMAGE_SPEC).c_str(), + (get_description_prefix(modifier) + "image specification\n" + + "(example: [<pool-name>/[<namespace>/]]<image-name>)").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); +} + +void add_snap_spec_options(po::options_description *pos, + po::options_description *opt, + ArgumentModifier modifier) { + pos->add_options() + ((get_name_prefix(modifier) + SNAPSHOT_SPEC).c_str(), + (get_description_prefix(modifier) + "snapshot specification\n" + + "(example: [<pool-name>/[<namespace>/]]<image-name>@<snapshot-name>)").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); + add_snap_option(opt, modifier); +} + +void add_image_or_snap_spec_options(po::options_description *pos, + po::options_description *opt, + ArgumentModifier modifier) { + pos->add_options() + ((get_name_prefix(modifier) + IMAGE_OR_SNAPSHOT_SPEC).c_str(), + (get_description_prefix(modifier) + "image or snapshot specification\n" + + "(example: [<pool-name>/[<namespace>/]]<image-name>[@<snap-name>])").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); + add_snap_option(opt, modifier); +} + +void add_create_image_options(po::options_description *opt, + bool include_format) { + // TODO get default image format from conf + if (include_format) { + opt->add_options() + (IMAGE_FORMAT.c_str(), po::value<ImageFormat>(), + "image format [1 (deprecated) or 2]") + (IMAGE_NEW_FORMAT.c_str(), + po::value<ImageNewFormat>()->zero_tokens(), + "use image format 2\n(deprecated)"); + } + + opt->add_options() + (IMAGE_ORDER.c_str(), po::value<ImageOrder>(), + "object order [12 <= order <= 25]") + (IMAGE_OBJECT_SIZE.c_str(), po::value<ImageObjectSize>(), + "object size in B/K/M [4K <= object size <= 32M]") + (IMAGE_FEATURES.c_str(), po::value<ImageFeatures>()->composing(), + ("image features\n" + get_short_features_help(true)).c_str()) + (IMAGE_SHARED.c_str(), po::bool_switch(), "shared image") + (IMAGE_STRIPE_UNIT.c_str(), po::value<ImageObjectSize>(), "stripe unit in B/K/M") + (IMAGE_STRIPE_COUNT.c_str(), po::value<uint64_t>(), "stripe count") + (IMAGE_DATA_POOL.c_str(), po::value<std::string>(), "data pool"); + + add_create_journal_options(opt); +} + +void add_create_journal_options(po::options_description *opt) { + opt->add_options() + (JOURNAL_SPLAY_WIDTH.c_str(), po::value<uint64_t>(), + "number of active journal objects") + (JOURNAL_OBJECT_SIZE.c_str(), po::value<JournalObjectSize>(), + "size of journal objects [4K <= size <= 64M]") + (JOURNAL_POOL.c_str(), po::value<std::string>(), + "pool for journal objects"); +} + +void add_size_option(boost::program_options::options_description *opt) { + opt->add_options() + ((IMAGE_SIZE + ",s").c_str(), po::value<ImageSize>()->required(), + "image size (in M/G/T) [default: M]"); +} + +void add_sparse_size_option(boost::program_options::options_description *opt) { + opt->add_options() + (IMAGE_SPARSE_SIZE.c_str(), po::value<ImageObjectSize>(), + "sparse size in B/K/M [default: 4K]"); +} + +void add_path_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + const std::string &description) { + pos->add_options() + (PATH_NAME.c_str(), po::value<std::string>(), description.c_str()); + opt->add_options() + (PATH.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_limit_option(po::options_description *opt) { + std::string description = "maximum allowed snapshot count"; + + opt->add_options() + (LIMIT.c_str(), po::value<uint64_t>(), description.c_str()); +} + +void add_no_progress_option(boost::program_options::options_description *opt) { + opt->add_options() + (NO_PROGRESS.c_str(), po::bool_switch(), "disable progress output"); +} + +void add_format_options(boost::program_options::options_description *opt) { + opt->add_options() + (FORMAT.c_str(), po::value<Format>(), "output format (plain, json, or xml) [default: plain]") + (PRETTY_FORMAT.c_str(), po::bool_switch(), + "pretty formatting (json and xml)"); +} + +void add_verbose_option(boost::program_options::options_description *opt) { + opt->add_options() + (VERBOSE.c_str(), po::bool_switch(), "be verbose"); +} + +void add_no_error_option(boost::program_options::options_description *opt) { + opt->add_options() + (NO_ERROR.c_str(), po::bool_switch(), "continue after error"); +} + +void add_export_format_option(boost::program_options::options_description *opt) { + opt->add_options() + ("export-format", po::value<ExportFormat>(), "format of image file"); +} + +void add_flatten_option(boost::program_options::options_description *opt) { + opt->add_options() + (IMAGE_FLATTEN.c_str(), po::bool_switch(), + "fill clone with parent data (make it independent)"); +} + +std::string get_short_features_help(bool append_suffix) { + std::ostringstream oss; + bool first_feature = true; + oss << "["; + for (auto &pair : ImageFeatures::FEATURE_MAPPING) { + if ((pair.first & RBD_FEATURES_IMPLICIT_ENABLE) != 0ULL) { + // hide implicitly enabled features from list + continue; + } else if (!append_suffix && (pair.first & RBD_FEATURES_MUTABLE) == 0ULL) { + // hide non-mutable features for the 'rbd feature XYZ' command + continue; + } + + if (!first_feature) { + oss << ", "; + } + first_feature = false; + + std::string suffix; + if (append_suffix) { + if ((pair.first & rbd::utils::get_rbd_default_features(g_ceph_context)) != 0) { + suffix += "+"; + } + if ((pair.first & RBD_FEATURES_MUTABLE) != 0) { + suffix += "*"; + } else if ((pair.first & RBD_FEATURES_DISABLE_ONLY) != 0) { + suffix += "-"; + } + if (!suffix.empty()) { + suffix = "(" + suffix + ")"; + } + } + oss << pair.second << suffix; + } + oss << "]"; + return oss.str(); +} + +std::string get_long_features_help() { + std::ostringstream oss; + oss << "Image Features:" << std::endl + << " (*) supports enabling/disabling on existing images" << std::endl + << " (-) supports disabling-only on existing images" << std::endl + << " (+) enabled by default for new images if features not specified" + << std::endl; + return oss.str(); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageSize *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error); + if (!parse_error.empty()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + + //NOTE: We can remove below given three lines of code once all applications, + //which use this CLI will adopt B/K/M/G/T/P/E with size value + if (isdigit(*s.rbegin())) { + size = size << 20; // Default MB to Bytes + } + v = boost::any(size); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageOrder *target_type, int dummy) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + try { + uint64_t order = boost::lexical_cast<uint64_t>(s); + if (order >= 12 && order <= 25) { + v = boost::any(order); + return; + } + } catch (const boost::bad_lexical_cast &) { + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageObjectSize *target_type, int dummy) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t objectsize = strict_iecstrtoll(s.c_str(), &parse_error); + if (!parse_error.empty()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + v = boost::any(objectsize); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFormat *target_type, int dummy) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + try { + uint32_t format = boost::lexical_cast<uint32_t>(s); + if (format == 1 || format == 2) { + v = boost::any(format); + return; + } + } catch (const boost::bad_lexical_cast &) { + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageNewFormat *target_type, int dummy) { + std::cout << "rbd: --new-format is deprecated, use --image-format" + << std::endl; + v = boost::any(true); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFeatures *target_type, int) { + if (v.empty()) { + v = boost::any(static_cast<uint64_t>(0)); + } + + uint64_t &features = boost::any_cast<uint64_t &>(v); + for (auto &value : values) { + boost::char_separator<char> sep(","); + boost::tokenizer<boost::char_separator<char> > tok(value, sep); + for (auto &token : tok) { + bool matched = false; + for (auto &it : ImageFeatures::FEATURE_MAPPING) { + if (token == it.second) { + features |= it.first; + matched = true; + break; + } + } + + if (!matched) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + } + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + Format *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "plain" || s == "json" || s == "xml") { + v = boost::any(Format(s)); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +void validate(boost::any& v, const std::vector<std::string>& values, + JournalObjectSize *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error); + if (parse_error.empty() && (size >= (1 << 12)) && (size <= (1 << 26))) { + v = boost::any(size); + return; + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + ExportFormat *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t format = strict_iecstrtoll(s.c_str(), &parse_error); + if (!parse_error.empty() || (format != 1 && format != 2)) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + + v = boost::any(format); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + Secret *target_type, int) { + std::cerr << "rbd: --secret is deprecated, use --keyfile" << std::endl; + + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + g_conf().set_val_or_die("keyfile", s.c_str()); + v = boost::any(s); +} + +} // namespace argument_types +} // namespace rbd diff --git a/src/tools/rbd/ArgumentTypes.h b/src/tools/rbd/ArgumentTypes.h new file mode 100644 index 00000000..23bb02b9 --- /dev/null +++ b/src/tools/rbd/ArgumentTypes.h @@ -0,0 +1,218 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_ARGUMENT_TYPES_H +#define CEPH_RBD_ARGUMENT_TYPES_H + +#include "include/int_types.h" +#include <set> +#include <string> +#include <vector> +#include <boost/any.hpp> +#include <boost/program_options.hpp> +#include <boost/shared_ptr.hpp> + +namespace ceph { class Formatter; } + +namespace rbd { +namespace argument_types { + +enum ArgumentModifier { + ARGUMENT_MODIFIER_NONE, + ARGUMENT_MODIFIER_SOURCE, + ARGUMENT_MODIFIER_DEST +}; + +enum SpecFormat { + SPEC_FORMAT_IMAGE, + SPEC_FORMAT_SNAPSHOT, + SPEC_FORMAT_IMAGE_OR_SNAPSHOT +}; + +static const std::string SOURCE_PREFIX("source-"); +static const std::string DEST_PREFIX("dest-"); + +// positional arguments +static const std::string POSITIONAL_COMMAND_SPEC("positional-command-spec"); +static const std::string POSITIONAL_ARGUMENTS("positional-arguments"); +static const std::string IMAGE_SPEC("image-spec"); +static const std::string SNAPSHOT_SPEC("snap-spec"); +static const std::string IMAGE_OR_SNAPSHOT_SPEC("image-or-snap-spec"); +static const std::string PATH_NAME("path-name"); +static const std::string IMAGE_ID("image-id"); + +// optional arguments +static const std::string CONFIG_PATH("conf"); +static const std::string POOL_NAME("pool"); +static const std::string DEST_POOL_NAME("dest-pool"); +static const std::string NAMESPACE_NAME("namespace"); +static const std::string DEST_NAMESPACE_NAME("dest-namespace"); +static const std::string IMAGE_NAME("image"); +static const std::string DEST_IMAGE_NAME("dest"); +static const std::string SNAPSHOT_NAME("snap"); +static const std::string SNAPSHOT_ID("snap-id"); +static const std::string DEST_SNAPSHOT_NAME("dest-snap"); +static const std::string PATH("path"); +static const std::string FROM_SNAPSHOT_NAME("from-snap"); +static const std::string WHOLE_OBJECT("whole-object"); + +static const std::string IMAGE_FORMAT("image-format"); +static const std::string IMAGE_NEW_FORMAT("new-format"); +static const std::string IMAGE_ORDER("order"); +static const std::string IMAGE_OBJECT_SIZE("object-size"); +static const std::string IMAGE_FEATURES("image-feature"); +static const std::string IMAGE_SHARED("image-shared"); +static const std::string IMAGE_SIZE("size"); +static const std::string IMAGE_STRIPE_UNIT("stripe-unit"); +static const std::string IMAGE_STRIPE_COUNT("stripe-count"); +static const std::string IMAGE_DATA_POOL("data-pool"); +static const std::string IMAGE_SPARSE_SIZE("sparse-size"); +static const std::string IMAGE_THICK_PROVISION("thick-provision"); +static const std::string IMAGE_FLATTEN("flatten"); + +static const std::string JOURNAL_OBJECT_SIZE("journal-object-size"); +static const std::string JOURNAL_SPLAY_WIDTH("journal-splay-width"); +static const std::string JOURNAL_POOL("journal-pool"); + +static const std::string NO_PROGRESS("no-progress"); +static const std::string FORMAT("format"); +static const std::string PRETTY_FORMAT("pretty-format"); +static const std::string VERBOSE("verbose"); +static const std::string NO_ERROR("no-error"); + +static const std::string LIMIT("limit"); + +static const std::set<std::string> SWITCH_ARGUMENTS = { + WHOLE_OBJECT, NO_PROGRESS, PRETTY_FORMAT, VERBOSE, NO_ERROR}; + +struct ImageSize {}; +struct ImageOrder {}; +struct ImageObjectSize {}; +struct ImageFormat {}; +struct ImageNewFormat {}; + +struct ImageFeatures { + static const std::map<uint64_t, std::string> FEATURE_MAPPING; + + uint64_t features; +}; + +template <typename T> +struct TypedValue { + T value; + TypedValue(const T& t) : value(t) {} +}; + +struct Format : public TypedValue<std::string> { + typedef boost::shared_ptr<ceph::Formatter> Formatter; + + Format(const std::string &format) : TypedValue<std::string>(format) {} + + Formatter create_formatter(bool pretty) const; +}; + +struct JournalObjectSize {}; + +struct ExportFormat {}; + +struct Secret {}; + +void add_export_format_option(boost::program_options::options_description *opt); + +std::string get_name_prefix(ArgumentModifier modifier); +std::string get_description_prefix(ArgumentModifier modifier); + +void add_all_option(boost::program_options::options_description *opt, + std::string description); + +void add_pool_option(boost::program_options::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix = ""); +void add_namespace_option(boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_image_option(boost::program_options::options_description *opt, + ArgumentModifier modifier, + const std::string &desc_suffix = ""); + +void add_image_id_option(boost::program_options::options_description *opt, + const std::string &desc_suffix = ""); + +void add_snap_option(boost::program_options::options_description *opt, + ArgumentModifier modifier); +void add_snap_id_option(boost::program_options::options_description *opt); + +void add_pool_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + bool namespaces_supported); + +void add_image_spec_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_snap_spec_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_image_or_snap_spec_options( + boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + ArgumentModifier modifier); + +void add_create_image_options(boost::program_options::options_description *opt, + bool include_format); + +void add_create_journal_options( + boost::program_options::options_description *opt); + +void add_size_option(boost::program_options::options_description *opt); + +void add_sparse_size_option(boost::program_options::options_description *opt); + +void add_path_options(boost::program_options::options_description *pos, + boost::program_options::options_description *opt, + const std::string &description); + +void add_limit_option(boost::program_options::options_description *opt); + +void add_no_progress_option(boost::program_options::options_description *opt); + +void add_format_options(boost::program_options::options_description *opt); + +void add_verbose_option(boost::program_options::options_description *opt); + +void add_no_error_option(boost::program_options::options_description *opt); + +void add_flatten_option(boost::program_options::options_description *opt); + +std::string get_short_features_help(bool append_suffix); +std::string get_long_features_help(); + +void validate(boost::any& v, const std::vector<std::string>& values, + ExportFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageSize *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageOrder *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageObjectSize *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageNewFormat *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + ImageFeatures *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + Format *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + JournalObjectSize *target_type, int); +void validate(boost::any& v, const std::vector<std::string>& values, + Secret *target_type, int); + + +std::ostream &operator<<(std::ostream &os, const ImageFeatures &features); + +} // namespace argument_types +} // namespace rbd + +#endif // CEPH_RBD_ARGUMENT_TYPES_H diff --git a/src/tools/rbd/CMakeLists.txt b/src/tools/rbd/CMakeLists.txt new file mode 100644 index 00000000..0e38a033 --- /dev/null +++ b/src/tools/rbd/CMakeLists.txt @@ -0,0 +1,65 @@ +set(CURSES_NEED_NCURSES TRUE) +find_package(Curses REQUIRED) + +set(rbd_srcs + rbd.cc + ArgumentTypes.cc + IndentStream.cc + MirrorDaemonServiceInfo.cc + OptionPrinter.cc + Shell.cc + Utils.cc + action/Bench.cc + action/Children.cc + action/Clone.cc + action/Config.cc + action/Copy.cc + action/Create.cc + action/Device.cc + action/Diff.cc + action/DiskUsage.cc + action/Export.cc + action/Feature.cc + action/Flatten.cc + action/Ggate.cc + action/Group.cc + action/ImageMeta.cc + action/Import.cc + action/Info.cc + action/Journal.cc + action/Kernel.cc + action/List.cc + action/Lock.cc + action/MergeDiff.cc + action/Migration.cc + action/MirrorPool.cc + action/MirrorImage.cc + action/Namespace.cc + action/Nbd.cc + action/ObjectMap.cc + action/Perf.cc + action/Pool.cc + action/Remove.cc + action/Rename.cc + action/Resize.cc + action/Snap.cc + action/Sparsify.cc + action/Status.cc + action/Trash.cc + action/Watch.cc) + +add_executable(rbd ${rbd_srcs} + $<TARGET_OBJECTS:common_texttable_obj>) +set_target_properties(rbd PROPERTIES OUTPUT_NAME rbd) +target_link_libraries(rbd librbd librados + cls_journal_client cls_rbd_client + rbd_types + journal + ceph-common global ${CURSES_LIBRARIES} + ${BLKID_LIBRARIES} ${CMAKE_DL_LIBS}) +if(WITH_KRBD) + target_link_libraries(rbd + krbd) +endif() + +install(TARGETS rbd DESTINATION bin) diff --git a/src/tools/rbd/IndentStream.cc b/src/tools/rbd/IndentStream.cc new file mode 100644 index 00000000..83591a8c --- /dev/null +++ b/src/tools/rbd/IndentStream.cc @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/IndentStream.h" + +namespace rbd { + +int IndentBuffer::overflow (int c) { + if (traits_type::eq_int_type(traits_type::eof(), c)) { + return traits_type::not_eof(c); + } + + int r; + switch (c) { + case '\n': + m_buffer += c; + flush_line(); + r = m_streambuf->sputn(m_buffer.c_str(), m_buffer.size()); + m_buffer.clear(); + return r; + case '\t': + // convert tab to single space and fall-through + c = ' '; + default: + if (m_indent + m_buffer.size() >= m_line_length) { + size_t word_offset = m_buffer.find_last_of(m_delim); + bool space_delim = (m_delim == " "); + if (word_offset == std::string::npos && !space_delim) { + word_offset = m_buffer.find_last_of(" "); + } + + if (word_offset != std::string::npos) { + flush_line(); + m_streambuf->sputn(m_buffer.c_str(), word_offset); + m_buffer = std::string(m_buffer, + word_offset + (space_delim ? 1 : 0)); + } else { + flush_line(); + m_streambuf->sputn(m_buffer.c_str(), m_buffer.size()); + m_buffer.clear(); + } + m_streambuf->sputc('\n'); + } + m_buffer += c; + return c; + } +} + +void IndentBuffer::flush_line() { + if (m_initial_offset >= m_indent) { + m_initial_offset = 0; + m_streambuf->sputc('\n'); + } + + m_streambuf->sputn(m_indent_prefix.c_str(), m_indent - m_initial_offset); + m_initial_offset = 0; +} + +} // namespace rbd diff --git a/src/tools/rbd/IndentStream.h b/src/tools/rbd/IndentStream.h new file mode 100644 index 00000000..85ccc85b --- /dev/null +++ b/src/tools/rbd/IndentStream.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_INDENT_STREAM_H +#define CEPH_RBD_INDENT_STREAM_H + +#include "include/int_types.h" +#include <iostream> +#include <streambuf> +#include <iomanip> + +namespace rbd { + +class IndentBuffer : public std::streambuf { +public: + IndentBuffer(size_t indent, size_t initial_offset, size_t line_length, + std::streambuf *streambuf) + : m_indent(indent), m_initial_offset(initial_offset), + m_line_length(line_length), m_streambuf(streambuf), + m_delim(" "), m_indent_prefix(m_indent, ' ') { + } + + void set_delimiter(const std::string &delim) { + m_delim = delim; + } + +protected: + int overflow (int c) override; + +private: + size_t m_indent; + size_t m_initial_offset; + size_t m_line_length; + std::streambuf *m_streambuf; + + std::string m_delim; + std::string m_indent_prefix; + std::string m_buffer; + + void flush_line(); +}; + +class IndentStream : public std::ostream { +public: + IndentStream(size_t indent, size_t initial_offset, size_t line_length, + std::ostream &os) + : std::ostream(&m_indent_buffer), + m_indent_buffer(indent, initial_offset, line_length, os.rdbuf()) { + } + + void set_delimiter(const std::string &delim) { + m_indent_buffer.set_delimiter(delim); + } +private: + IndentBuffer m_indent_buffer; +}; + +} // namespace rbd + +#endif // CEPH_RBD_INDENT_STREAM_ITERATOR_H diff --git a/src/tools/rbd/MirrorDaemonServiceInfo.cc b/src/tools/rbd/MirrorDaemonServiceInfo.cc new file mode 100644 index 00000000..4870c1b2 --- /dev/null +++ b/src/tools/rbd/MirrorDaemonServiceInfo.cc @@ -0,0 +1,174 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_json.h" +#include "common/errno.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "tools/rbd/MirrorDaemonServiceInfo.h" + +#include <boost/scope_exit.hpp> +#include <iostream> + +namespace rbd { + +int MirrorDaemonServiceInfo::init() { + + std::string cmd = "{\"prefix\": \"service dump\"}"; + + bufferlist in_bl; + bufferlist out_bl; + int r = librados::Rados(m_io_ctx).mgr_command(cmd, in_bl, &out_bl, nullptr); + if (r < 0) { + std::cerr << "rbd: failed to get service dump: " << cpp_strerror(r) + << std::endl; + return r; + } + + bool json_valid = false; + json_spirit::mValue json_root; + if (json_spirit::read(out_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + if (json_obj.count("services")) { + auto &services = json_obj["services"].get_obj(); + if (services.count("rbd-mirror")) { + auto &mirror_service = services["rbd-mirror"].get_obj(); + if (mirror_service.count("daemons")) { + for (auto &it : mirror_service["daemons"].get_obj()) { + if (it.second.type() != json_spirit::obj_type || + !it.second.get_obj().count("metadata")) { + continue; + } + auto &service_id = it.first; + auto &daemon_metadata = it.second.get_obj()["metadata"].get_obj(); + for (auto &iter : daemon_metadata) { + if (iter.second.type() != json_spirit::str_type) { + continue; + } + m_daemons_metadata[service_id][iter.first] = iter.second.get_str(); + } + } + } + } + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + std::cerr << "rbd: failed to parse service status" << std::endl; + return -EBADMSG; + } + + cmd = "{\"prefix\": \"service status\"}"; + + out_bl.clear(); + r = librados::Rados(m_io_ctx).mgr_command(cmd, in_bl, &out_bl, nullptr); + if (r < 0) { + std::cerr << "rbd: failed to get service status: " << cpp_strerror(r) + << std::endl; + return r; + } + + json_valid = false; + if (json_spirit::read(out_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + if (json_obj.count("rbd-mirror")) { + auto &mirror_service = json_obj["rbd-mirror"].get_obj(); + for (auto &it : mirror_service) { + auto &service_id = it.first; + auto &daemon = it.second.get_obj(); + if (daemon.count("status") && + daemon["status"].get_obj().count("json")) { + auto& status_json_str = + daemon["status"].get_obj()["json"].get_str(); + json_spirit::mValue status_json_root; + if (json_spirit::read(status_json_str, status_json_root)) { + auto& status = status_json_root.get_obj(); + auto iter = status.find(stringify(m_io_ctx.get_id())); + if (iter != status.end() && + iter->second.get_obj().count("instance_id")) { + auto &instance_id = + iter->second.get_obj()["instance_id"].get_str(); + m_instance_id_to_service_id[instance_id] = service_id; + } + } + } + } + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + std::cerr << "rbd: failed to parse service status" << std::endl; + return -EBADMSG; + } + + return 0; +} + +std::string MirrorDaemonServiceInfo::get_description( + const std::string &instance_id) const { + if (!m_instance_id_to_service_id.count(instance_id)) { + return {}; + } + + auto service_id = m_instance_id_to_service_id.find(instance_id)->second; + + auto it = m_daemons_metadata.find(service_id); + if (it == m_daemons_metadata.end()) { + return service_id; + } + + auto &metadata = it->second; + auto iter = metadata.find("id"); + std::string description = (iter != metadata.end()) ? + iter->second : service_id; + iter = metadata.find("hostname"); + if (iter != metadata.end()) { + description += " on " + iter->second; + } + + return description; +} + +void MirrorDaemonServiceInfo::dump( + const std::string &instance_id, + argument_types::Format::Formatter formatter) const { + formatter->open_object_section("daemon_service"); + BOOST_SCOPE_EXIT(formatter) { + formatter->close_section(); + } BOOST_SCOPE_EXIT_END; + + if (instance_id.empty() || + !m_instance_id_to_service_id.count(instance_id)) { + return; + } + + auto service_id = m_instance_id_to_service_id.find(instance_id)->second; + formatter->dump_string("service_id", service_id); + formatter->dump_string("instance_id", instance_id); + + auto it = m_daemons_metadata.find(service_id); + if (it == m_daemons_metadata.end()) { + return; + } + + auto &metadata = it->second; + auto iter = metadata.find("id"); + if (iter != metadata.end()) { + formatter->dump_string("daemon_id", iter->second); + } + iter = metadata.find("hostname"); + if (iter != metadata.end()) { + formatter->dump_string("hostname", iter->second); + } +} + +} // namespace rbd + diff --git a/src/tools/rbd/MirrorDaemonServiceInfo.h b/src/tools/rbd/MirrorDaemonServiceInfo.h new file mode 100644 index 00000000..7c3c3856 --- /dev/null +++ b/src/tools/rbd/MirrorDaemonServiceInfo.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H +#define CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H + +#include "include/rados/librados_fwd.hpp" +#include "tools/rbd/ArgumentTypes.h" + +#include <string> +#include <map> + +namespace rbd { + +class MirrorDaemonServiceInfo { +public: + MirrorDaemonServiceInfo(librados::IoCtx &io_ctx) : m_io_ctx(io_ctx) { + } + + int init(); + + std::string get_description(const std::string &instance_id) const; + void dump(const std::string &instance_id, + argument_types::Format::Formatter formatter) const; + +private: + librados::IoCtx &m_io_ctx; + std::map<std::string, std::string> m_instance_id_to_service_id; + std::map<std::string, std::map<std::string, std::string>> m_daemons_metadata; +}; + +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_DAEMON_SERVICE_INFO_H diff --git a/src/tools/rbd/OptionPrinter.cc b/src/tools/rbd/OptionPrinter.cc new file mode 100644 index 00000000..14affb0b --- /dev/null +++ b/src/tools/rbd/OptionPrinter.cc @@ -0,0 +1,110 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/OptionPrinter.h" +#include "tools/rbd/IndentStream.h" + +namespace rbd { + +namespace po = boost::program_options; + +const std::string OptionPrinter::POSITIONAL_ARGUMENTS("Positional arguments"); +const std::string OptionPrinter::OPTIONAL_ARGUMENTS("Optional arguments"); + +const size_t OptionPrinter::MAX_DESCRIPTION_OFFSET; + +OptionPrinter::OptionPrinter(const OptionsDescription &positional, + const OptionsDescription &optional) + : m_positional(positional), m_optional(optional) { +} + +void OptionPrinter::print_short(std::ostream &os, size_t initial_offset) { + size_t name_width = std::min(initial_offset, MAX_DESCRIPTION_OFFSET) + 1; + + IndentStream indent_stream(name_width, initial_offset, LINE_WIDTH, os); + indent_stream.set_delimiter("["); + for (size_t i = 0; i < m_optional.options().size(); ++i) { + bool required = m_optional.options()[i]->semantic()->is_required(); + if (!required) { + indent_stream << "["; + } + indent_stream << "--" << m_optional.options()[i]->long_name(); + if (m_optional.options()[i]->semantic()->max_tokens() != 0) { + indent_stream << " <" << m_optional.options()[i]->long_name() << ">"; + } + if (!required) { + indent_stream << "]"; + } + indent_stream << " "; + } + + if (m_optional.options().size() > 0 || m_positional.options().size() == 0) { + indent_stream << std::endl; + } + + if (m_positional.options().size() > 0) { + indent_stream.set_delimiter(" "); + for (size_t i = 0; i < m_positional.options().size(); ++i) { + indent_stream << "<" << m_positional.options()[i]->long_name() << "> "; + if (m_positional.options()[i]->semantic()->max_tokens() > 1) { + indent_stream << "[<" << m_positional.options()[i]->long_name() + << "> ...]"; + break; + } + } + indent_stream << std::endl; + } +} + +void OptionPrinter::print_detailed(std::ostream &os) { + std::string indent_prefix(2, ' '); + size_t name_width = compute_name_width(indent_prefix.size()); + + if (m_positional.options().size() > 0) { + std::cout << POSITIONAL_ARGUMENTS << std::endl; + for (size_t i = 0; i < m_positional.options().size(); ++i) { + std::stringstream ss; + ss << indent_prefix << "<" << m_positional.options()[i]->long_name() + << ">"; + + std::cout << ss.str(); + IndentStream indent_stream(name_width, ss.str().size(), LINE_WIDTH, os); + indent_stream << m_positional.options()[i]->description() << std::endl; + } + std::cout << std::endl; + } + + if (m_optional.options().size() > 0) { + std::cout << OPTIONAL_ARGUMENTS << std::endl; + for (size_t i = 0; i < m_optional.options().size(); ++i) { + std::stringstream ss; + ss << indent_prefix + << m_optional.options()[i]->format_name() << " " + << m_optional.options()[i]->format_parameter(); + + std::cout << ss.str(); + IndentStream indent_stream(name_width, ss.str().size(), LINE_WIDTH, os); + indent_stream << m_optional.options()[i]->description() << std::endl; + } + std::cout << std::endl; + } +} + +size_t OptionPrinter::compute_name_width(size_t indent) { + size_t width = MIN_NAME_WIDTH; + std::vector<OptionsDescription> descs = {m_positional, m_optional}; + for (size_t desc_idx = 0; desc_idx < descs.size(); ++desc_idx) { + const OptionsDescription &desc = descs[desc_idx]; + for (size_t opt_idx = 0; opt_idx < desc.options().size(); ++opt_idx) { + size_t name_width = desc.options()[opt_idx]->format_name().size() + + desc.options()[opt_idx]->format_parameter().size() + + 1; + width = std::max(width, name_width); + } + } + width += indent; + width = std::min(width, MAX_DESCRIPTION_OFFSET) + 1; + return width; +} + +} // namespace rbd diff --git a/src/tools/rbd/OptionPrinter.h b/src/tools/rbd/OptionPrinter.h new file mode 100644 index 00000000..e18a5f88 --- /dev/null +++ b/src/tools/rbd/OptionPrinter.h @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_OPTION_PRINTER_H +#define CEPH_RBD_OPTION_PRINTER_H + +#include "include/int_types.h" +#include <string> +#include <vector> +#include <boost/program_options.hpp> + +namespace rbd { + +class OptionPrinter { +public: + typedef boost::program_options::options_description OptionsDescription; + + static const std::string POSITIONAL_ARGUMENTS; + static const std::string OPTIONAL_ARGUMENTS; + + static const size_t LINE_WIDTH = 80; + static const size_t MIN_NAME_WIDTH = 20; + static const size_t MAX_DESCRIPTION_OFFSET = LINE_WIDTH / 2; + + OptionPrinter(const OptionsDescription &positional, + const OptionsDescription &optional); + + void print_short(std::ostream &os, size_t initial_offset); + void print_detailed(std::ostream &os); + +private: + const OptionsDescription &m_positional; + const OptionsDescription &m_optional; + + size_t compute_name_width(size_t indent); +}; + +} // namespace rbd + +#endif // CEPH_RBD_OPTION_PRINTER_H diff --git a/src/tools/rbd/Shell.cc b/src/tools/rbd/Shell.cc new file mode 100644 index 00000000..9993c691 --- /dev/null +++ b/src/tools/rbd/Shell.cc @@ -0,0 +1,432 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/Shell.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/IndentStream.h" +#include "tools/rbd/OptionPrinter.h" +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "include/stringify.h" +#include <algorithm> +#include <iostream> +#include <set> + +namespace rbd { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +static const std::string APP_NAME("rbd"); +static const std::string HELP_SPEC("help"); +static const std::string BASH_COMPLETION_SPEC("bash-completion"); + +boost::intrusive_ptr<CephContext> global_init( + int argc, const char **argv, std::vector<std::string> *command_args, + std::vector<std::string> *global_init_args) { + std::vector<const char*> cmd_args; + argv_to_vec(argc, argv, cmd_args); + std::vector<const char*> args(cmd_args); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_MON_CONFIG); + + *command_args = {args.begin(), args.end()}; + + // Scan command line arguments for ceph global init args (those are + // filtered out from args vector by global_init). + + auto cursor = args.begin(); + for (auto &arg : cmd_args) { + auto iter = cursor; + for (; iter != args.end(); iter++) { + if (*iter == arg) { + break; + } + } + if (iter == args.end()) { + // filtered out by global_init + global_init_args->push_back(arg); + } else { + cursor = ++iter; + } + } + + return cct; +} + +std::string format_command_spec(const Shell::CommandSpec &spec) { + return joinify<std::string>(spec.begin(), spec.end(), " "); +} + +std::string format_alias_spec(const Shell::CommandSpec &spec, + const Shell::CommandSpec &alias_spec) { + auto spec_it = spec.begin(); + auto alias_it = alias_spec.begin(); + int level = 0; + while (spec_it != spec.end() && alias_it != alias_spec.end() && + *spec_it == *alias_it) { + spec_it++; + alias_it++; + level++; + } + ceph_assert(spec_it != spec.end() && alias_it != alias_spec.end()); + + if (level < 2) { + return joinify<std::string>(alias_spec.begin(), alias_spec.end(), " "); + } else { + return "... " + joinify<std::string>(alias_it, alias_spec.end(), " "); + } +} + +std::string format_command_name(const Shell::CommandSpec &spec, + const Shell::CommandSpec &alias_spec) { + std::string name = format_command_spec(spec); + if (!alias_spec.empty()) { + name += " (" + format_alias_spec(spec, alias_spec) + ")"; + } + return name; +} + +std::string format_option_suffix( + const boost::shared_ptr<po::option_description> &option) { + std::string suffix; + if (option->semantic()->max_tokens() != 0) { + if (option->description().find("path") != std::string::npos || + option->description().find("file") != std::string::npos) { + suffix += " path"; + } else if (option->description().find("host") != std::string::npos) { + suffix += " host"; + } else { + suffix += " arg"; + } + } + return suffix; +} + +} // anonymous namespace + +std::vector<Shell::Action *>& Shell::get_actions() { + static std::vector<Action *> actions; + + return actions; +} + +std::set<std::string>& Shell::get_switch_arguments() { + static std::set<std::string> switch_arguments; + + return switch_arguments; +} + +int Shell::execute(int argc, const char **argv) { + std::vector<std::string> arguments; + std::vector<std::string> ceph_global_init_args; + auto cct = global_init(argc, argv, &arguments, &ceph_global_init_args); + + std::vector<std::string> command_spec; + get_command_spec(arguments, &command_spec); + bool is_alias = true; + + if (command_spec.empty() || command_spec == CommandSpec({"help"})) { + // list all available actions + print_help(); + return 0; + } else if (command_spec[0] == HELP_SPEC) { + // list help for specific action + command_spec.erase(command_spec.begin()); + Action *action = find_action(command_spec, NULL, &is_alias); + if (action == NULL) { + print_unknown_action(command_spec); + return EXIT_FAILURE; + } else { + print_action_help(action, is_alias); + return 0; + } + } else if (command_spec[0] == BASH_COMPLETION_SPEC) { + command_spec.erase(command_spec.begin()); + print_bash_completion(command_spec); + return 0; + } + + CommandSpec *matching_spec; + Action *action = find_action(command_spec, &matching_spec, &is_alias); + if (action == NULL) { + print_unknown_action(command_spec); + return EXIT_FAILURE; + } + + po::variables_map vm; + try { + po::options_description positional_opts; + po::options_description command_opts; + (*action->get_arguments)(&positional_opts, &command_opts); + + // dynamically allocate options for our command (e.g. snap list) and + // its associated positional arguments + po::options_description argument_opts; + argument_opts.add_options() + (at::POSITIONAL_COMMAND_SPEC.c_str(), + po::value<std::vector<std::string> >()->required(), "") + (at::POSITIONAL_ARGUMENTS.c_str(), + po::value<std::vector<std::string> >(), ""); + + po::positional_options_description positional_options; + positional_options.add(at::POSITIONAL_COMMAND_SPEC.c_str(), + matching_spec->size()); + if (!positional_opts.options().empty()) { + int max_count = positional_opts.options().size(); + if (positional_opts.options().back()->semantic()->max_tokens() > 1) + max_count = -1; + positional_options.add(at::POSITIONAL_ARGUMENTS.c_str(), max_count); + } + + po::options_description group_opts; + group_opts.add(command_opts) + .add(argument_opts); + + po::store(po::command_line_parser(arguments) + .style(po::command_line_style::default_style & + ~po::command_line_style::allow_guessing) + .options(group_opts) + .positional(positional_options) + .run(), vm); + + if (vm[at::POSITIONAL_COMMAND_SPEC].as<std::vector<std::string> >() != + *matching_spec) { + std::cerr << "rbd: failed to parse command" << std::endl; + return EXIT_FAILURE; + } + + int r = (*action->execute)(vm, ceph_global_init_args); + if (r != 0) { + return std::abs(r); + } + } catch (po::required_option& e) { + std::cerr << "rbd: " << e.what() << std::endl; + return EXIT_FAILURE; + } catch (po::too_many_positional_options_error& e) { + std::cerr << "rbd: too many arguments" << std::endl; + return EXIT_FAILURE; + } catch (po::error& e) { + std::cerr << "rbd: " << e.what() << std::endl; + return EXIT_FAILURE; + } + + return 0; +} + +void Shell::get_command_spec(const std::vector<std::string> &arguments, + std::vector<std::string> *command_spec) { + for (size_t i = 0; i < arguments.size(); ++i) { + std::string arg(arguments[i]); + if (arg == "-h" || arg == "--help") { + *command_spec = {HELP_SPEC}; + return; + } else if (arg == "--") { + // all arguments after a double-dash are positional + if (i + 1 < arguments.size()) { + command_spec->insert(command_spec->end(), + arguments.data() + i + 1, + arguments.data() + arguments.size()); + } + return; + } else if (arg[0] == '-') { + // if the option is not a switch, skip its value + if (arg.size() >= 2 && + (arg[1] == '-' || + get_switch_arguments().count(arg.substr(1, 1)) == 0) && + (arg[1] != '-' || + get_switch_arguments().count(arg.substr(2, std::string::npos)) == 0) && + at::SWITCH_ARGUMENTS.count(arg.substr(2, std::string::npos)) == 0 && + arg.find('=') == std::string::npos) { + ++i; + } + } else { + command_spec->push_back(arg); + } + } +} + +Shell::Action *Shell::find_action(const CommandSpec &command_spec, + CommandSpec **matching_spec, bool *is_alias) { + for (size_t i = 0; i < get_actions().size(); ++i) { + Action *action = get_actions()[i]; + if (action->command_spec.size() <= command_spec.size()) { + if (std::includes(action->command_spec.begin(), + action->command_spec.end(), + command_spec.begin(), + command_spec.begin() + action->command_spec.size())) { + if (matching_spec != NULL) { + *matching_spec = &action->command_spec; + } + *is_alias = false; + return action; + } + } + if (!action->alias_command_spec.empty() && + action->alias_command_spec.size() <= command_spec.size()) { + if (std::includes(action->alias_command_spec.begin(), + action->alias_command_spec.end(), + command_spec.begin(), + command_spec.begin() + + action->alias_command_spec.size())) { + if (matching_spec != NULL) { + *matching_spec = &action->alias_command_spec; + } + *is_alias = true; + return action; + } + } + } + return NULL; +} + +void Shell::get_global_options(po::options_description *opts) { + opts->add_options() + ((at::CONFIG_PATH + ",c").c_str(), po::value<std::string>(), "path to cluster configuration") + ("cluster", po::value<std::string>(), "cluster name") + ("id", po::value<std::string>(), "client id (without 'client.' prefix)") + ("user", po::value<std::string>(), "client id (without 'client.' prefix)") + ("name,n", po::value<std::string>(), "client name") + ("mon_host,m", po::value<std::string>(), "monitor host") + ("secret", po::value<at::Secret>(), "path to secret key (deprecated)") + ("keyfile,K", po::value<std::string>(), "path to secret key") + ("keyring,k", po::value<std::string>(), "path to keyring"); +} + +void Shell::print_help() { + std::cout << "usage: " << APP_NAME << " <command> ..." + << std::endl << std::endl + << "Command-line interface for managing Ceph RBD images." + << std::endl << std::endl; + + std::vector<Action *> actions(get_actions()); + std::sort(actions.begin(), actions.end(), + [](Action *lhs, Action *rhs) { return lhs->command_spec < + rhs->command_spec; }); + + std::cout << OptionPrinter::POSITIONAL_ARGUMENTS << ":" << std::endl + << " <command>" << std::endl; + + // since the commands have spaces, we have to build our own formatter + std::string indent(4, ' '); + size_t name_width = OptionPrinter::MIN_NAME_WIDTH; + for (size_t i = 0; i < actions.size(); ++i) { + Action *action = actions[i]; + std::string name = format_command_name(action->command_spec, + action->alias_command_spec); + name_width = std::max(name_width, name.size()); + } + name_width += indent.size(); + name_width = std::min(name_width, OptionPrinter::MAX_DESCRIPTION_OFFSET) + 1; + + for (size_t i = 0; i < actions.size(); ++i) { + Action *action = actions[i]; + if (!action->visible) + continue; + std::stringstream ss; + ss << indent + << format_command_name(action->command_spec, action->alias_command_spec); + + std::cout << ss.str(); + if (!action->description.empty()) { + IndentStream indent_stream(name_width, ss.str().size(), + OptionPrinter::LINE_WIDTH, + std::cout); + indent_stream << action->description << std::endl; + } else { + std::cout << std::endl; + } + } + + po::options_description global_opts(OptionPrinter::OPTIONAL_ARGUMENTS); + get_global_options(&global_opts); + std::cout << std::endl << global_opts << std::endl + << "See '" << APP_NAME << " help <command>' for help on a specific " + << "command." << std::endl; + } + +void Shell::print_action_help(Action *action, bool is_alias) { + std::stringstream ss; + ss << "usage: " << APP_NAME << " " + << format_command_spec(is_alias ? action->alias_command_spec : action->command_spec); + std::cout << ss.str(); + + po::options_description positional; + po::options_description options; + (*action->get_arguments)(&positional, &options); + + OptionPrinter option_printer(positional, options); + option_printer.print_short(std::cout, ss.str().size()); + + if (!action->description.empty()) { + std::cout << std::endl << action->description << std::endl; + } + + std::cout << std::endl; + option_printer.print_detailed(std::cout); + + if (!action->help.empty()) { + std::cout << action->help << std::endl; + } +} + +void Shell::print_unknown_action(const std::vector<std::string> &command_spec) { + std::cerr << "error: unknown option '" + << joinify<std::string>(command_spec.begin(), + command_spec.end(), " ") << "'" + << std::endl << std::endl; + print_help(); +} + +void Shell::print_bash_completion(const CommandSpec &command_spec) { + + bool is_alias = true; + + Action *action = find_action(command_spec, NULL, &is_alias); + po::options_description global_opts; + get_global_options(&global_opts); + print_bash_completion_options(global_opts); + + if (action != nullptr) { + po::options_description positional_opts; + po::options_description command_opts; + (*action->get_arguments)(&positional_opts, &command_opts); + print_bash_completion_options(command_opts); + } else { + std::cout << "|help"; + for (size_t i = 0; i < get_actions().size(); ++i) { + Action *action = get_actions()[i]; + std::cout << "|" + << joinify<std::string>(action->command_spec.begin(), + action->command_spec.end(), " "); + if (!action->alias_command_spec.empty()) { + std::cout << "|" + << joinify<std::string>(action->alias_command_spec.begin(), + action->alias_command_spec.end(), + " "); + } + } + } + std::cout << "|" << std::endl; +} + +void Shell::print_bash_completion_options(const po::options_description &ops) { + for (size_t i = 0; i < ops.options().size(); ++i) { + auto option = ops.options()[i]; + std::string long_name(option->canonical_display_name(0)); + std::string short_name(option->canonical_display_name( + po::command_line_style::allow_dash_for_short)); + + std::cout << "|--" << long_name << format_option_suffix(option); + if (long_name != short_name) { + std::cout << "|" << short_name << format_option_suffix(option); + } + } +} + +} // namespace rbd diff --git a/src/tools/rbd/Shell.h b/src/tools/rbd/Shell.h new file mode 100644 index 00000000..fe3dee46 --- /dev/null +++ b/src/tools/rbd/Shell.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_SHELL_H +#define CEPH_RBD_SHELL_H + +#include "include/int_types.h" +#include <set> +#include <string> +#include <vector> +#include <boost/program_options.hpp> + +namespace rbd { + +class Shell { +public: + typedef std::vector<std::string> CommandSpec; + + struct Action { + typedef void (*GetArguments)(boost::program_options::options_description *, + boost::program_options::options_description *); + typedef int (*Execute)(const boost::program_options::variables_map &, + const std::vector<std::string> &); + + CommandSpec command_spec; + CommandSpec alias_command_spec; + const std::string description; + const std::string help; + GetArguments get_arguments; + Execute execute; + bool visible; + + template <typename Args, typename Execute> + Action(const std::initializer_list<std::string> &command_spec, + const std::initializer_list<std::string> &alias_command_spec, + const std::string &description, const std::string &help, + Args args, Execute execute, bool visible = true) + : command_spec(command_spec), alias_command_spec(alias_command_spec), + description(description), help(help), get_arguments(args), + execute(execute), visible(visible) { + Shell::get_actions().push_back(this); + } + + }; + + struct SwitchArguments { + SwitchArguments(const std::initializer_list<std::string> &arguments) { + Shell::get_switch_arguments().insert(arguments.begin(), arguments.end()); + } + }; + + int execute(int argc, const char **argv); + +private: + static std::vector<Action *>& get_actions(); + static std::set<std::string>& get_switch_arguments(); + + void get_command_spec(const std::vector<std::string> &arguments, + std::vector<std::string> *command_spec); + Action *find_action(const CommandSpec &command_spec, + CommandSpec **matching_spec, bool *is_alias); + + void get_global_options(boost::program_options::options_description *opts); + + void print_help(); + void print_action_help(Action *action, bool is_alias); + void print_unknown_action(const CommandSpec &command_spec); + + void print_bash_completion(const CommandSpec &command_spec); + void print_bash_completion_options( + const boost::program_options::options_description &ops); +}; + +} // namespace rbd + +#endif // CEPH_RBD_SHELL_H diff --git a/src/tools/rbd/Utils.cc b/src/tools/rbd/Utils.cc new file mode 100644 index 00000000..d4f50022 --- /dev/null +++ b/src/tools/rbd/Utils.cc @@ -0,0 +1,907 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/Utils.h" +#include "include/ceph_assert.h" +#include "include/Context.h" +#include "include/encoding.h" +#include "common/common_init.h" +#include "include/stringify.h" +#include "include/rbd/features.h" +#include "common/config.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "global/global_context.h" +#include <iostream> +#include <regex> +#include <boost/algorithm/string.hpp> +#include <boost/lexical_cast.hpp> + +namespace rbd { +namespace utils { + +namespace at = argument_types; +namespace po = boost::program_options; + +int ProgressContext::update_progress(uint64_t offset, uint64_t total) { + if (progress) { + int pc = total ? (offset * 100ull / total) : 0; + if (pc != last_pc) { + cerr << "\r" << operation << ": " + << pc << "% complete..."; + cerr.flush(); + last_pc = pc; + } + } + return 0; +} + +void ProgressContext::finish() { + if (progress) { + cerr << "\r" << operation << ": 100% complete...done." << std::endl; + } +} + +void ProgressContext::fail() { + if (progress) { + cerr << "\r" << operation << ": " << last_pc << "% complete...failed." + << std::endl; + } +} + +void aio_context_callback(librbd::completion_t completion, void *arg) +{ + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(completion); + Context *context = reinterpret_cast<Context *>(arg); + context->complete(aio_completion->get_return_value()); + aio_completion->release(); +} + +int read_string(int fd, unsigned max, std::string *out) { + char buf[4]; + + int r = safe_read_exact(fd, buf, 4); + if (r < 0) + return r; + + bufferlist bl; + bl.append(buf, 4); + auto p = bl.cbegin(); + uint32_t len; + decode(len, p); + if (len > max) + return -EINVAL; + + char sbuf[len]; + r = safe_read_exact(fd, sbuf, len); + if (r < 0) + return r; + out->assign(sbuf, len); + return len; +} + +int extract_spec(const std::string &spec, std::string *pool_name, + std::string *namespace_name, std::string *name, + std::string *snap_name, SpecValidation spec_validation) { + if (!g_ceph_context->_conf.get_val<bool>("rbd_validate_names")) { + spec_validation = SPEC_VALIDATION_NONE; + } + + std::regex pattern; + switch (spec_validation) { + case SPEC_VALIDATION_FULL: + // disallow "/" and "@" in all names + pattern = "^(?:([^/@]+)/(?:([^/@]+)/)?)?([^/@]+)(?:@([^/@]+))?$"; + break; + case SPEC_VALIDATION_SNAP: + // disallow "/" and "@" in snap name + pattern = "^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"; + break; + case SPEC_VALIDATION_NONE: + // relaxed pattern assumes pool is before first "/", + // namespace is before second "/", and snap name is after first "@" + pattern = "^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@(.+))?$"; + break; + default: + ceph_abort(); + break; + } + + std::smatch match; + if (!std::regex_match(spec, match, pattern)) { + std::cerr << "rbd: invalid spec '" << spec << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + if (pool_name != nullptr) { + *pool_name = match[1]; + } else { + std::cerr << "rbd: pool name specified for a command that doesn't use it" + << std::endl; + return -EINVAL; + } + } + + if (match[2].matched) { + if (namespace_name != nullptr) { + *namespace_name = match[2]; + } else { + std::cerr << "rbd: namespace name specified for a command that doesn't " + << "use it" << std::endl; + return -EINVAL; + } + } + + if (name != nullptr) { + *name = match[3]; + } + + if (match[4].matched) { + if (snap_name != nullptr) { + *snap_name = match[4]; + } else { + std::cerr << "rbd: snapshot name specified for a command that doesn't " + << "use it" << std::endl; + return -EINVAL; + } + } + return 0; +} + +std::string get_positional_argument(const po::variables_map &vm, size_t index) { + if (vm.count(at::POSITIONAL_ARGUMENTS) == 0) { + return ""; + } + + const std::vector<std::string> &args = + boost::any_cast<std::vector<std::string> >( + vm[at::POSITIONAL_ARGUMENTS].value()); + if (index < args.size()) { + return args[index]; + } + return ""; +} + +std::string get_default_pool_name() { + return g_ceph_context->_conf.get_val<std::string>("rbd_default_pool"); +} + +int get_pool_and_namespace_names( + const boost::program_options::variables_map &vm, + bool default_empty_pool_name, bool validate_pool_name, + std::string* pool_name, std::string* namespace_name, size_t *arg_index) { + if (namespace_name != nullptr && vm.count(at::NAMESPACE_NAME)) { + *namespace_name = vm[at::NAMESPACE_NAME].as<std::string>(); + } + + if (vm.count(at::POOL_NAME)) { + *pool_name = vm[at::POOL_NAME].as<std::string>(); + } else { + *pool_name = get_positional_argument(vm, *arg_index); + if (!pool_name->empty()) { + if (namespace_name != nullptr) { + auto slash_pos = pool_name->find_last_of('/'); + if (slash_pos != std::string::npos) { + *namespace_name = pool_name->substr(slash_pos + 1); + } + *pool_name = pool_name->substr(0, slash_pos); + } + ++(*arg_index); + } + } + + if (default_empty_pool_name && pool_name->empty()) { + *pool_name = get_default_pool_name(); + } + + if (!g_ceph_context->_conf.get_val<bool>("rbd_validate_names")) { + validate_pool_name = false; + } + + if (validate_pool_name && + pool_name->find_first_of("/@") != std::string::npos) { + std::cerr << "rbd: invalid pool '" << *pool_name << "'" << std::endl; + return -EINVAL; + } else if (namespace_name != nullptr && + namespace_name->find_first_of("/@") != std::string::npos) { + std::cerr << "rbd: invalid namespace '" << *namespace_name << "'" + << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_pool_image_id(const po::variables_map &vm, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *image_id) { + + if (vm.count(at::POOL_NAME) && pool_name != nullptr) { + *pool_name = vm[at::POOL_NAME].as<std::string>(); + } + if (vm.count(at::NAMESPACE_NAME) && namespace_name != nullptr) { + *namespace_name = vm[at::NAMESPACE_NAME].as<std::string>(); + } + if (vm.count(at::IMAGE_ID) && image_id != nullptr) { + *image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r; + if (image_id != nullptr && spec_arg_index != nullptr && image_id->empty()) { + std::string spec = get_positional_argument(vm, (*spec_arg_index)++); + if (!spec.empty()) { + r = extract_spec(spec, pool_name, namespace_name, image_id, nullptr, + SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + } + } + + if (pool_name != nullptr && pool_name->empty()) { + *pool_name = get_default_pool_name(); + } + + if (image_id != nullptr && image_id->empty()) { + std::cerr << "rbd: image id was not specified" << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_pool_image_snapshot_names(const po::variables_map &vm, + at::ArgumentModifier mod, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *image_name, + std::string *snap_name, + bool image_name_required, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation) { + std::string pool_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_POOL_NAME : at::POOL_NAME); + std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_IMAGE_NAME : at::IMAGE_NAME); + return get_pool_generic_snapshot_names(vm, mod, spec_arg_index, pool_key, + pool_name, namespace_name, image_key, + "image", image_name, snap_name, + image_name_required, snapshot_presence, + spec_validation); +} + +int get_pool_generic_snapshot_names(const po::variables_map &vm, + at::ArgumentModifier mod, + size_t *spec_arg_index, + const std::string& pool_key, + std::string *pool_name, + std::string *namespace_name, + const std::string& generic_key, + const std::string& generic_key_desc, + std::string *generic_name, + std::string *snap_name, + bool generic_name_required, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation) { + std::string namespace_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_NAMESPACE_NAME : at::NAMESPACE_NAME); + std::string snap_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_SNAPSHOT_NAME : at::SNAPSHOT_NAME); + + if (vm.count(pool_key) && pool_name != nullptr) { + *pool_name = vm[pool_key].as<std::string>(); + } + if (vm.count(namespace_key) && namespace_name != nullptr) { + *namespace_name = vm[namespace_key].as<std::string>(); + } + if (vm.count(generic_key) && generic_name != nullptr) { + *generic_name = vm[generic_key].as<std::string>(); + } + if (vm.count(snap_key) && snap_name != nullptr) { + *snap_name = vm[snap_key].as<std::string>(); + } + + int r; + if ((generic_key == at::IMAGE_NAME || generic_key == at::DEST_IMAGE_NAME) && + generic_name != nullptr && !generic_name->empty()) { + // despite the separate pool and snapshot name options, + // we can also specify them via the image option + std::string image_name_copy(*generic_name); + r = extract_spec(image_name_copy, pool_name, namespace_name, generic_name, + snap_name, spec_validation); + if (r < 0) { + return r; + } + } + + if (generic_name != nullptr && spec_arg_index != nullptr && + generic_name->empty()) { + std::string spec = get_positional_argument(vm, (*spec_arg_index)++); + if (!spec.empty()) { + r = extract_spec(spec, pool_name, namespace_name, generic_name, snap_name, + spec_validation); + if (r < 0) { + return r; + } + } + } + + if (pool_name != nullptr && pool_name->empty()) { + *pool_name = get_default_pool_name(); + } + + if (generic_name != nullptr && generic_name_required && + generic_name->empty()) { + std::string prefix = at::get_description_prefix(mod); + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string()) + << generic_key_desc << " name was not specified" << std::endl; + return -EINVAL; + } + + std::regex pattern("^[^@/]+?$"); + if (spec_validation == SPEC_VALIDATION_FULL) { + // validate pool name while creating/renaming/copying/cloning/importing/etc + if ((pool_name != nullptr) && !std::regex_match (*pool_name, pattern)) { + std::cerr << "rbd: invalid pool name '" << *pool_name << "'" << std::endl; + return -EINVAL; + } + } + + if (namespace_name != nullptr && !namespace_name->empty() && + !std::regex_match (*namespace_name, pattern)) { + std::cerr << "rbd: invalid namespace name '" << *namespace_name << "'" + << std::endl; + return -EINVAL; + } + + if (snap_name != nullptr) { + r = validate_snapshot_name(mod, *snap_name, snapshot_presence, + spec_validation); + if (r < 0) { + return r; + } + } + return 0; +} + +int validate_snapshot_name(at::ArgumentModifier mod, + const std::string &snap_name, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation) { + std::string prefix = at::get_description_prefix(mod); + switch (snapshot_presence) { + case SNAPSHOT_PRESENCE_PERMITTED: + break; + case SNAPSHOT_PRESENCE_NONE: + if (!snap_name.empty()) { + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string()) + << "snapshot name specified for a command that doesn't use it" + << std::endl; + return -EINVAL; + } + break; + case SNAPSHOT_PRESENCE_REQUIRED: + if (snap_name.empty()) { + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string()) + << "snapshot name was not specified" << std::endl; + return -EINVAL; + } + break; + } + + if (spec_validation == SPEC_VALIDATION_SNAP) { + // disallow "/" and "@" in snap name + std::regex pattern("^[^@/]*?$"); + if (!std::regex_match (snap_name, pattern)) { + std::cerr << "rbd: invalid snap name '" << snap_name << "'" << std::endl; + return -EINVAL; + } + } + return 0; +} + +int get_image_options(const boost::program_options::variables_map &vm, + bool get_format, librbd::ImageOptions *opts) { + uint64_t order = 0, stripe_unit = 0, stripe_count = 0, object_size = 0; + uint64_t features = 0, features_clear = 0; + std::string data_pool; + bool order_specified = true; + bool features_specified = false; + bool features_clear_specified = false; + bool stripe_specified = false; + + if (vm.count(at::IMAGE_ORDER)) { + order = vm[at::IMAGE_ORDER].as<uint64_t>(); + std::cerr << "rbd: --order is deprecated, use --object-size" + << std::endl; + } else if (vm.count(at::IMAGE_OBJECT_SIZE)) { + object_size = vm[at::IMAGE_OBJECT_SIZE].as<uint64_t>(); + order = std::round(std::log2(object_size)); + } else { + order_specified = false; + } + + if (vm.count(at::IMAGE_FEATURES)) { + features = vm[at::IMAGE_FEATURES].as<uint64_t>(); + features_specified = true; + } else { + features = get_rbd_default_features(g_ceph_context); + } + + if (vm.count(at::IMAGE_STRIPE_UNIT)) { + stripe_unit = vm[at::IMAGE_STRIPE_UNIT].as<uint64_t>(); + stripe_specified = true; + } + + if (vm.count(at::IMAGE_STRIPE_COUNT)) { + stripe_count = vm[at::IMAGE_STRIPE_COUNT].as<uint64_t>(); + stripe_specified = true; + } + + if (vm.count(at::IMAGE_SHARED) && vm[at::IMAGE_SHARED].as<bool>()) { + if (features_specified) { + features &= ~RBD_FEATURES_SINGLE_CLIENT; + } else { + features_clear |= RBD_FEATURES_SINGLE_CLIENT; + features_clear_specified = true; + } + } + + if (vm.count(at::IMAGE_DATA_POOL)) { + data_pool = vm[at::IMAGE_DATA_POOL].as<std::string>(); + } + + if (get_format) { + uint64_t format = 0; + bool format_specified = false; + if (vm.count(at::IMAGE_NEW_FORMAT)) { + format = 2; + format_specified = true; + } else if (vm.count(at::IMAGE_FORMAT)) { + format = vm[at::IMAGE_FORMAT].as<uint32_t>(); + format_specified = true; + } + if (format == 1) { + std::cerr << "rbd: image format 1 is deprecated" << std::endl; + } + + if (features_specified && features != 0) { + if (format_specified && format == 1) { + std::cerr << "rbd: features not allowed with format 1; " + << "use --image-format 2" << std::endl; + return -EINVAL; + } else { + format = 2; + format_specified = true; + } + } + + if ((stripe_unit || stripe_count) && + (stripe_unit != (1ull << order) && stripe_count != 1)) { + if (format_specified && format == 1) { + std::cerr << "rbd: non-default striping not allowed with format 1; " + << "use --image-format 2" << std::endl; + return -EINVAL; + } else { + format = 2; + format_specified = true; + } + } + + if (!data_pool.empty()) { + if (format_specified && format == 1) { + std::cerr << "rbd: data pool not allowed with format 1; " + << "use --image-format 2" << std::endl; + return -EINVAL; + } else { + format = 2; + format_specified = true; + } + } + + if (format_specified) { + int r = g_conf().set_val("rbd_default_format", stringify(format)); + ceph_assert(r == 0); + opts->set(RBD_IMAGE_OPTION_FORMAT, format); + } + } + + if (order_specified) + opts->set(RBD_IMAGE_OPTION_ORDER, order); + if (features_specified) + opts->set(RBD_IMAGE_OPTION_FEATURES, features); + if (features_clear_specified) { + opts->set(RBD_IMAGE_OPTION_FEATURES_CLEAR, features_clear); + } + if (stripe_specified) { + opts->set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); + opts->set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); + } + if (!data_pool.empty()) { + opts->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool); + } + int r = get_journal_options(vm, opts); + if (r < 0) { + return r; + } + + r = get_flatten_option(vm, opts); + if (r < 0) { + return r; + } + + return 0; +} + +int get_journal_options(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts) { + + if (vm.count(at::JOURNAL_OBJECT_SIZE)) { + uint64_t size = vm[at::JOURNAL_OBJECT_SIZE].as<uint64_t>(); + uint64_t order = 12; + while ((1ULL << order) < size) { + order++; + } + opts->set(RBD_IMAGE_OPTION_JOURNAL_ORDER, order); + + int r = g_conf().set_val("rbd_journal_order", stringify(order)); + ceph_assert(r == 0); + } + if (vm.count(at::JOURNAL_SPLAY_WIDTH)) { + opts->set(RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH, + vm[at::JOURNAL_SPLAY_WIDTH].as<uint64_t>()); + + int r = g_conf().set_val("rbd_journal_splay_width", + stringify( + vm[at::JOURNAL_SPLAY_WIDTH].as<uint64_t>())); + ceph_assert(r == 0); + } + if (vm.count(at::JOURNAL_POOL)) { + opts->set(RBD_IMAGE_OPTION_JOURNAL_POOL, + vm[at::JOURNAL_POOL].as<std::string>()); + + int r = g_conf().set_val("rbd_journal_pool", + vm[at::JOURNAL_POOL].as<std::string>()); + ceph_assert(r == 0); + } + + return 0; +} + +int get_flatten_option(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts) { + if (vm.count(at::IMAGE_FLATTEN) && vm[at::IMAGE_FLATTEN].as<bool>()) { + uint64_t flatten = 1; + opts->set(RBD_IMAGE_OPTION_FLATTEN, flatten); + } + return 0; +} + +int get_image_size(const boost::program_options::variables_map &vm, + uint64_t *size) { + if (vm.count(at::IMAGE_SIZE) == 0) { + std::cerr << "rbd: must specify --size <M/G/T>" << std::endl; + return -EINVAL; + } + + *size = vm[at::IMAGE_SIZE].as<uint64_t>(); + return 0; +} + +int get_path(const boost::program_options::variables_map &vm, + size_t *arg_index, std::string *path) { + if (vm.count(at::PATH)) { + *path = vm[at::PATH].as<std::string>(); + } else { + *path = get_positional_argument(vm, *arg_index); + if (!path->empty()) { + ++(*arg_index); + } + } + + if (path->empty()) { + std::cerr << "rbd: path was not specified" << std::endl; + return -EINVAL; + } + return 0; +} + +int get_formatter(const po::variables_map &vm, + at::Format::Formatter *formatter) { + if (vm.count(at::FORMAT)) { + bool pretty = vm[at::PRETTY_FORMAT].as<bool>(); + *formatter = vm[at::FORMAT].as<at::Format>().create_formatter(pretty); + if (*formatter == nullptr && pretty) { + std::cerr << "rbd: --pretty-format only works when --format " + << "is json or xml" << std::endl; + return -EINVAL; + } else if (*formatter != nullptr && !pretty) { + formatter->get()->enable_line_break(); + } + } else if (vm[at::PRETTY_FORMAT].as<bool>()) { + std::cerr << "rbd: --pretty-format only works when --format " + << "is json or xml" << std::endl; + return -EINVAL; + } + return 0; +} + +void init_context() { + g_conf().set_val_or_die("rbd_cache_writethrough_until_flush", "false"); + g_conf().apply_changes(nullptr); + common_init_finish(g_ceph_context); +} + +int init_rados(librados::Rados *rados) { + init_context(); + + int r = rados->init_with_context(g_ceph_context); + if (r < 0) { + std::cerr << "rbd: couldn't initialize rados!" << std::endl; + return r; + } + + r = rados->connect(); + if (r < 0) { + std::cerr << "rbd: couldn't connect to the cluster!" << std::endl; + return r; + } + + return 0; +} + +int init(const std::string &pool_name, const std::string& namespace_name, + librados::Rados *rados, librados::IoCtx *io_ctx) { + init_context(); + + int r = init_rados(rados); + if (r < 0) { + return r; + } + + r = init_io_ctx(*rados, pool_name, namespace_name, io_ctx); + if (r < 0) { + return r; + } + return 0; +} + +int init_io_ctx(librados::Rados &rados, const std::string &pool_name, + const std::string& namespace_name, librados::IoCtx *io_ctx) { + int r = rados.ioctx_create(pool_name.c_str(), *io_ctx); + if (r < 0) { + if (r == -ENOENT && pool_name == get_default_pool_name()) { + std::cerr << "rbd: error opening default pool " + << "'" << pool_name << "'" << std::endl + << "Ensure that the default pool has been created or specify " + << "an alternate pool name." << std::endl; + } else { + std::cerr << "rbd: error opening pool '" << pool_name << "': " + << cpp_strerror(r) << std::endl; + } + return r; + } + + return set_namespace(namespace_name, io_ctx); +} + +int set_namespace(const std::string& namespace_name, librados::IoCtx *io_ctx) { + if (!namespace_name.empty()) { + librbd::RBD rbd; + bool exists = false; + int r = rbd.namespace_exists(*io_ctx, namespace_name.c_str(), &exists); + if (r < 0) { + std::cerr << "rbd: error asserting namespace: " + << cpp_strerror(r) << std::endl; + return r; + } + if (!exists) { + std::cerr << "rbd: namespace '" << namespace_name << "' does not exist." + << std::endl; + return -ENOENT; + } + } + io_ctx->set_namespace(namespace_name); + return 0; +} + +void disable_cache() { + g_conf().set_val_or_die("rbd_cache", "false"); +} + +int open_image(librados::IoCtx &io_ctx, const std::string &image_name, + bool read_only, librbd::Image *image) { + int r; + librbd::RBD rbd; + if (read_only) { + r = rbd.open_read_only(io_ctx, *image, image_name.c_str(), NULL); + } else { + r = rbd.open(io_ctx, *image, image_name.c_str()); + } + + if (r < 0) { + std::cerr << "rbd: error opening image " << image_name << ": " + << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int open_image_by_id(librados::IoCtx &io_ctx, const std::string &image_id, + bool read_only, librbd::Image *image) { + int r; + librbd::RBD rbd; + if (read_only) { + r = rbd.open_by_id_read_only(io_ctx, *image, image_id.c_str(), NULL); + } else { + r = rbd.open_by_id(io_ctx, *image, image_id.c_str()); + } + + if (r < 0) { + std::cerr << "rbd: error opening image with id " << image_id << ": " + << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int init_and_open_image(const std::string &pool_name, + const std::string &namespace_name, + const std::string &image_name, + const std::string &image_id, + const std::string &snap_name, bool read_only, + librados::Rados *rados, librados::IoCtx *io_ctx, + librbd::Image *image) { + int r = init(pool_name, namespace_name, rados, io_ctx); + if (r < 0) { + return r; + } + + if (image_id.empty()) { + r = open_image(*io_ctx, image_name, read_only, image); + } else { + r = open_image_by_id(*io_ctx, image_id, read_only, image); + } + if (r < 0) { + return r; + } + + if (!snap_name.empty()) { + r = snap_set(*image, snap_name); + if (r < 0) { + return r; + } + } + return 0; +} + +int snap_set(librbd::Image &image, const std::string &snap_name) { + int r = image.snap_set(snap_name.c_str()); + if (r < 0) { + std::cerr << "error setting snapshot context: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void calc_sparse_extent(const bufferptr &bp, + size_t sparse_size, + size_t buffer_offset, + uint64_t buffer_length, + size_t *write_length, + bool *zeroed) { + if (sparse_size == 0) { + // sparse writes are disabled -- write the full extent + ceph_assert(buffer_offset == 0); + *write_length = buffer_length; + *zeroed = false; + return; + } + + *write_length = 0; + size_t original_offset = buffer_offset; + while (buffer_offset < buffer_length) { + size_t extent_size = std::min<size_t>( + sparse_size, buffer_length - buffer_offset); + + bufferptr extent(bp, buffer_offset, extent_size); + + bool extent_is_zero = extent.is_zero(); + if (original_offset == buffer_offset) { + *zeroed = extent_is_zero; + } else if (*zeroed != extent_is_zero) { + ceph_assert(*write_length > 0); + return; + } + + buffer_offset += extent_size; + *write_length += extent_size; + } +} + +std::string image_id(librbd::Image& image) { + std::string id; + int r = image.get_id(&id); + if (r < 0) { + return std::string(); + } + return id; +} + +std::string mirror_image_state(librbd::mirror_image_state_t state) { + switch (state) { + case RBD_MIRROR_IMAGE_DISABLING: + return "disabling"; + case RBD_MIRROR_IMAGE_ENABLED: + return "enabled"; + case RBD_MIRROR_IMAGE_DISABLED: + return "disabled"; + default: + return "unknown"; + } +} + +std::string mirror_image_status_state(librbd::mirror_image_status_state_t state) { + switch (state) { + case MIRROR_IMAGE_STATUS_STATE_UNKNOWN: + return "unknown"; + case MIRROR_IMAGE_STATUS_STATE_ERROR: + return "error"; + case MIRROR_IMAGE_STATUS_STATE_SYNCING: + return "syncing"; + case MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY: + return "starting_replay"; + case MIRROR_IMAGE_STATUS_STATE_REPLAYING: + return "replaying"; + case MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY: + return "stopping_replay"; + case MIRROR_IMAGE_STATUS_STATE_STOPPED: + return "stopped"; + default: + return "unknown (" + stringify(static_cast<uint32_t>(state)) + ")"; + } +} + +std::string mirror_image_status_state(librbd::mirror_image_status_t status) { + return (status.up ? "up+" : "down+") + + mirror_image_status_state(status.state); +} + +std::string timestr(time_t t) { + struct tm tm; + + localtime_r(&t, &tm); + + char buf[32]; + strftime(buf, sizeof(buf), "%F %T", &tm); + + return buf; +} + +uint64_t get_rbd_default_features(CephContext* cct) { + auto features = cct->_conf.get_val<std::string>("rbd_default_features"); + return boost::lexical_cast<uint64_t>(features); +} + +bool is_not_user_snap_namespace(librbd::Image* image, + const librbd::snap_info_t &snap_info) +{ + librbd::snap_namespace_type_t namespace_type; + int r = image->snap_get_namespace_type(snap_info.id, &namespace_type); + if (r < 0) { + return false; + } + return namespace_type != RBD_SNAP_NAMESPACE_TYPE_USER; +} + +} // namespace utils +} // namespace rbd diff --git a/src/tools/rbd/Utils.h b/src/tools/rbd/Utils.h new file mode 100644 index 00000000..81ea2c71 --- /dev/null +++ b/src/tools/rbd/Utils.h @@ -0,0 +1,204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_UTILS_H +#define CEPH_RBD_UTILS_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" +#include "tools/rbd/ArgumentTypes.h" +#include <string> +#include <boost/program_options.hpp> + +namespace rbd { +namespace utils { + +namespace detail { + +template <typename T, void(T::*MF)(int)> +void aio_completion_callback(librbd::completion_t completion, + void *arg) { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(completion); + + // complete the AIO callback in separate thread context + T *t = reinterpret_cast<T *>(arg); + int r = aio_completion->get_return_value(); + aio_completion->release(); + + (t->*MF)(r); +} + +} // namespace detail + +static const std::string RBD_DIFF_BANNER ("rbd diff v1\n"); +static const size_t RBD_DEFAULT_SPARSE_SIZE = 4096; + +static const std::string RBD_IMAGE_BANNER_V2 ("rbd image v2\n"); +static const std::string RBD_IMAGE_DIFFS_BANNER_V2 ("rbd image diffs v2\n"); +static const std::string RBD_DIFF_BANNER_V2 ("rbd diff v2\n"); + +#define RBD_DIFF_FROM_SNAP 'f' +#define RBD_DIFF_TO_SNAP 't' +#define RBD_DIFF_IMAGE_SIZE 's' +#define RBD_DIFF_WRITE 'w' +#define RBD_DIFF_ZERO 'z' +#define RBD_DIFF_END 'e' + +#define RBD_SNAP_PROTECTION_STATUS 'p' + +#define RBD_EXPORT_IMAGE_ORDER 'O' +#define RBD_EXPORT_IMAGE_FEATURES 'T' +#define RBD_EXPORT_IMAGE_STRIPE_UNIT 'U' +#define RBD_EXPORT_IMAGE_STRIPE_COUNT 'C' +#define RBD_EXPORT_IMAGE_META 'M' +#define RBD_EXPORT_IMAGE_END 'E' + +enum SnapshotPresence { + SNAPSHOT_PRESENCE_NONE, + SNAPSHOT_PRESENCE_PERMITTED, + SNAPSHOT_PRESENCE_REQUIRED +}; + +enum SpecValidation { + SPEC_VALIDATION_FULL, + SPEC_VALIDATION_SNAP, + SPEC_VALIDATION_NONE +}; + +struct ProgressContext : public librbd::ProgressContext { + const char *operation; + bool progress; + int last_pc; + + ProgressContext(const char *o, bool no_progress) + : operation(o), progress(!no_progress), last_pc(0) { + } + + int update_progress(uint64_t offset, uint64_t total) override; + void finish(); + void fail(); +}; + +template <typename T, void(T::*MF)(int)> +librbd::RBD::AioCompletion *create_aio_completion(T *t) { + return new librbd::RBD::AioCompletion( + t, &detail::aio_completion_callback<T, MF>); +} + +void aio_context_callback(librbd::completion_t completion, void *arg); + +int read_string(int fd, unsigned max, std::string *out); + +int extract_spec(const std::string &spec, std::string *pool_name, + std::string *namespace_name, std::string *name, + std::string *snap_name, SpecValidation spec_validation); + +std::string get_positional_argument( + const boost::program_options::variables_map &vm, size_t index); + +std::string get_default_pool_name(); +int get_pool_and_namespace_names( + const boost::program_options::variables_map &vm, + bool default_empty_pool_name, bool validate_pool_name, + std::string* pool_name, std::string* namespace_name, size_t *arg_index); + +int get_pool_image_snapshot_names( + const boost::program_options::variables_map &vm, + argument_types::ArgumentModifier mod, size_t *spec_arg_index, + std::string *pool_name, std::string *namespace_name, + std::string *image_name, std::string *snap_name, bool image_name_required, + SnapshotPresence snapshot_presence, SpecValidation spec_validation); + +int get_pool_generic_snapshot_names( + const boost::program_options::variables_map &vm, + argument_types::ArgumentModifier mod, size_t *spec_arg_index, + const std::string& pool_key, std::string *pool_name, + std::string *namespace_name, const std::string& generic_key, + const std::string& generic_key_desc, std::string *generic_name, + std::string *snap_name, bool generic_name_required, + SnapshotPresence snapshot_presence, SpecValidation spec_validation); + +int get_pool_image_id(const boost::program_options::variables_map &vm, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *image_id); + +int validate_snapshot_name(argument_types::ArgumentModifier mod, + const std::string &snap_name, + SnapshotPresence snapshot_presence, + SpecValidation spec_validation); + +int get_image_options(const boost::program_options::variables_map &vm, + bool get_format, librbd::ImageOptions* opts); + +int get_journal_options(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts); + +int get_flatten_option(const boost::program_options::variables_map &vm, + librbd::ImageOptions *opts); + +int get_image_size(const boost::program_options::variables_map &vm, + uint64_t *size); + +int get_path(const boost::program_options::variables_map &vm, + size_t *arg_index, std::string *path); + +int get_formatter(const boost::program_options::variables_map &vm, + argument_types::Format::Formatter *formatter); + +void init_context(); + +int init_rados(librados::Rados *rados); + +int init(const std::string &pool_name, const std::string& namespace_name, + librados::Rados *rados, librados::IoCtx *io_ctx); +int init_io_ctx(librados::Rados &rados, const std::string &pool_name, + const std::string& namespace_name, librados::IoCtx *io_ctx); +int set_namespace(const std::string& namespace_name, librados::IoCtx *io_ctx); + +void disable_cache(); + +int open_image(librados::IoCtx &io_ctx, const std::string &image_name, + bool read_only, librbd::Image *image); + +int open_image_by_id(librados::IoCtx &io_ctx, const std::string &image_id, + bool read_only, librbd::Image *image); + +int init_and_open_image(const std::string &pool_name, + const std::string &namespace_name, + const std::string &image_name, + const std::string &image_id, + const std::string &snap_name, bool read_only, + librados::Rados *rados, librados::IoCtx *io_ctx, + librbd::Image *image); + +int snap_set(librbd::Image &image, const std::string &snap_name); + +void calc_sparse_extent(const bufferptr &bp, + size_t sparse_size, + size_t buffer_offset, + uint64_t length, + size_t *write_length, + bool *zeroed); + +bool is_not_user_snap_namespace(librbd::Image* image, + const librbd::snap_info_t &snap_info); + +std::string image_id(librbd::Image& image); + +std::string mirror_image_state(librbd::mirror_image_state_t mirror_image_state); +std::string mirror_image_status_state(librbd::mirror_image_status_state_t state); +std::string mirror_image_status_state(librbd::mirror_image_status_t status); + +std::string timestr(time_t t); + +// duplicate here to not include librbd_internal lib +uint64_t get_rbd_default_features(CephContext* cct); + +} // namespace utils +} // namespace rbd + +#endif // CEPH_RBD_UTILS_H diff --git a/src/tools/rbd/action/Bench.cc b/src/tools/rbd/action/Bench.cc new file mode 100644 index 00000000..27843c7b --- /dev/null +++ b/src/tools/rbd/action/Bench.cc @@ -0,0 +1,539 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/strtol.h" +#include "common/Cond.h" +#include "common/Mutex.h" +#include "global/signal_handler.h" +#include <atomic> +#include <chrono> +#include <iostream> +#include <boost/accumulators/accumulators.hpp> +#include <boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/rolling_sum.hpp> +#include <boost/program_options.hpp> + +using namespace std::chrono; + +static std::atomic<bool> terminating; +static void handle_signal(int signum) +{ + ceph_assert(signum == SIGINT || signum == SIGTERM); + terminating = true; +} + +namespace rbd { +namespace action { +namespace bench { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +enum io_type_t { + IO_TYPE_READ = 0, + IO_TYPE_WRITE, + IO_TYPE_RW, + + IO_TYPE_NUM, +}; + +struct IOType {}; +struct Size {}; +struct IOPattern {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + Size *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + std::string parse_error; + uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error); + if (!parse_error.empty()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + v = boost::any(size); +} + +void validate(boost::any& v, const std::vector<std::string>& values, + IOPattern *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "rand") { + v = boost::any(true); + } else if (s == "seq") { + v = boost::any(false); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +io_type_t get_io_type(string io_type_string) { + if (io_type_string == "read") + return IO_TYPE_READ; + else if (io_type_string == "write") + return IO_TYPE_WRITE; + else if (io_type_string == "readwrite" || io_type_string == "rw") + return IO_TYPE_RW; + else + return IO_TYPE_NUM; +} + +void validate(boost::any& v, const std::vector<std::string>& values, + IOType *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + io_type_t io_type = get_io_type(s); + if (io_type >= IO_TYPE_NUM) + throw po::validation_error(po::validation_error::invalid_option_value); + else + v = boost::any(io_type); +} + +} // anonymous namespace + +static void rbd_bencher_completion(void *c, void *pc); +struct rbd_bencher; + +struct bencher_completer { + rbd_bencher *bencher; + bufferlist *bl; + +public: + bencher_completer(rbd_bencher *bencher, bufferlist *bl) + : bencher(bencher), bl(bl) + { } + + ~bencher_completer() + { + if (bl) + delete bl; + } +}; + +struct rbd_bencher { + librbd::Image *image; + Mutex lock; + Cond cond; + int in_flight; + io_type_t io_type; + uint64_t io_size; + bufferlist write_bl; + + explicit rbd_bencher(librbd::Image *i, io_type_t io_type, uint64_t io_size) + : image(i), + lock("rbd_bencher::lock"), + in_flight(0), + io_type(io_type), + io_size(io_size) + { + if (io_type == IO_TYPE_WRITE || io_type == IO_TYPE_RW) { + bufferptr bp(io_size); + memset(bp.c_str(), rand() & 0xff, io_size); + write_bl.push_back(bp); + } + } + + void start_io(int max, uint64_t off, uint64_t len, int op_flags, bool read_flag) + { + { + Mutex::Locker l(lock); + in_flight++; + } + + librbd::RBD::AioCompletion *c; + if (read_flag) { + bufferlist *read_bl = new bufferlist(); + c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, read_bl)), + rbd_bencher_completion); + image->aio_read2(off, len, *read_bl, c, op_flags); + } else { + c = new librbd::RBD::AioCompletion((void *)(new bencher_completer(this, NULL)), + rbd_bencher_completion); + image->aio_write2(off, len, write_bl, c, op_flags); + } + } + + int wait_for(int max, bool interrupt_on_terminating) { + Mutex::Locker l(lock); + while (in_flight > max && !(terminating && interrupt_on_terminating)) { + utime_t dur; + dur.set_from_double(.2); + cond.WaitInterval(lock, dur); + } + + return terminating ? -EINTR : 0; + } + +}; + +void rbd_bencher_completion(void *vc, void *pc) +{ + librbd::RBD::AioCompletion *c = (librbd::RBD::AioCompletion *)vc; + bencher_completer *bc = static_cast<bencher_completer *>(pc); + rbd_bencher *b = bc->bencher; + //cout << "complete " << c << std::endl; + int ret = c->get_return_value(); + if (b->io_type == IO_TYPE_WRITE && ret != 0) { + cout << "write error: " << cpp_strerror(ret) << std::endl; + exit(ret < 0 ? -ret : ret); + } else if (b->io_type == IO_TYPE_READ && (unsigned int)ret != b->io_size) { + cout << "read error: " << cpp_strerror(ret) << std::endl; + exit(ret < 0 ? -ret : ret); + } + b->lock.Lock(); + b->in_flight--; + b->cond.Signal(); + b->lock.Unlock(); + c->release(); + delete bc; +} + +bool should_read(uint64_t read_proportion) +{ + uint64_t rand_num = rand() % 100; + + if (rand_num < read_proportion) + return true; + else + return false; +} + +int do_bench(librbd::Image& image, io_type_t io_type, + uint64_t io_size, uint64_t io_threads, + uint64_t io_bytes, bool random, uint64_t read_proportion) +{ + uint64_t size = 0; + image.size(&size); + if (io_size > size) { + std::cerr << "rbd: io-size " << byte_u_t(io_size) << " " + << "larger than image size " << byte_u_t(size) << std::endl; + return -EINVAL; + } + + if (io_size > std::numeric_limits<uint32_t>::max()) { + std::cerr << "rbd: io-size should be less than 4G" << std::endl; + return -EINVAL; + } + + int r = image.flush(); + if (r < 0 && (r != -EROFS || io_type != IO_TYPE_READ)) { + std::cerr << "rbd: failed to flush: " << cpp_strerror(r) << std::endl; + return r; + } + + rbd_bencher b(&image, io_type, io_size); + + std::cout << "bench " + << " type " << (io_type == IO_TYPE_READ ? "read" : + io_type == IO_TYPE_WRITE ? "write" : "readwrite") + << (io_type == IO_TYPE_RW ? " read:write=" + + to_string(read_proportion) + ":" + to_string(100 - read_proportion) : "") + << " io_size " << io_size + << " io_threads " << io_threads + << " bytes " << io_bytes + << " pattern " << (random ? "random" : "sequential") + << std::endl; + + srand(time(NULL) % (unsigned long) -1); + + coarse_mono_time start = coarse_mono_clock::now(); + chrono::duration<double> last = chrono::duration<double>::zero(); + unsigned ios = 0; + + vector<uint64_t> thread_offset; + uint64_t i; + uint64_t start_pos; + + uint64_t unit_len = size/io_size/io_threads; + // disturb all thread's offset + for (i = 0; i < io_threads; i++) { + if (random) { + start_pos = (rand() % (size / io_size)) * io_size; + } else { + start_pos = unit_len * i * io_size; + } + thread_offset.push_back(start_pos); + } + + const int WINDOW_SIZE = 5; + typedef boost::accumulators::accumulator_set< + double, boost::accumulators::stats< + boost::accumulators::tag::rolling_sum> > RollingSum; + + RollingSum time_acc( + boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE); + RollingSum ios_acc( + boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE); + RollingSum off_acc( + boost::accumulators::tag::rolling_window::window_size = WINDOW_SIZE); + uint64_t cur_ios = 0; + uint64_t cur_off = 0; + + int op_flags; + if (random) { + op_flags = LIBRADOS_OP_FLAG_FADVISE_RANDOM; + } else { + op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL; + } + + printf(" SEC OPS OPS/SEC BYTES/SEC\n"); + uint64_t off; + int read_ops = 0; + int write_ops = 0; + + for (off = 0; off < io_bytes; ) { + // Issue I/O + i = 0; + int r = 0; + while (i < io_threads && off < io_bytes) { + bool read_flag = should_read(read_proportion); + + r = b.wait_for(io_threads - 1, true); + if (r < 0) { + break; + } + b.start_io(io_threads, thread_offset[i], io_size, op_flags, read_flag); + + ++i; + ++ios; + off += io_size; + + ++cur_ios; + cur_off += io_size; + + if (read_flag) + read_ops++; + else + write_ops++; + } + + if (r < 0) { + break; + } + + // Set the thread_offsets of next I/O + for (i = 0; i < io_threads; ++i) { + if (random) { + thread_offset[i] = (rand() % (size / io_size)) * io_size; + continue; + } + if (off < (io_size * unit_len * io_threads) ) { + thread_offset[i] += io_size; + } else { + // thread_offset is adjusted to the chunks unassigned to threads. + thread_offset[i] = off + (i * io_size); + } + if (thread_offset[i] + io_size > size) + thread_offset[i] = unit_len * i * io_size; + } + + coarse_mono_time now = coarse_mono_clock::now(); + chrono::duration<double> elapsed = now - start; + if (last == chrono::duration<double>::zero()) { + last = elapsed; + } else if ((int)elapsed.count() != (int)last.count()) { + time_acc((elapsed - last).count()); + ios_acc(static_cast<double>(cur_ios)); + off_acc(static_cast<double>(cur_off)); + cur_ios = 0; + cur_off = 0; + + double time_sum = boost::accumulators::rolling_sum(time_acc); + printf("%5d %8d %8.2lf %8.2lf\n", + (int)elapsed.count(), + (int)(ios - io_threads), + boost::accumulators::rolling_sum(ios_acc) / time_sum, + boost::accumulators::rolling_sum(off_acc) / time_sum); + last = elapsed; + } + } + b.wait_for(0, false); + + if (io_type != IO_TYPE_READ) { + r = image.flush(); + if (r < 0) { + std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r) + << std::endl; + } + } + + coarse_mono_time now = coarse_mono_clock::now(); + chrono::duration<double> elapsed = now - start; + + printf("elapsed: %5d ops: %8d ops/sec: %8.2lf bytes/sec: %8.2lf\n", + (int)elapsed.count(), ios, (double)ios / elapsed.count(), + (double)off / elapsed.count()); + + if (io_type == IO_TYPE_RW) { + printf("read_ops: %5d read_ops/sec: %8.2lf read_bytes/sec: %8.2lf\n", + read_ops, (double)read_ops / elapsed.count(), + (double)read_ops * io_size / elapsed.count()); + + printf("write_ops: %5d write_ops/sec: %8.2lf write_bytes/sec: %8.2lf\n", + write_ops, (double)write_ops / elapsed.count(), + (double)write_ops * io_size / elapsed.count()); + } + + return 0; +} + +void add_bench_common_options(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + + options->add_options() + ("io-size", po::value<Size>(), "IO size (in B/K/M/G/T) [default: 4K]") + ("io-threads", po::value<uint32_t>(), "ios in flight [default: 16]") + ("io-total", po::value<Size>(), "total size for IO (in B/K/M/G/T) [default: 1G]") + ("io-pattern", po::value<IOPattern>(), "IO pattern (rand or seq) [default: seq]") + ("rw-mix-read", po::value<uint64_t>(), "read proportion in readwrite (<= 100) [default: 50]"); +} + +void get_arguments_for_write(po::options_description *positional, + po::options_description *options) { + add_bench_common_options(positional, options); +} + +void get_arguments_for_bench(po::options_description *positional, + po::options_description *options) { + add_bench_common_options(positional, options); + + options->add_options() + ("io-type", po::value<IOType>()->required(), "IO type (read , write, or readwrite(rw))"); +} + +int bench_execute(const po::variables_map &vm, io_type_t bench_io_type) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + utils::SnapshotPresence snap_presence = utils::SNAPSHOT_PRESENCE_NONE; + if (bench_io_type == IO_TYPE_READ) + snap_presence = utils::SNAPSHOT_PRESENCE_PERMITTED; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, snap_presence, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + uint64_t bench_io_size; + if (vm.count("io-size")) { + bench_io_size = vm["io-size"].as<uint64_t>(); + } else { + bench_io_size = 4096; + } + if (bench_io_size == 0) { + std::cerr << "rbd: --io-size should be greater than zero." << std::endl; + return -EINVAL; + } + + uint32_t bench_io_threads; + if (vm.count("io-threads")) { + bench_io_threads = vm["io-threads"].as<uint32_t>(); + } else { + bench_io_threads = 16; + } + if (bench_io_threads == 0) { + std::cerr << "rbd: --io-threads should be greater than zero." << std::endl; + return -EINVAL; + } + + uint64_t bench_bytes; + if (vm.count("io-total")) { + bench_bytes = vm["io-total"].as<uint64_t>(); + } else { + bench_bytes = 1 << 30; + } + + bool bench_random; + if (vm.count("io-pattern")) { + bench_random = vm["io-pattern"].as<bool>(); + } else { + bench_random = false; + } + + uint64_t bench_read_proportion; + if (bench_io_type == IO_TYPE_READ) { + bench_read_proportion = 100; + } else if (bench_io_type == IO_TYPE_WRITE) { + bench_read_proportion = 0; + } else { + if (vm.count("rw-mix-read")) { + bench_read_proportion = vm["rw-mix-read"].as<uint64_t>(); + } else { + bench_read_proportion = 50; + } + + if (bench_read_proportion > 100) { + std::cerr << "rbd: --rw-mix-read should not be larger than 100." << std::endl; + return -EINVAL; + } + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + r = do_bench(image, bench_io_type, bench_io_size, bench_io_threads, + bench_bytes, bench_random, bench_read_proportion); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + if (r < 0) { + std::cerr << "bench failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int execute_for_write(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::cerr << "rbd: bench-write is deprecated, use rbd bench --io-type write ..." << std::endl; + return bench_execute(vm, IO_TYPE_WRITE); +} + +int execute_for_bench(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + io_type_t bench_io_type; + if (vm.count("io-type")) { + bench_io_type = vm["io-type"].as<io_type_t>(); + } else { + std::cerr << "rbd: --io-type must be specified." << std::endl; + return -EINVAL; + } + + return bench_execute(vm, bench_io_type); +} + +Shell::Action action_write( + {"bench-write"}, {}, "Simple write benchmark. (Deprecated, please use `rbd bench --io-type write` instead.)", + "", &get_arguments_for_write, &execute_for_write, false); + +Shell::Action action_bench( + {"bench"}, {}, "Simple benchmark.", "", &get_arguments_for_bench, &execute_for_bench); + +} // namespace bench +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Children.cc b/src/tools/rbd/action/Children.cc new file mode 100644 index 00000000..f459e92b --- /dev/null +++ b/src/tools/rbd/action/Children.cc @@ -0,0 +1,166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace children { + +namespace at = argument_types; +namespace po = boost::program_options; + +int do_list_children(librados::IoCtx &io_ctx, librbd::Image &image, + bool all_flag, bool descendants_flag, Formatter *f) +{ + std::vector<librbd::linked_image_spec_t> children; + librbd::RBD rbd; + int r; + if (descendants_flag) { + r = image.list_descendants(&children); + } else { + r = image.list_children3(&children); + } + if (r < 0) + return r; + + if (f) + f->open_array_section("children"); + + for (auto& child : children) { + bool trash = child.trash; + if (f) { + if (all_flag) { + f->open_object_section("child"); + f->dump_string("pool", child.pool_name); + f->dump_string("pool_namespace", child.pool_namespace); + f->dump_string("image", child.image_name); + f->dump_string("id", child.image_id); + f->dump_bool("trash", child.trash); + f->close_section(); + } else if (!trash) { + f->open_object_section("child"); + f->dump_string("pool", child.pool_name); + f->dump_string("pool_namespace", child.pool_namespace); + f->dump_string("image", child.image_name); + f->close_section(); + } + } else if (all_flag || !trash) { + if (child.pool_name.empty()) { + std::cout << "(child missing " << child.pool_id << "/"; + } else { + std::cout << child.pool_name << "/"; + } + if (!child.pool_namespace.empty()) { + std::cout << child.pool_namespace << "/"; + } + if (child.image_name.empty()) { + std::cout << child.image_id << ")"; + } else { + std::cout << child.image_name; + if (trash) { + std::cout << " (trash " << child.image_id << ")"; + } + } + std::cout << std::endl; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_snap_id_option(options); + options->add_options() + ("all,a", po::bool_switch(), "list all children (include trash)"); + options->add_options() + ("descendants", po::bool_switch(), "include all descendants"); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + uint64_t snap_id = LIBRADOS_SNAP_HEAD; + if (vm.count(at::SNAPSHOT_ID)) { + snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>(); + } + + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (snap_id != LIBRADOS_SNAP_HEAD && !snap_name.empty()) { + std::cerr << "rbd: trying to access snapshot using both name and id." + << std::endl; + return -EINVAL; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + if (!snap_name.empty()) { + r = image.snap_set(snap_name.c_str()); + } else if (snap_id != LIBRADOS_SNAP_HEAD) { + r = image.snap_set_by_id(snap_id); + } + if (r == -ENOENT) { + std::cerr << "rbd: snapshot does not exist." << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: error setting snapshot: " << cpp_strerror(r) + << std::endl; + return r; + } + + r = do_list_children(io_ctx, image, vm["all"].as<bool>(), + vm["descendants"].as<bool>(), formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing children failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"children"}, {}, "Display children of an image or its snapshot.", "", + &get_arguments, &execute); + +} // namespace children +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Clone.cc b/src/tools/rbd/action/Clone.cc new file mode 100644 index 00000000..6406c957 --- /dev/null +++ b/src/tools/rbd/action/Clone.cc @@ -0,0 +1,99 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace clone { + +namespace at = argument_types; +namespace po = boost::program_options; + +int do_clone(librbd::RBD &rbd, librados::IoCtx &p_ioctx, + const char *p_name, const char *p_snapname, + librados::IoCtx &c_ioctx, const char *c_name, + librbd::ImageOptions& opts) { + return rbd.clone3(p_ioctx, p_name, p_snapname, c_ioctx, c_name, opts); +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, false); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + std::string dst_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, false, &opts); + if (r < 0) { + return r; + } + opts.set(RBD_IMAGE_OPTION_FORMAT, static_cast<uint64_t>(2)); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = do_clone(rbd, io_ctx, image_name.c_str(), snap_name.c_str(), dst_io_ctx, + dst_image_name.c_str(), opts); + if (r == -EXDEV) { + std::cerr << "rbd: clone v2 required for cross-namespace clones." + << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: clone error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"clone"}, {}, "Clone a snapshot into a CoW child image.", + at::get_long_features_help(), &get_arguments, &execute); + +} // namespace clone +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Config.cc b/src/tools/rbd/action/Config.cc new file mode 100644 index 00000000..2868c7ad --- /dev/null +++ b/src/tools/rbd/action/Config.cc @@ -0,0 +1,890 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/ceph_context.h" +#include "common/ceph_json.h" +#include "common/escape.h" +#include "common/errno.h" +#include "common/options.h" +#include "global/global_context.h" +#include "include/stringify.h" + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" + +#include <iostream> + +#include <boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace config { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +const std::string METADATA_CONF_PREFIX = "conf_"; +const uint32_t MAX_KEYS = 64; + +void add_config_entity_option( + boost::program_options::options_description *positional) { + positional->add_options() + ("config-entity", "config entity (global, client, client.<id>)"); +} + +void add_pool_option(boost::program_options::options_description *positional) { + positional->add_options() + ("pool-name", "pool name"); +} + +void add_key_option(po::options_description *positional) { + positional->add_options() + ("key", "config key"); +} + +int get_config_entity(const po::variables_map &vm, std::string *config_entity) { + *config_entity = utils::get_positional_argument(vm, 0); + + if (*config_entity != "global" && *config_entity != "client" && + !boost::starts_with(*config_entity, ("client."))) { + std::cerr << "rbd: invalid config entity: " << *config_entity + << " (must be global, client or client.<id>)" << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_pool(const po::variables_map &vm, std::string *pool_name) { + *pool_name = utils::get_positional_argument(vm, 0); + if (pool_name->empty()) { + std::cerr << "rbd: pool name was not specified" << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_key(const po::variables_map &vm, size_t *arg_index, + std::string *key) { + *key = utils::get_positional_argument(vm, *arg_index); + if (key->empty()) { + std::cerr << "rbd: config key was not specified" << std::endl; + return -EINVAL; + } else { + ++(*arg_index); + } + + if (!boost::starts_with(*key, "rbd_")) { + std::cerr << "rbd: not rbd option: " << *key << std::endl; + return -EINVAL; + } + + std::string value; + int r = g_ceph_context->_conf.get_val(key->c_str(), &value); + if (r < 0) { + std::cerr << "rbd: invalid config key: " << *key << std::endl; + return -EINVAL; + } + + return 0; +} + +std::ostream& operator<<(std::ostream& os, + const librbd::config_source_t& source) { + switch (source) { + case RBD_CONFIG_SOURCE_CONFIG: + os << "config"; + break; + case RBD_CONFIG_SOURCE_POOL: + os << "pool"; + break; + case RBD_CONFIG_SOURCE_IMAGE: + os << "image"; + break; + default: + os << "unknown (" << static_cast<uint32_t>(source) << ")"; + break; + } + return os; +} + +int config_global_list( + librados::Rados &rados, const std::string &config_entity, + std::map<std::string, std::pair<std::string, std::string>> *options) { + bool client_id_config_entity = + boost::starts_with(config_entity, ("client.")); + std::string cmd = + "{" + "\"prefix\": \"config dump\", " + "\"format\": \"json\" " + "}"; + bufferlist in_bl; + bufferlist out_bl; + std::string ss; + int r = rados.mon_command(cmd, in_bl, &out_bl, &ss); + if (r < 0) { + std::cerr << "rbd: error reading config: " << ss << std::endl; + return r; + } + + json_spirit::mValue json_root; + if (!json_spirit::read(out_bl.to_str(), json_root)) { + std::cerr << "rbd: error parsing config dump" << std::endl; + return -EINVAL; + } + + try { + auto &json_array = json_root.get_array(); + for (auto& e : json_array) { + auto &json_obj = e.get_obj(); + std::string section; + std::string name; + std::string value; + + for (auto &pairs : json_obj) { + if (pairs.first == "section") { + section = pairs.second.get_str(); + } else if (pairs.first == "name") { + name = pairs.second.get_str(); + } else if (pairs.first == "value") { + value = pairs.second.get_str(); + } + } + + if (!boost::starts_with(name, "rbd_")) { + continue; + } + if (section != "global" && section != "client" && + (!client_id_config_entity || section != config_entity)) { + continue; + } + if (config_entity == "global" && section != "global") { + continue; + } + auto it = options->find(name); + if (it == options->end()) { + (*options)[name] = {value, section}; + continue; + } + if (section == "client") { + if (it->second.second == "global") { + it->second = {value, section}; + } + } else if (client_id_config_entity) { + it->second = {value, section}; + } + } + } catch (std::runtime_error &e) { + std::cerr << "rbd: error parsing config dump: " << e.what() << std::endl; + return -EINVAL; + } + + return 0; +} + +} // anonymous namespace + +void get_global_get_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + add_key_option(positional); +} + +int execute_global_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::map<std::string, std::pair<std::string, std::string>> options; + r = config_global_list(rados, config_entity, &options); + if (r < 0) { + return r; + } + + auto it = options.find(key); + + if (it == options.end() || it->second.second != config_entity) { + std::cerr << "rbd: " << key << " is not set" << std::endl; + return -ENOENT; + } + + std::cout << it->second.first << std::endl; + return 0; +} + +void get_global_set_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + add_key_option(positional); + positional->add_options() + ("value", "config value"); +} + +int execute_global_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, 2); + std::string cmd = + "{" + "\"prefix\": \"config set\", " + "\"who\": \"" + stringify(json_stream_escaper(config_entity)) + "\", " + "\"name\": \"" + key + "\", " + "\"value\": \"" + stringify(json_stream_escaper(value)) + "\"" + "}"; + bufferlist in_bl; + std::string ss; + r = rados.mon_command(cmd, in_bl, nullptr, &ss); + if (r < 0) { + std::cerr << "rbd: error setting " << key << ": " << ss << std::endl; + return r; + } + + return 0; +} + +void get_global_remove_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + add_key_option(positional); +} + +int execute_global_remove( + const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::string cmd = + "{" + "\"prefix\": \"config rm\", " + "\"who\": \"" + stringify(json_stream_escaper(config_entity)) + "\", " + "\"name\": \"" + key + "\"" + "}"; + bufferlist in_bl; + std::string ss; + r = rados.mon_command(cmd, in_bl, nullptr, &ss); + if (r < 0) { + std::cerr << "rbd: error removing " << key << ": " << ss << std::endl; + return r; + } + + return 0; +} + +void get_global_list_arguments(po::options_description *positional, + po::options_description *options) { + add_config_entity_option(positional); + at::add_format_options(options); +} + +int execute_global_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string config_entity; + int r = get_config_entity(vm, &config_entity); + if (r < 0) { + return r; + } + + at::Format::Formatter f; + r = utils::get_formatter(vm, &f); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + std::map<std::string, std::pair<std::string, std::string>> options; + r = config_global_list(rados, config_entity, &options); + if (r < 0) { + return r; + } + + if (options.empty() && !f) { + return 0; + } + + TextTable tbl; + + if (f) { + f->open_array_section("config"); + } else { + tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Section", TextTable::LEFT, TextTable::LEFT); + } + + for (const auto &it : options) { + if (f) { + f->open_object_section("option"); + f->dump_string("name", it.first); + f->dump_string("value", it.second.first); + f->dump_string("section", it.second.second); + f->close_section(); + } else { + tbl << it.first << it.second.first << it.second.second + << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +void get_pool_get_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + add_key_option(positional); +} + +int execute_pool_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::string value; + + r = rbd.pool_metadata_get(io_ctx, METADATA_CONF_PREFIX + key, &value); + if (r < 0) { + if (r == -ENOENT) { + std::cerr << "rbd: " << key << " is not set" << std::endl; + } else { + std::cerr << "rbd: failed to get " << key << ": " << cpp_strerror(r) + << std::endl; + } + return r; + } + + std::cout << value << std::endl; + return 0; +} + +void get_pool_set_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + add_key_option(positional); + positional->add_options() + ("value", "config value"); +} + +int execute_pool_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, 2); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.pool_metadata_set(io_ctx, METADATA_CONF_PREFIX + key, value); + if (r < 0) { + std::cerr << "rbd: failed to set " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_pool_remove_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + add_key_option(positional); +} + +int execute_pool_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + std::string key; + size_t arg_index = 1; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.pool_metadata_remove(io_ctx, METADATA_CONF_PREFIX + key); + if (r < 0) { + std::cerr << "rbd: failed to remove " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_pool_list_arguments(po::options_description *positional, + po::options_description *options) { + add_pool_option(positional); + at::add_format_options(options); +} + +int execute_pool_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + int r = get_pool(vm, &pool_name); + if (r < 0) { + return r; + } + + at::Format::Formatter f; + r = utils::get_formatter(vm, &f); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + TextTable tbl; + librbd::RBD rbd; + std::vector<librbd::config_option_t> options; + + r = rbd.config_list(io_ctx, &options); + if (r < 0) { + std::cerr << "rbd: failed to list config: " << cpp_strerror(r) << std::endl; + return r; + } + + if (f) { + f->open_array_section("config"); + } else { + tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Source", TextTable::LEFT, TextTable::LEFT); + } + + for (auto &option : options) { + if (f) { + f->open_object_section("option"); + f->dump_string("name", option.name); + f->dump_string("value", option.value); + f->dump_stream("source") << option.source; + f->close_section(); + } else { + std::ostringstream source; + source << option.source; + tbl << option.name << option.value << source.str() << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +void get_image_get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_image_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + std::string value; + + r = image.metadata_get(METADATA_CONF_PREFIX + key, &value); + if (r < 0) { + if (r == -ENOENT) { + std::cerr << "rbd: " << key << " is not set" << std::endl; + } else { + std::cerr << "rbd: failed to get " << key << ": " << cpp_strerror(r) + << std::endl; + } + return r; + } + + std::cout << value << std::endl; + return 0; +} + +void get_image_set_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); + positional->add_options() + ("value", "config value"); +} + +int execute_image_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, arg_index); + if (value.empty()) { + std::cerr << "rbd: image config value was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.metadata_set(METADATA_CONF_PREFIX + key, value); + if (r < 0) { + std::cerr << "rbd: failed to set " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_image_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_image_remove( + const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.metadata_remove(METADATA_CONF_PREFIX + key); + if (r < 0) { + std::cerr << "rbd: failed to remove " << key << ": " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_image_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_image_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter f; + r = utils::get_formatter(vm, &f); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + TextTable tbl; + std::vector<librbd::config_option_t> options; + + r = image.config_list(&options); + if (r < 0) { + std::cerr << "rbd: failed to list config: " << cpp_strerror(r) << std::endl; + return r; + } + + if (options.empty()) { + if (f == nullptr) { + std::cout << "There are no values" << std::endl; + } + return 0; + } + + if (f) { + f->open_array_section("config"); + } else { + tbl.define_column("Name", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Source", TextTable::LEFT, TextTable::LEFT); + } + + for (auto &option : options) { + if (f) { + f->open_object_section("option"); + f->dump_string("name", option.name); + f->dump_string("value", option.value); + f->dump_stream("source") << option.source; + f->close_section(); + } else { + std::ostringstream source; + source << option.source; + tbl << option.name << option.value << source.str() << TextTable::endrow; + } + } + + if (f == nullptr) { + bool single = (options.size() == 1); + std::cout << "There " << (single ? "is" : "are") << " " << options.size() + << " " << (single ? "value" : "values") << ":" << std::endl; + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl; + } + + return 0; +} + +Shell::Action action_global_get( + {"config", "global", "get"}, {}, + "Get a global-level configuration override.", "", + &get_global_get_arguments, &execute_global_get); +Shell::Action action_global_set( + {"config", "global", "set"}, {}, + "Set a global-level configuration override.", "", + &get_global_set_arguments, &execute_global_set); +Shell::Action action_global_remove( + {"config", "global", "remove"}, {"config", "global", "rm"}, + "Remove a global-level configuration override.", "", + &get_global_remove_arguments, &execute_global_remove); +Shell::Action action_global_list( + {"config", "global", "list"}, {"config", "global", "ls"}, + "List global-level configuration overrides.", "", + &get_global_list_arguments, &execute_global_list); + +Shell::Action action_pool_get( + {"config", "pool", "get"}, {}, "Get a pool-level configuration override.", "", + &get_pool_get_arguments, &execute_pool_get); +Shell::Action action_pool_set( + {"config", "pool", "set"}, {}, "Set a pool-level configuration override.", "", + &get_pool_set_arguments, &execute_pool_set); +Shell::Action action_pool_remove( + {"config", "pool", "remove"}, {"config", "pool", "rm"}, + "Remove a pool-level configuration override.", "", + &get_pool_remove_arguments, &execute_pool_remove); +Shell::Action action_pool_list( + {"config", "pool", "list"}, {"config", "pool", "ls"}, + "List pool-level configuration overrides.", "", + &get_pool_list_arguments, &execute_pool_list); + +Shell::Action action_image_get( + {"config", "image", "get"}, {}, "Get an image-level configuration override.", + "", &get_image_get_arguments, &execute_image_get); +Shell::Action action_image_set( + {"config", "image", "set"}, {}, "Set an image-level configuration override.", + "", &get_image_set_arguments, &execute_image_set); +Shell::Action action_image_remove( + {"config", "image", "remove"}, {"config", "image", "rm"}, + "Remove an image-level configuration override.", "", + &get_image_remove_arguments, &execute_image_remove); +Shell::Action action_image_list( + {"config", "image", "list"}, {"config", "image", "ls"}, + "List image-level configuration overrides.", "", + &get_image_list_arguments, &execute_image_list); + +} // namespace config +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Copy.cc b/src/tools/rbd/action/Copy.cc new file mode 100644 index 00000000..9a248437 --- /dev/null +++ b/src/tools/rbd/action/Copy.cc @@ -0,0 +1,195 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace copy { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_copy(librbd::Image &src, librados::IoCtx& dest_pp, + const char *destname, librbd::ImageOptions& opts, + bool no_progress, + size_t sparse_size) +{ + utils::ProgressContext pc("Image copy", no_progress); + int r = src.copy_with_progress4(dest_pp, destname, opts, pc, sparse_size); + if (r < 0){ + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, false); + at::add_sparse_size_option(options); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + std::string dst_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, false, &opts); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + r = do_copy(image, dst_io_ctx, dst_image_name.c_str(), opts, + vm[at::NO_PROGRESS].as<bool>(), sparse_size); + if (r < 0) { + std::cerr << "rbd: copy failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"copy"}, {"cp"}, "Copy src image to dest.", at::get_long_features_help(), + &get_arguments, &execute); + +static int do_deep_copy(librbd::Image &src, librados::IoCtx& dest_pp, + const char *destname, librbd::ImageOptions& opts, + bool no_progress) +{ + utils::ProgressContext pc("Image deep copy", no_progress); + int r = src.deep_copy_with_progress(dest_pp, destname, opts, pc); + if (r < 0){ + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments_deep(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, false); + at::add_flatten_option(options); + at::add_no_progress_option(options); +} + +int execute_deep(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_pool_name; + std::string dst_namespace_name; + std::string dst_image_name; + std::string dst_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, false, &opts); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + librados::IoCtx dst_io_ctx; + r = utils::init_io_ctx(rados, dst_pool_name, dst_namespace_name, &dst_io_ctx); + if (r < 0) { + return r; + } + + r = do_deep_copy(image, dst_io_ctx, dst_image_name.c_str(), opts, + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: deep copy failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action_deep( + {"deep", "copy"}, {"deep", "cp"}, "Deep copy src image to dest.", + at::get_long_features_help(), &get_arguments_deep, &execute_deep); + +} // namespace copy +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Create.cc b/src/tools/rbd/action/Create.cc new file mode 100644 index 00000000..99efa0b5 --- /dev/null +++ b/src/tools/rbd/action/Create.cc @@ -0,0 +1,264 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> +#include "common/Cond.h" +#include "common/Mutex.h" + +namespace rbd { +namespace action { +namespace create { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_create(librbd::RBD &rbd, librados::IoCtx& io_ctx, + const char *imgname, uint64_t size, + librbd::ImageOptions& opts) { + return rbd.create4(io_ctx, imgname, size, opts); +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_create_image_options(options, true); + options->add_options() + (at::IMAGE_THICK_PROVISION.c_str(), po::bool_switch(), "fully allocate storage and zero image"); + at::add_size_option(options); + at::add_no_progress_option(options); +} + +void thick_provision_writer_completion(rbd_completion_t, void *); + +struct thick_provision_writer { + librbd::Image *image; + Mutex lock; + Cond cond; + bufferlist bl; + uint64_t chunk_size; + const int block_size; + uint64_t concurr; + struct { + uint64_t in_flight; + int io_error; + } io_status; + + // Constructor + explicit thick_provision_writer(librbd::Image *i, librbd::ImageOptions &o) + : image(i), + lock("thick_provision_writer::lock"), + block_size(512) // 512 Bytes + { + // If error cases occur, the code is aborted, because + // constructor cannot return error value. + ceph_assert(g_ceph_context != nullptr); + bl.append_zero(block_size); + + librbd::image_info_t info; + int r = image->stat(info, sizeof(info)); + ceph_assert(r >= 0); + uint64_t order; + if (info.order == 0) { + order = g_conf().get_val<uint64_t>("rbd_default_order"); + } else { + order = info.order; + } + chunk_size = (1ull << order); + if (image->get_stripe_unit() < chunk_size) { + chunk_size = image->get_stripe_unit(); + } + + concurr = g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"); + io_status.in_flight = 0; + io_status.io_error = 0; + } + + int start_io(uint64_t write_offset) + { + { + Mutex::Locker l(lock); + io_status.in_flight++; + if (io_status.in_flight > concurr) { + io_status.in_flight--; + return -EINVAL; + } + } + + librbd::RBD::AioCompletion *c; + c = new librbd::RBD::AioCompletion(this, thick_provision_writer_completion); + int r; + r = image->aio_writesame(write_offset, chunk_size, bl, c, LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL); + if (r < 0) { + Mutex::Locker l(lock); + io_status.io_error = r; + } + return r; + } + + int wait_for(uint64_t max) { + Mutex::Locker l(lock); + int r = io_status.io_error; + + while (io_status.in_flight > max) { + utime_t dur; + dur.set_from_double(.2); + cond.WaitInterval(lock, dur); + } + return r; + } +}; + +void thick_provision_writer_completion(rbd_completion_t rc, void *pc) { + librbd::RBD::AioCompletion *ac = (librbd::RBD::AioCompletion *)rc; + thick_provision_writer *tc = static_cast<thick_provision_writer *>(pc); + + int r = ac->get_return_value(); + tc->lock.Lock(); + if (r < 0 && tc->io_status.io_error >= 0) { + tc->io_status.io_error = r; + } + tc->io_status.in_flight--; + tc->cond.Signal(); + tc->lock.Unlock(); + ac->release(); +} + +int write_data(librbd::Image &image, librbd::ImageOptions &opts, + bool no_progress) { + uint64_t image_size; + int r = 0; + utils::ProgressContext pc("Thick provisioning", no_progress); + + if (image.size(&image_size) != 0) { + return -EINVAL; + } + + thick_provision_writer tpw(&image, opts); + uint64_t off; + uint64_t i; + for (off = 0; off < image_size;) { + i = 0; + while (i < tpw.concurr && off < image_size) { + tpw.wait_for(tpw.concurr - 1); + r = tpw.start_io(off); + if (r != 0) { + goto err_writesame; + } + ++i; + off += tpw.chunk_size; + if(off > image_size) { + off = image_size; + } + pc.update_progress(off, image_size); + } + } + + tpw.wait_for(0); + r = image.flush(); + if (r < 0) { + std::cerr << "rbd: failed to flush at the end: " << cpp_strerror(r) + << std::endl; + goto err_writesame; + } + pc.finish(); + + return r; + +err_writesame: + tpw.wait_for(0); + pc.fail(); + + return r; +} + +int thick_write(const std::string &image_name,librados::IoCtx &io_ctx, + librbd::ImageOptions &opts, bool no_progress) { + int r = 0; + librbd::Image image; + + // To prevent writesame from discarding data, thick_write sets + // the rbd_discard_on_zeroed_write_same option to false. + ceph_assert(g_ceph_context != nullptr); + r = g_conf().set_val("rbd_discard_on_zeroed_write_same", "false"); + ceph_assert(r == 0); + r = utils::open_image(io_ctx, image_name, false, &image); + if (r < 0) { + return r; + } + + r = write_data(image, opts, no_progress); + + image.close(); + + return r; +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, true, &opts); + if (r < 0) { + return r; + } + + uint64_t size; + r = utils::get_image_size(vm, &size); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = do_create(rbd, io_ctx, image_name.c_str(), size, opts); + if (!namespace_name.empty() && r == -ENOENT) { + std::cerr << "rbd: namespace not found - it must be created with " + << "'rbd namespace create' before creating an image." + << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl; + return r; + } + + if (vm.count(at::IMAGE_THICK_PROVISION) && vm[at::IMAGE_THICK_PROVISION].as<bool>()) { + r = thick_write(image_name, io_ctx, opts, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: image created but error encountered during thick provisioning: " + << cpp_strerror(r) << std::endl; + return r; + } + } + return 0; +} + +Shell::Action action( + {"create"}, {}, "Create an empty image.", at::get_long_features_help(), + &get_arguments, &execute); + +} // namespace create +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Device.cc b/src/tools/rbd/action/Device.cc new file mode 100644 index 00000000..3fdf2ef5 --- /dev/null +++ b/src/tools/rbd/action/Device.cc @@ -0,0 +1,185 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "acconfig.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" + +#include <boost/program_options.hpp> + +#include "include/ceph_assert.h" + +namespace rbd { +namespace action { + +namespace at = argument_types; +namespace po = boost::program_options; + +#define DECLARE_DEVICE_OPERATIONS(ns) \ + namespace ns { \ + int execute_list(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + int execute_map(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + int execute_unmap(const po::variables_map &vm, \ + const std::vector<std::string> &ceph_global_args); \ + } + +DECLARE_DEVICE_OPERATIONS(ggate); +DECLARE_DEVICE_OPERATIONS(kernel); +DECLARE_DEVICE_OPERATIONS(nbd); + +namespace device { + +namespace { + +struct DeviceOperations { + int (*execute_list)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); + int (*execute_map)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); + int (*execute_unmap)(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args); +}; + +const DeviceOperations ggate_operations = { + ggate::execute_list, + ggate::execute_map, + ggate::execute_unmap, +}; + +const DeviceOperations krbd_operations = { + kernel::execute_list, + kernel::execute_map, + kernel::execute_unmap, +}; + +const DeviceOperations nbd_operations = { + nbd::execute_list, + nbd::execute_map, + nbd::execute_unmap, +}; + +enum device_type_t { + DEVICE_TYPE_GGATE, + DEVICE_TYPE_KRBD, + DEVICE_TYPE_NBD, +}; + +struct DeviceType {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + DeviceType *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + if (s == "ggate") { + v = boost::any(DEVICE_TYPE_GGATE); + } else if (s == "krbd") { + v = boost::any(DEVICE_TYPE_KRBD); + } else if (s == "nbd") { + v = boost::any(DEVICE_TYPE_NBD); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +void add_device_type_option(po::options_description *options) { + options->add_options() + ("device-type,t", po::value<DeviceType>(), + "device type [ggate, krbd (default), nbd]"); +} + +void add_device_specific_options(po::options_description *options) { + options->add_options() + ("options,o", po::value<std::vector<std::string>>(), + "device specific options"); +} + +device_type_t get_device_type(const po::variables_map &vm) { + if (vm.count("device-type")) { + return vm["device-type"].as<device_type_t>(); + } + return DEVICE_TYPE_KRBD; +} + +const DeviceOperations *get_device_operations(const po::variables_map &vm) { + switch (get_device_type(vm)) { + case DEVICE_TYPE_GGATE: + return &ggate_operations; + case DEVICE_TYPE_KRBD: + return &krbd_operations; + case DEVICE_TYPE_NBD: + return &nbd_operations; + default: + ceph_abort(); + return nullptr; + } +} + +} // anonymous namespace + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_list)(vm, ceph_global_init_args); +} + +void get_map_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + ("read-only", po::bool_switch(), "map read-only") + ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions"); + add_device_specific_options(options); +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_map)(vm, ceph_global_init_args); +} + +void get_unmap_arguments(po::options_description *positional, + po::options_description *options) { + add_device_type_option(options); + positional->add_options() + ("image-or-snap-or-device-spec", + "image, snapshot, or device specification\n" + "[<pool-name>/]<image-name>[@<snapshot-name>] or <device-path>"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE); + add_device_specific_options(options); +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return (*get_device_operations(vm)->execute_unmap)(vm, ceph_global_init_args); +} + +Shell::SwitchArguments switched_arguments({"read-only", "exclusive"}); +Shell::Action action_list( + {"device", "list"}, {"showmapped"}, "List mapped rbd images.", "", + &get_list_arguments, &execute_list); +// yet another alias for list command +Shell::Action action_ls( + {"device", "ls"}, {}, "List mapped rbd images.", "", + &get_list_arguments, &execute_list, false); + +Shell::Action action_map( + {"device", "map"}, {"map"}, "Map an image to a block device.", "", + &get_map_arguments, &execute_map); + +Shell::Action action_unmap( + {"device", "unmap"}, {"unmap"}, "Unmap a rbd device.", "", + &get_unmap_arguments, &execute_unmap); + +} // namespace device +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Diff.cc b/src/tools/rbd/action/Diff.cc new file mode 100644 index 00000000..3729469c --- /dev/null +++ b/src/tools/rbd/action/Diff.cc @@ -0,0 +1,143 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace diff { + +namespace at = argument_types; +namespace po = boost::program_options; + +struct output_method { + output_method() : f(NULL), t(NULL), empty(true) {} + Formatter *f; + TextTable *t; + bool empty; +}; + +static int diff_cb(uint64_t ofs, size_t len, int exists, void *arg) +{ + output_method *om = static_cast<output_method *>(arg); + om->empty = false; + if (om->f) { + om->f->open_object_section("extent"); + om->f->dump_unsigned("offset", ofs); + om->f->dump_unsigned("length", len); + om->f->dump_string("exists", exists ? "true" : "false"); + om->f->close_section(); + } else { + ceph_assert(om->t); + *(om->t) << ofs << len << (exists ? "data" : "zero") << TextTable::endrow; + } + return 0; +} + +static int do_diff(librbd::Image& image, const char *fromsnapname, + bool whole_object, Formatter *f) +{ + int r; + librbd::image_info_t info; + + r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + output_method om; + if (f) { + om.f = f; + f->open_array_section("extents"); + } else { + om.t = new TextTable(); + om.t->define_column("Offset", TextTable::LEFT, TextTable::LEFT); + om.t->define_column("Length", TextTable::LEFT, TextTable::LEFT); + om.t->define_column("Type", TextTable::LEFT, TextTable::LEFT); + } + + r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object, + diff_cb, &om); + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + if (!om.empty) + std::cout << *om.t; + delete om.t; + } + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(), + "snapshot starting point") + (at::WHOLE_OBJECT.c_str(), po::bool_switch(), "compare whole object"); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string from_snap_name; + if (vm.count(at::FROM_SNAPSHOT_NAME)) { + from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>(); + } + + bool diff_whole_object = vm[at::WHOLE_OBJECT].as<bool>(); + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_diff(image, from_snap_name.empty() ? nullptr : from_snap_name.c_str(), + diff_whole_object, formatter.get()); + if (r < 0) { + std::cerr << "rbd: diff error: " << cpp_strerror(r) << std::endl; + return -r; + } + return 0; +} + +Shell::SwitchArguments switched_arguments({at::WHOLE_OBJECT}); +Shell::Action action( + {"diff"}, {}, + "Print extents that differ since a previous snap, or image creation.", "", + &get_arguments, &execute); + +} // namespace diff +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/DiskUsage.cc b/src/tools/rbd/action/DiskUsage.cc new file mode 100644 index 00000000..649f39a7 --- /dev/null +++ b/src/tools/rbd/action/DiskUsage.cc @@ -0,0 +1,341 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <algorithm> +#include <iostream> +#include <boost/bind.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace disk_usage { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int disk_usage_callback(uint64_t offset, size_t len, int exists, + void *arg) { + uint64_t *used_size = reinterpret_cast<uint64_t *>(arg); + if (exists) { + (*used_size) += len; + } + return 0; +} + +static int compute_image_disk_usage(const std::string& name, + const std::string& snap_name, + const std::string& from_snap_name, + librbd::Image &image, uint64_t size, + bool exact, TextTable& tbl, Formatter *f, + uint64_t *used_size) { + const char* from = NULL; + if (!from_snap_name.empty()) { + from = from_snap_name.c_str(); + } + + uint64_t flags; + int r = image.get_flags(&flags); + if (r < 0) { + std::cerr << "rbd: failed to retrieve image flags: " << cpp_strerror(r) + << std::endl; + return r; + } + if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) { + std::cerr << "warning: fast-diff map is invalid for " << name + << (snap_name.empty() ? "" : "@" + snap_name) << ". " + << "operation may be slow." << std::endl; + } + + *used_size = 0; + r = image.diff_iterate2(from, 0, size, false, !exact, + &disk_usage_callback, used_size); + if (r < 0) { + std::cerr << "rbd: failed to iterate diffs: " << cpp_strerror(r) + << std::endl; + return r; + } + + if (f) { + f->open_object_section("image"); + f->dump_string("name", name); + if (!snap_name.empty()) { + f->dump_string("snapshot", snap_name); + } + f->dump_unsigned("provisioned_size", size); + f->dump_unsigned("used_size" , *used_size); + f->close_section(); + } else { + std::string full_name = name; + if (!snap_name.empty()) { + full_name += "@" + snap_name; + } + tbl << full_name + << stringify(byte_u_t(size)) + << stringify(byte_u_t(*used_size)) + << TextTable::endrow; + } + return 0; +} + +static int do_disk_usage(librbd::RBD &rbd, librados::IoCtx &io_ctx, + const char *imgname, const char *snapname, + const char *from_snapname, bool exact, Formatter *f) { + std::vector<librbd::image_spec_t> images; + int r = rbd.list2(io_ctx, &images); + if (r == -ENOENT) { + r = 0; + } else if (r < 0) { + return r; + } + + TextTable tbl; + if (f) { + f->open_object_section("stats"); + f->open_array_section("images"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("PROVISIONED", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("USED", TextTable::LEFT, TextTable::RIGHT); + } + + uint32_t count = 0; + uint64_t used_size = 0; + uint64_t total_prov = 0; + uint64_t total_used = 0; + uint64_t snap_id = CEPH_NOSNAP; + uint64_t from_id = CEPH_NOSNAP; + bool found = false; + for (auto& image_spec : images) { + if (imgname != NULL && image_spec.name != imgname) { + continue; + } + found = true; + + librbd::Image image; + r = rbd.open_read_only(io_ctx, image, image_spec.name.c_str(), NULL); + if (r < 0) { + if (r != -ENOENT) { + std::cerr << "rbd: error opening " << image_spec.name << ": " + << cpp_strerror(r) << std::endl; + } + continue; + } + + uint64_t features; + r = image.features(&features); + if (r < 0) { + std::cerr << "rbd: failed to retrieve image features: " << cpp_strerror(r) + << std::endl; + goto out; + } + if ((features & RBD_FEATURE_FAST_DIFF) == 0) { + std::cerr << "warning: fast-diff map is not enabled for " + << image_spec.name << ". " << "operation may be slow." + << std::endl; + } + + librbd::image_info_t info; + if (image.stat(info, sizeof(info)) < 0) { + r = -EINVAL; + goto out; + } + + std::vector<librbd::snap_info_t> snap_list; + r = image.snap_list(snap_list); + if (r < 0) { + std::cerr << "rbd: error opening " << image_spec.name << " snapshots: " + << cpp_strerror(r) << std::endl; + continue; + } + + snap_list.erase(remove_if(snap_list.begin(), + snap_list.end(), + boost::bind(utils::is_not_user_snap_namespace, &image, _1)), + snap_list.end()); + + bool found_from_snap = (from_snapname == nullptr); + bool found_snap = (snapname == nullptr); + bool found_from = (from_snapname == nullptr); + std::string last_snap_name; + std::sort(snap_list.begin(), snap_list.end(), + boost::bind(&librbd::snap_info_t::id, _1) < + boost::bind(&librbd::snap_info_t::id, _2)); + if (!found_snap || !found_from) { + for (auto &snap_info : snap_list) { + if (!found_snap && snap_info.name == snapname) { + snap_id = snap_info.id; + found_snap = true; + } + if (!found_from && snap_info.name == from_snapname) { + from_id = snap_info.id; + found_from = true; + } + if (found_snap && found_from) { + break; + } + } + } + if ((snapname != nullptr && snap_id == CEPH_NOSNAP) || + (from_snapname != nullptr && from_id == CEPH_NOSNAP)) { + std::cerr << "specified snapshot is not found." << std::endl; + return -ENOENT; + } + if (snap_id != CEPH_NOSNAP && from_id != CEPH_NOSNAP) { + if (from_id == snap_id) { + // no diskusage. + return 0; + } + if (from_id >= snap_id) { + return -EINVAL; + } + } + + for (std::vector<librbd::snap_info_t>::const_iterator snap = + snap_list.begin(); snap != snap_list.end(); ++snap) { + librbd::Image snap_image; + r = rbd.open_read_only(io_ctx, snap_image, image_spec.name.c_str(), + snap->name.c_str()); + if (r < 0) { + std::cerr << "rbd: error opening snapshot " << image_spec.name << "@" + << snap->name << ": " << cpp_strerror(r) << std::endl; + goto out; + } + + if (imgname == nullptr || found_from_snap || + (found_from_snap && snapname != nullptr && snap->name == snapname)) { + r = compute_image_disk_usage(image_spec.name, snap->name, + last_snap_name, snap_image, snap->size, + exact, tbl, f, &used_size); + if (r < 0) { + goto out; + } + + if (snapname != NULL) { + total_prov += snap->size; + } + total_used += used_size; + ++count; + } + + if (!found_from_snap && from_snapname != nullptr && + snap->name == from_snapname) { + found_from_snap = true; + } + if (snapname != nullptr && snap->name == snapname) { + break; + } + last_snap_name = snap->name; + } + + if (snapname == NULL) { + r = compute_image_disk_usage(image_spec.name, "", last_snap_name, image, + info.size, exact, tbl, f, &used_size); + if (r < 0) { + goto out; + } + total_prov += info.size; + total_used += used_size; + ++count; + } + } + if (imgname != nullptr && !found) { + std::cerr << "specified image " << imgname << " is not found." << std::endl; + return -ENOENT; + } + +out: + if (f) { + f->close_section(); + if (imgname == NULL) { + f->dump_unsigned("total_provisioned_size", total_prov); + f->dump_unsigned("total_used_size", total_used); + } + f->close_section(); + f->flush(std::cout); + } else if (!images.empty()) { + if (count > 1) { + tbl << "<TOTAL>" + << stringify(byte_u_t(total_prov)) + << stringify(byte_u_t(total_used)) + << TextTable::endrow; + } + std::cout << tbl; + } + + return r < 0 ? r : 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); + options->add_options() + (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(), + "snapshot starting point") + ("exact", po::bool_switch(), "compute exact disk usage (slow)"); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, vm.count(at::FROM_SNAPSHOT_NAME), + utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string from_snap_name; + if (vm.count(at::FROM_SNAPSHOT_NAME)) { + from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>(); + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + librbd::RBD rbd; + r = do_disk_usage(rbd, io_ctx, + image_name.empty() ? nullptr: image_name.c_str(), + snap_name.empty() ? nullptr : snap_name.c_str(), + from_snap_name.empty() ? nullptr : from_snap_name.c_str(), + vm["exact"].as<bool>(), formatter.get()); + if (r < 0) { + std::cerr << "rbd: du failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"disk-usage"}, {"du"}, "Show disk usage stats for pool, image or snapshot.", + "", &get_arguments, &execute); + +} // namespace disk_usage +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Export.cc b/src/tools/rbd/action/Export.cc new file mode 100644 index 00000000..b5b82f4c --- /dev/null +++ b/src/tools/rbd/action/Export.cc @@ -0,0 +1,651 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/Context.h" +#include "common/errno.h" +#include "common/Throttle.h" +#include "include/encoding.h" +#include <iostream> +#include <fcntl.h> +#include <stdlib.h> +#include <boost/program_options.hpp> +#include <boost/scope_exit.hpp> + +namespace rbd { +namespace action { +namespace export_full { + +struct ExportDiffContext { + librbd::Image *image; + int fd; + int export_format; + uint64_t totalsize; + utils::ProgressContext pc; + OrderedThrottle throttle; + + ExportDiffContext(librbd::Image *i, int f, uint64_t t, int max_ops, + bool no_progress, int eformat) : + image(i), fd(f), export_format(eformat), totalsize(t), pc("Exporting image", no_progress), + throttle(max_ops, true) { + } +}; + +class C_ExportDiff : public Context { +public: + C_ExportDiff(ExportDiffContext *edc, uint64_t offset, uint64_t length, + bool exists, int export_format) + : m_export_diff_context(edc), m_offset(offset), m_length(length), + m_exists(exists), m_export_format(export_format) { + } + + int send() { + if (m_export_diff_context->throttle.pending_error()) { + return m_export_diff_context->throttle.wait_for_ret(); + } + + C_OrderedThrottle *ctx = m_export_diff_context->throttle.start_op(this); + if (m_exists) { + librbd::RBD::AioCompletion *aio_completion = + new librbd::RBD::AioCompletion(ctx, &utils::aio_context_callback); + + int op_flags = LIBRADOS_OP_FLAG_FADVISE_NOCACHE; + int r = m_export_diff_context->image->aio_read2( + m_offset, m_length, m_read_data, aio_completion, op_flags); + if (r < 0) { + aio_completion->release(); + ctx->complete(r); + } + } else { + ctx->complete(0); + } + return 0; + } + + static int export_diff_cb(uint64_t offset, size_t length, int exists, + void *arg) { + ExportDiffContext *edc = reinterpret_cast<ExportDiffContext *>(arg); + + C_ExportDiff *context = new C_ExportDiff(edc, offset, length, exists, edc->export_format); + return context->send(); + } + +protected: + void finish(int r) override { + if (r >= 0) { + if (m_exists) { + m_exists = !m_read_data.is_zero(); + } + r = write_extent(m_export_diff_context, m_offset, m_length, m_exists, m_export_format); + if (r == 0 && m_exists) { + r = m_read_data.write_fd(m_export_diff_context->fd); + } + } + m_export_diff_context->throttle.end_op(r); + } + +private: + ExportDiffContext *m_export_diff_context; + uint64_t m_offset; + uint64_t m_length; + bool m_exists; + int m_export_format; + bufferlist m_read_data; + + static int write_extent(ExportDiffContext *edc, uint64_t offset, + uint64_t length, bool exists, int export_format) { + // extent + bufferlist bl; + __u8 tag = exists ? RBD_DIFF_WRITE : RBD_DIFF_ZERO; + uint64_t len = 0; + encode(tag, bl); + if (export_format == 2) { + if (tag == RBD_DIFF_WRITE) + len = 8 + 8 + length; + else + len = 8 + 8; + encode(len, bl); + } + encode(offset, bl); + encode(length, bl); + int r = bl.write_fd(edc->fd); + + edc->pc.update_progress(offset, edc->totalsize); + return r; + } +}; + + +int do_export_diff_fd(librbd::Image& image, const char *fromsnapname, + const char *endsnapname, bool whole_object, + int fd, bool no_progress, int export_format) +{ + int r; + librbd::image_info_t info; + + r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + { + // header + bufferlist bl; + if (export_format == 1) + bl.append(utils::RBD_DIFF_BANNER); + else + bl.append(utils::RBD_DIFF_BANNER_V2); + + __u8 tag; + uint64_t len = 0; + if (fromsnapname) { + tag = RBD_DIFF_FROM_SNAP; + encode(tag, bl); + std::string from(fromsnapname); + if (export_format == 2) { + len = from.length() + 4; + encode(len, bl); + } + encode(from, bl); + } + + if (endsnapname) { + tag = RBD_DIFF_TO_SNAP; + encode(tag, bl); + std::string to(endsnapname); + if (export_format == 2) { + len = to.length() + 4; + encode(len, bl); + } + encode(to, bl); + } + + if (endsnapname && export_format == 2) { + tag = RBD_SNAP_PROTECTION_STATUS; + encode(tag, bl); + bool is_protected = false; + r = image.snap_is_protected(endsnapname, &is_protected); + if (r < 0) { + return r; + } + len = 8; + encode(len, bl); + encode(is_protected, bl); + } + + tag = RBD_DIFF_IMAGE_SIZE; + encode(tag, bl); + uint64_t endsize = info.size; + if (export_format == 2) { + len = 8; + encode(len, bl); + } + encode(endsize, bl); + + r = bl.write_fd(fd); + if (r < 0) { + return r; + } + } + ExportDiffContext edc(&image, fd, info.size, + g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), + no_progress, export_format); + r = image.diff_iterate2(fromsnapname, 0, info.size, true, whole_object, + &C_ExportDiff::export_diff_cb, (void *)&edc); + if (r < 0) { + goto out; + } + + r = edc.throttle.wait_for_ret(); + if (r < 0) { + goto out; + } + + { + __u8 tag = RBD_DIFF_END; + bufferlist bl; + encode(tag, bl); + r = bl.write_fd(fd); + } + +out: + if (r < 0) + edc.pc.fail(); + else + edc.pc.finish(); + + return r; +} + +int do_export_diff(librbd::Image& image, const char *fromsnapname, + const char *endsnapname, bool whole_object, + const char *path, bool no_progress) +{ + int r; + int fd; + + if (strcmp(path, "-") == 0) + fd = STDOUT_FILENO; + else + fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0644); + if (fd < 0) + return -errno; + + r = do_export_diff_fd(image, fromsnapname, endsnapname, whole_object, fd, no_progress, 1); + + if (fd != 1) + close(fd); + if (r < 0 && fd != 1) { + remove(path); + } + + return r; +} + + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments_diff(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_path_options(positional, options, + "export file (or '-' for stdout)"); + options->add_options() + (at::FROM_SNAPSHOT_NAME.c_str(), po::value<std::string>(), + "snapshot starting point") + (at::WHOLE_OBJECT.c_str(), po::bool_switch(), "compare whole object"); + at::add_no_progress_option(options); +} + +int execute_diff(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string path; + r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + std::string from_snap_name; + if (vm.count(at::FROM_SNAPSHOT_NAME)) { + from_snap_name = vm[at::FROM_SNAPSHOT_NAME].as<std::string>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_export_diff(image, + from_snap_name.empty() ? nullptr : from_snap_name.c_str(), + snap_name.empty() ? nullptr : snap_name.c_str(), + vm[at::WHOLE_OBJECT].as<bool>(), path.c_str(), + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: export-diff error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::SwitchArguments switched_arguments({at::WHOLE_OBJECT}); +Shell::Action action_diff( + {"export-diff"}, {}, "Export incremental diff to file.", "", + &get_arguments_diff, &execute_diff); + +class C_Export : public Context +{ +public: + C_Export(OrderedThrottle &ordered_throttle, librbd::Image &image, + uint64_t fd_offset, uint64_t offset, uint64_t length, int fd) + : m_throttle(ordered_throttle), m_image(image), m_dest_offset(fd_offset), + m_offset(offset), m_length(length), m_fd(fd) + { + } + + void send() + { + auto ctx = m_throttle.start_op(this); + auto aio_completion = new librbd::RBD::AioCompletion( + ctx, &utils::aio_context_callback); + int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE; + int r = m_image.aio_read2(m_offset, m_length, m_bufferlist, + aio_completion, op_flags); + if (r < 0) { + cerr << "rbd: error requesting read from source image" << std::endl; + aio_completion->release(); + m_throttle.end_op(r); + } + } + + void finish(int r) override + { + BOOST_SCOPE_EXIT((&m_throttle) (&r)) + { + m_throttle.end_op(r); + } BOOST_SCOPE_EXIT_END + + if (r < 0) { + cerr << "rbd: error reading from source image at offset " + << m_offset << ": " << cpp_strerror(r) << std::endl; + return; + } + + ceph_assert(m_bufferlist.length() == static_cast<size_t>(r)); + if (m_fd != STDOUT_FILENO) { + if (m_bufferlist.is_zero()) { + return; + } + + uint64_t chkret = lseek64(m_fd, m_dest_offset, SEEK_SET); + if (chkret != m_dest_offset) { + cerr << "rbd: error seeking destination image to offset " + << m_dest_offset << std::endl; + r = -errno; + return; + } + } + + r = m_bufferlist.write_fd(m_fd); + if (r < 0) { + cerr << "rbd: error writing to destination image at offset " + << m_dest_offset << std::endl; + } + } + +private: + OrderedThrottle &m_throttle; + librbd::Image &m_image; + bufferlist m_bufferlist; + uint64_t m_dest_offset; + uint64_t m_offset; + uint64_t m_length; + int m_fd; +}; + +const uint32_t MAX_KEYS = 64; + +static int do_export_v2(librbd::Image& image, librbd::image_info_t &info, int fd, + uint64_t period, int max_concurrent_ops, utils::ProgressContext &pc) +{ + int r = 0; + // header + bufferlist bl; + bl.append(utils::RBD_IMAGE_BANNER_V2); + + __u8 tag; + uint64_t length; + // encode order + tag = RBD_EXPORT_IMAGE_ORDER; + length = 8; + encode(tag, bl); + encode(length, bl); + encode(uint64_t(info.order), bl); + + // encode features + tag = RBD_EXPORT_IMAGE_FEATURES; + uint64_t features; + image.features(&features); + length = 8; + encode(tag, bl); + encode(length, bl); + encode(features, bl); + + // encode stripe_unit and stripe_count + tag = RBD_EXPORT_IMAGE_STRIPE_UNIT; + uint64_t stripe_unit; + stripe_unit = image.get_stripe_unit(); + length = 8; + encode(tag, bl); + encode(length, bl); + encode(stripe_unit, bl); + + tag = RBD_EXPORT_IMAGE_STRIPE_COUNT; + uint64_t stripe_count; + stripe_count = image.get_stripe_count(); + length = 8; + encode(tag, bl); + encode(length, bl); + encode(stripe_count, bl); + + //retrieve metadata of image + std::map<std::string, string> imagemetas; + std::string last_key; + bool more_results = true; + while (more_results) { + std::map<std::string, bufferlist> pairs; + r = image.metadata_list(last_key, MAX_KEYS, &pairs); + if (r < 0) { + std::cerr << "failed to retrieve metadata of image : " << cpp_strerror(r) + << std::endl; + return r; + } + + if (!pairs.empty()) { + last_key = pairs.rbegin()->first; + + for (auto kv : pairs) { + std::string key = kv.first; + std::string val(kv.second.c_str(), kv.second.length()); + imagemetas[key] = val; + } + } + more_results = (pairs.size() == MAX_KEYS); + } + + //encode imageMeta key and value + for (std::map<std::string, string>::iterator it = imagemetas.begin(); + it != imagemetas.end(); ++it) { + string key = it->first; + string value = it->second; + + tag = RBD_EXPORT_IMAGE_META; + length = key.length() + value.length() + 4 * 2; + encode(tag, bl); + encode(length, bl); + encode(key, bl); + encode(value, bl); + } + + // encode end tag + tag = RBD_EXPORT_IMAGE_END; + encode(tag, bl); + + // write bl to fd. + r = bl.write_fd(fd); + if (r < 0) { + return r; + } + + // header for snapshots + bl.clear(); + bl.append(utils::RBD_IMAGE_DIFFS_BANNER_V2); + + std::vector<librbd::snap_info_t> snaps; + r = image.snap_list(snaps); + if (r < 0) { + return r; + } + + uint64_t diff_num = snaps.size() + 1; + encode(diff_num, bl); + + r = bl.write_fd(fd); + if (r < 0) { + return r; + } + + const char *last_snap = NULL; + for (size_t i = 0; i < snaps.size(); ++i) { + utils::snap_set(image, snaps[i].name.c_str()); + r = do_export_diff_fd(image, last_snap, snaps[i].name.c_str(), false, fd, true, 2); + if (r < 0) { + return r; + } + pc.update_progress(i, snaps.size() + 1); + last_snap = snaps[i].name.c_str(); + } + utils::snap_set(image, std::string("")); + r = do_export_diff_fd(image, last_snap, nullptr, false, fd, true, 2); + if (r < 0) { + return r; + } + pc.update_progress(snaps.size() + 1, snaps.size() + 1); + return r; +} + +static int do_export_v1(librbd::Image& image, librbd::image_info_t &info, + int fd, uint64_t period, int max_concurrent_ops, + utils::ProgressContext &pc) +{ + int r = 0; + size_t file_size = 0; + OrderedThrottle throttle(max_concurrent_ops, false); + for (uint64_t offset = 0; offset < info.size; offset += period) { + if (throttle.pending_error()) { + break; + } + + uint64_t length = min(period, info.size - offset); + C_Export *ctx = new C_Export(throttle, image, file_size + offset, offset, + length, fd); + ctx->send(); + + pc.update_progress(offset, info.size); + } + + file_size += info.size; + r = throttle.wait_for_ret(); + if (fd != 1) { + if (r >= 0) { + r = ftruncate(fd, file_size); + if (r < 0) + return r; + + uint64_t chkret = lseek64(fd, file_size, SEEK_SET); + if (chkret != file_size) + r = errno; + } + } + return r; +} + +static int do_export(librbd::Image& image, const char *path, bool no_progress, + int export_format) +{ + librbd::image_info_t info; + int64_t r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + int fd; + int max_concurrent_ops = g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"); + bool to_stdout = (strcmp(path, "-") == 0); + if (to_stdout) { + fd = STDOUT_FILENO; + } else { + fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + return -errno; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + utils::ProgressContext pc("Exporting image", no_progress); + uint64_t period = image.get_stripe_count() * (1ull << info.order); + + if (export_format == 1) + r = do_export_v1(image, info, fd, period, max_concurrent_ops, pc); + else + r = do_export_v2(image, info, fd, period, max_concurrent_ops, pc); + + if (r < 0) + pc.fail(); + else + pc.finish(); + if (!to_stdout) + close(fd); + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_path_options(positional, options, + "export file (or '-' for stdout)"); + at::add_no_progress_option(options); + at::add_export_format_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string path; + r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + int format = 1; + if (vm.count("export-format")) + format = vm["export-format"].as<uint64_t>(); + + r = do_export(image, path.c_str(), vm[at::NO_PROGRESS].as<bool>(), format); + if (r < 0) { + std::cerr << "rbd: export error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"export"}, {}, "Export image to file.", "", &get_arguments, &execute); + +} // namespace export_full +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Feature.cc b/src/tools/rbd/action/Feature.cc new file mode 100644 index 00000000..13a7b6ea --- /dev/null +++ b/src/tools/rbd/action/Feature.cc @@ -0,0 +1,116 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/errno.h" +#include <iostream> +#include <map> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace feature { + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments(po::options_description *positional, + po::options_description *options, bool enabled) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + positional->add_options() + ("features", po::value<at::ImageFeatures>()->multitoken(), + ("image features\n" + at::get_short_features_help(false)).c_str()); + if (enabled) { + at::add_create_journal_options(options); + } +} + +void get_arguments_disable(po::options_description *positional, + po::options_description *options) { + get_arguments(positional, options, false); +} + +void get_arguments_enable(po::options_description *positional, + po::options_description *options) { + get_arguments(positional, options, true); +} + +int execute(const po::variables_map &vm, bool enabled) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_journal_options(vm, &opts); + if (r < 0) { + return r; + } + + std::vector<std::string> feature_names; + if (vm.count(at::POSITIONAL_ARGUMENTS)) { + const std::vector<std::string> &args = + vm[at::POSITIONAL_ARGUMENTS].as<std::vector<std::string> >(); + feature_names.insert(feature_names.end(), args.begin() + arg_index, + args.end()); + } + + if (feature_names.empty()) { + std::cerr << "rbd: at least one feature name must be specified" + << std::endl; + return -EINVAL; + } + + boost::any features_any(static_cast<uint64_t>(0)); + at::ImageFeatures image_features; + at::validate(features_any, feature_names, &image_features, 0); + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.update_features(boost::any_cast<uint64_t>(features_any), enabled); + if (r < 0) { + std::cerr << "rbd: failed to update image features: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +int execute_disable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return execute(vm, false); +} + +int execute_enable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return execute(vm, true); +} + +Shell::Action action_disable( + {"feature", "disable"}, {}, "Disable the specified image feature.", "", + &get_arguments_disable, &execute_disable); +Shell::Action action_enable( + {"feature", "enable"}, {}, "Enable the specified image feature.", "", + &get_arguments_enable, &execute_enable); + +} // namespace feature +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Flatten.cc b/src/tools/rbd/action/Flatten.cc new file mode 100644 index 00000000..ec4e837a --- /dev/null +++ b/src/tools/rbd/action/Flatten.cc @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace flatten { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_flatten(librbd::Image& image, bool no_progress) +{ + utils::ProgressContext pc("Image flatten", no_progress); + int r = image.flatten_with_progress(pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_flatten(image, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: flatten error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"flatten"}, {}, "Fill clone with parent data (make it independent).", "", + &get_arguments, &execute); + +} // namespace flatten +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Ggate.cc b/src/tools/rbd/action/Ggate.cc new file mode 100644 index 00000000..61f77be2 --- /dev/null +++ b/src/tools/rbd/action/Ggate.cc @@ -0,0 +1,193 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <sys/param.h> +#include <errno.h> +#include <unistd.h> + +#include "include/stringify.h" +#include "common/SubProcess.h" + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" + +#include <boost/algorithm/string.hpp> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +#include <iostream> + +namespace rbd { +namespace action { +namespace ggate { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int call_ggate_cmd(const po::variables_map &vm, + const std::vector<std::string> &args, + const std::vector<std::string> &ceph_global_args) { + SubProcess process("rbd-ggate", SubProcess::KEEP, SubProcess::KEEP, + SubProcess::KEEP); + + for (auto &arg : ceph_global_args) { + process.add_cmd_arg(arg.c_str()); + } + + for (auto &arg : args) { + process.add_cmd_arg(arg.c_str()); + } + + if (process.spawn()) { + std::cerr << "rbd: failed to run rbd-ggate: " << process.err() << std::endl; + return -EINVAL; + } else if (process.join()) { + std::cerr << "rbd: rbd-ggate failed with error: " << process.err() + << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_image_or_snap_spec(const po::variables_map &vm, std::string *spec) { + size_t arg_index = 0; + std::string pool_name; + std::string nspace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name, + &image_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + spec->append(pool_name); + spec->append("/"); + if (!nspace_name.empty()) { + spec->append(nspace_name); + spec->append("/"); + } + spec->append(image_name); + if (!snap_name.empty()) { + spec->append("@"); + spec->append(snap_name); + } + + return 0; +} + +int parse_options(const std::vector<std::string> &options, + std::vector<std::string> *args) { + for (auto &opts : options) { + std::vector<std::string> args_; + boost::split(args_, opts, boost::is_any_of(",")); + for (auto &o : args_) { + args->push_back("--" + o); + } + } + + return 0; +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; + return -EOPNOTSUPP; +#endif + std::vector<std::string> args; + + args.push_back("list"); + + if (vm.count("format")) { + args.push_back("--format"); + args.push_back(vm["format"].as<at::Format>().value); + } + if (vm["pretty-format"].as<bool>()) { + args.push_back("--pretty-format"); + } + + return call_ggate_cmd(vm, args, ceph_global_init_args); +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; + return -EOPNOTSUPP; +#endif + std::vector<std::string> args; + + args.push_back("map"); + std::string img; + int r = get_image_or_snap_spec(vm, &img); + if (r < 0) { + return r; + } + args.push_back(img); + + if (vm["read-only"].as<bool>()) { + args.push_back("--read-only"); + } + + if (vm["exclusive"].as<bool>()) { + args.push_back("--exclusive"); + } + + if (vm.count("options")) { + r = parse_options(vm["options"].as<std::vector<std::string>>(), &args); + if (r < 0) { + return r; + } + } + + return call_ggate_cmd(vm, args, ceph_global_init_args); +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if !defined(__FreeBSD__) + std::cerr << "rbd: ggate is only supported on FreeBSD" << std::endl; + return -EOPNOTSUPP; +#endif + std::string device_name = utils::get_positional_argument(vm, 0); + if (!boost::starts_with(device_name, "/dev/")) { + device_name.clear(); + } + + std::string image_name; + if (device_name.empty()) { + int r = get_image_or_snap_spec(vm, &image_name); + if (r < 0) { + return r; + } + } + + if (device_name.empty() && image_name.empty()) { + std::cerr << "rbd: unmap requires either image name or device path" + << std::endl; + return -EINVAL; + } + + std::vector<std::string> args; + + args.push_back("unmap"); + args.push_back(device_name.empty() ? image_name : device_name); + + if (vm.count("options")) { + int r = parse_options(vm["options"].as<std::vector<std::string>>(), &args); + if (r < 0) { + return r; + } + } + + return call_ggate_cmd(vm, args, ceph_global_init_args); +} + +} // namespace ggate +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Group.cc b/src/tools/rbd/action/Group.cc new file mode 100644 index 00000000..8554ae3b --- /dev/null +++ b/src/tools/rbd/action/Group.cc @@ -0,0 +1,904 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <iostream> + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" + +namespace rbd { +namespace action { +namespace group { + +namespace at = argument_types; +namespace po = boost::program_options; + +static const std::string GROUP_SPEC("group-spec"); +static const std::string GROUP_SNAP_SPEC("group-snap-spec"); + +static const std::string GROUP_NAME("group"); +static const std::string DEST_GROUP_NAME("dest-group"); + +static const std::string GROUP_POOL_NAME("group-" + at::POOL_NAME); +static const std::string IMAGE_POOL_NAME("image-" + at::POOL_NAME); + +void add_group_option(po::options_description *opt, + at::ArgumentModifier modifier) { + std::string name = GROUP_NAME; + std::string description = at::get_description_prefix(modifier) + "group name"; + switch (modifier) { + case at::ARGUMENT_MODIFIER_NONE: + case at::ARGUMENT_MODIFIER_SOURCE: + break; + case at::ARGUMENT_MODIFIER_DEST: + name = DEST_GROUP_NAME; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_prefixed_pool_option(po::options_description *opt, + const std::string &prefix) { + std::string name = prefix + "-" + at::POOL_NAME; + std::string description = prefix + " pool name"; + + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_prefixed_namespace_option(po::options_description *opt, + const std::string &prefix) { + std::string name = prefix + "-" + at::NAMESPACE_NAME; + std::string description = prefix + " namespace name"; + + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_group_spec_options(po::options_description *pos, + po::options_description *opt, + at::ArgumentModifier modifier, + bool snap) { + at::add_pool_option(opt, modifier); + at::add_namespace_option(opt, modifier); + add_group_option(opt, modifier); + if (!snap) { + pos->add_options() + ((get_name_prefix(modifier) + GROUP_SPEC).c_str(), + (get_description_prefix(modifier) + "group specification\n" + + "(example: [<pool-name>/[<namespace>/]]<group-name>)").c_str()); + } else { + add_snap_option(opt, modifier); + pos->add_options() + ((get_name_prefix(modifier) + GROUP_SNAP_SPEC).c_str(), + (get_description_prefix(modifier) + "group specification\n" + + "(example: [<pool-name>/[<namespace>/]]<group-name>@<snap-name>)").c_str()); + } +} + +int execute_create(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + librbd::RBD rbd; + r = rbd.group_create(io_ctx, group_name.c_str()); + if (r < 0) { + std::cerr << "rbd: create error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<std::string> names; + r = rbd.group_list(io_ctx, &names); + if (r < 0) + return r; + + if (f) + f->open_array_section("groups"); + for (auto i : names) { + if (f) + f->dump_string("name", i); + else + std::cout << i << std::endl; + } + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + librbd::RBD rbd; + + r = rbd.group_remove(io_ctx, group_name.c_str()); + if (r < 0) { + std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_rename(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string dest_pool_name; + std::string dest_namespace_name; + std::string dest_group_name; + + r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, at::DEST_POOL_NAME, + &dest_pool_name, &dest_namespace_name, DEST_GROUP_NAME, "group", + &dest_group_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (pool_name != dest_pool_name) { + std::cerr << "rbd: group rename across pools not supported" << std::endl + << "source pool: " << pool_name << ", dest pool: " + << dest_pool_name << std::endl; + return -EINVAL; + } else if (namespace_name != dest_namespace_name) { + std::cerr << "rbd: group rename across namespaces not supported" + << std::endl + << "source namespace: " << namespace_name << ", dest namespace: " + << dest_namespace_name << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_rename(io_ctx, group_name.c_str(), + dest_group_name.c_str()); + + if (r < 0) { + std::cerr << "rbd: failed to rename group: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_add(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + // Parse group data. + std::string group_pool_name; + std::string group_namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, GROUP_POOL_NAME, + &group_pool_name, &group_namespace_name, GROUP_NAME, "group", &group_name, + nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string image_pool_name; + std::string image_namespace_name; + std::string image_name; + + r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, IMAGE_POOL_NAME, + &image_pool_name, &image_namespace_name, at::IMAGE_NAME, "image", + &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (group_namespace_name != image_namespace_name) { + std::cerr << "rbd: group and image namespace must match." << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx cg_io_ctx; + r = utils::init(group_pool_name, group_namespace_name, &rados, &cg_io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx image_io_ctx; + r = utils::init(image_pool_name, group_namespace_name, &rados, &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_image_add(cg_io_ctx, group_name.c_str(), + image_io_ctx, image_name.c_str()); + if (r < 0) { + std::cerr << "rbd: add image error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_remove_image(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string group_pool_name; + std::string group_namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, GROUP_POOL_NAME, + &group_pool_name, &group_namespace_name, GROUP_NAME, "group", &group_name, + nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string image_pool_name; + std::string image_namespace_name; + std::string image_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, IMAGE_POOL_NAME, + &image_pool_name, &image_namespace_name, at::IMAGE_NAME, "image", + &image_name, nullptr, image_id.empty(), utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (group_namespace_name != image_namespace_name) { + std::cerr << "rbd: group and image namespace must match." << std::endl; + return -EINVAL; + } else if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx cg_io_ctx; + r = utils::init(group_pool_name, group_namespace_name, &rados, &cg_io_ctx); + if (r < 0) { + return r; + } + + librados::IoCtx image_io_ctx; + r = utils::init(image_pool_name, group_namespace_name, &rados, &image_io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + if (image_id.empty()) { + r = rbd.group_image_remove(cg_io_ctx, group_name.c_str(), + image_io_ctx, image_name.c_str()); + } else { + r = rbd.group_image_remove_by_id(cg_io_ctx, group_name.c_str(), + image_io_ctx, image_id.c_str()); + } + if (r < 0) { + std::cerr << "rbd: remove image error: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_list_images(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<librbd::group_image_info_t> images; + + r = rbd.group_image_list(io_ctx, group_name.c_str(), &images, + sizeof(librbd::group_image_info_t)); + + if (r == -ENOENT) + r = 0; + + if (r < 0) + return r; + + std::sort(images.begin(), images.end(), + [](const librbd::group_image_info_t &lhs, + const librbd::group_image_info_t &rhs) { + if (lhs.pool != rhs.pool) { + return lhs.pool < rhs.pool; + } + return lhs.name < rhs.name; + } + ); + + if (f) + f->open_array_section("images"); + + for (auto image : images) { + std::string image_name = image.name; + int state = image.state; + std::string state_string; + if (RBD_GROUP_IMAGE_STATE_INCOMPLETE == state) { + state_string = "incomplete"; + } + + std::string pool_name = ""; + + librados::Rados rados(io_ctx); + librados::IoCtx pool_io_ctx; + r = rados.ioctx_create2(image.pool, pool_io_ctx); + if (r < 0) { + pool_name = "<missing image pool " + stringify(image.pool) + ">"; + } else { + pool_name = pool_io_ctx.get_pool_name(); + } + + if (f) { + f->open_object_section("image"); + f->dump_string("image", image_name); + f->dump_string("pool", pool_name); + f->dump_string("namespace", io_ctx.get_namespace()); + f->dump_int("state", state); + f->close_section(); + } else { + std::cout << pool_name << "/"; + if (!io_ctx.get_namespace().empty()) { + std::cout << io_ctx.get_namespace() << "/"; + } + std::cout << image_name << " " << state_string << std::endl; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +int execute_group_snap_create(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + std::string snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::IoCtx io_ctx; + librados::Rados rados; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_snap_create(io_ctx, group_name.c_str(), snap_name.c_str()); + if (r < 0) { + return r; + } + + return 0; +} + +int execute_group_snap_remove(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + std::string snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::IoCtx io_ctx; + librados::Rados rados; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_snap_remove(io_ctx, group_name.c_str(), snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: failed to remove group snapshot: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_group_snap_rename(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + std::string source_snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &source_snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + std::string dest_snap_name; + if (vm.count(at::DEST_SNAPSHOT_NAME)) { + dest_snap_name = vm[at::DEST_SNAPSHOT_NAME].as<std::string>(); + } + + if (dest_snap_name.empty()) { + dest_snap_name = utils::get_positional_argument(vm, arg_index++); + } + + if (dest_snap_name.empty()) { + std::cerr << "rbd: destination snapshot name was not specified" + << std::endl; + return -EINVAL; + } + + r = utils::validate_snapshot_name(at::ARGUMENT_MODIFIER_DEST, dest_snap_name, + utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_SNAP); + if (r < 0) { + return r; + } + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.group_snap_rename(io_ctx, group_name.c_str(), + source_snap_name.c_str(), dest_snap_name.c_str()); + + if (r < 0) { + std::cerr << "rbd: failed to rename group snapshot: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_group_snap_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<librbd::group_snap_info_t> snaps; + + r = rbd.group_snap_list(io_ctx, group_name.c_str(), &snaps, + sizeof(librbd::group_snap_info_t)); + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + return r; + } + + TextTable t; + if (f) { + f->open_array_section("group_snaps"); + } else { + t.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + t.define_column("STATUS", TextTable::LEFT, TextTable::RIGHT); + } + + for (auto i : snaps) { + std::string snap_name = i.name; + int state = i.state; + std::string state_string; + if (RBD_GROUP_SNAP_STATE_INCOMPLETE == state) { + state_string = "incomplete"; + } else { + state_string = "ok"; + } + if (r < 0) { + return r; + } + if (f) { + f->open_object_section("group_snap"); + f->dump_string("snapshot", snap_name); + f->dump_string("state", state_string); + f->close_section(); + } else { + t << snap_name << state_string << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else if (snaps.size()) { + std::cout << t; + } + return 0; +} + +int execute_group_snap_rollback(const po::variables_map &vm, + const std::vector<std::string> &global_args) { + size_t arg_index = 0; + + std::string group_name; + std::string namespace_name; + std::string pool_name; + std::string snap_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, &snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librados::IoCtx io_ctx; + librados::Rados rados; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + utils::ProgressContext pc("Rolling back to group snapshot", + vm[at::NO_PROGRESS].as<bool>()); + r = rbd.group_snap_rollback_with_progress(io_ctx, group_name.c_str(), + snap_name.c_str(), pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: rollback group to snapshot failed: " + << cpp_strerror(r) << std::endl; + return r; + } + + pc.finish(); + return 0; +} + +void get_create_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_format_options(options); +} + +void get_rename_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE, + false); + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST, + false); +} + +void get_add_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + (GROUP_SPEC.c_str(), + "group specification\n" + "(example: [<pool-name>/[<namespace>/]]<group-name>)"); + + add_prefixed_pool_option(options, "group"); + add_prefixed_namespace_option(options, "group"); + add_group_option(options, at::ARGUMENT_MODIFIER_NONE); + + positional->add_options() + (at::IMAGE_SPEC.c_str(), + "image specification\n" + "(example: [<pool-name>/[<namespace>/]]<image-name>)"); + + add_prefixed_pool_option(options, "image"); + add_prefixed_namespace_option(options, "image"); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE, + " unless overridden"); +} + +void get_remove_image_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + (GROUP_SPEC.c_str(), + "group specification\n" + "(example: [<pool-name>/[<namespace>/]]<group-name>)"); + + add_prefixed_pool_option(options, "group"); + add_prefixed_namespace_option(options, "group"); + add_group_option(options, at::ARGUMENT_MODIFIER_NONE); + + positional->add_options() + (at::IMAGE_SPEC.c_str(), + "image specification\n" + "(example: [<pool-name>/[<namespace>/]]<image-name>)"); + + add_prefixed_pool_option(options, "image"); + add_prefixed_namespace_option(options, "image"); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE, + " unless overridden"); + at::add_image_id_option(options); +} + +void get_list_images_arguments(po::options_description *positional, + po::options_description *options) { + at::add_format_options(options); + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); +} + +void get_group_snap_create_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + true); +} + +void get_group_snap_remove_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + true); +} + +void get_group_snap_rename_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + true); + + positional->add_options() + (at::DEST_SNAPSHOT_NAME.c_str(), + "destination snapshot name\n(example: <snapshot-name>)"); + at::add_snap_option(options, at::ARGUMENT_MODIFIER_DEST); +} + +void get_group_snap_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_format_options(options); + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); +} + +void get_group_snap_rollback_arguments(po::options_description *positional, + po::options_description *options) { + at::add_no_progress_option(options); + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + true); +} + +Shell::Action action_create( + {"group", "create"}, {}, "Create a group.", + "", &get_create_arguments, &execute_create); +Shell::Action action_remove( + {"group", "remove"}, {"group", "rm"}, "Delete a group.", + "", &get_remove_arguments, &execute_remove); +Shell::Action action_list( + {"group", "list"}, {"group", "ls"}, "List rbd groups.", + "", &get_list_arguments, &execute_list); +Shell::Action action_rename( + {"group", "rename"}, {}, "Rename a group within pool.", + "", &get_rename_arguments, &execute_rename); +Shell::Action action_add( + {"group", "image", "add"}, {}, "Add an image to a group.", + "", &get_add_arguments, &execute_add); +Shell::Action action_remove_image( + {"group", "image", "remove"}, {"group", "image", "rm"}, + "Remove an image from a group.", "", + &get_remove_image_arguments, &execute_remove_image); +Shell::Action action_list_images( + {"group", "image", "list"}, {"group", "image", "ls"}, + "List images in a group.", "", + &get_list_images_arguments, &execute_list_images); +Shell::Action action_group_snap_create( + {"group", "snap", "create"}, {}, "Make a snapshot of a group.", + "", &get_group_snap_create_arguments, &execute_group_snap_create); +Shell::Action action_group_snap_remove( + {"group", "snap", "remove"}, {"group", "snap", "rm"}, + "Remove a snapshot from a group.", + "", &get_group_snap_remove_arguments, &execute_group_snap_remove); +Shell::Action action_group_snap_rename( + {"group", "snap", "rename"}, {}, "Rename group's snapshot.", + "", &get_group_snap_rename_arguments, &execute_group_snap_rename); +Shell::Action action_group_snap_list( + {"group", "snap", "list"}, {"group", "snap", "ls"}, + "List snapshots of a group.", + "", &get_group_snap_list_arguments, &execute_group_snap_list); +Shell::Action action_group_snap_rollback( + {"group", "snap", "rollback"}, {}, + "Rollback group to snapshot.", + "", &get_group_snap_rollback_arguments, &execute_group_snap_rollback); + +} // namespace group +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/ImageMeta.cc b/src/tools/rbd/action/ImageMeta.cc new file mode 100644 index 00000000..20c4555d --- /dev/null +++ b/src/tools/rbd/action/ImageMeta.cc @@ -0,0 +1,345 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace image_meta { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +void add_key_option(po::options_description *positional) { + positional->add_options() + ("key", "image meta key"); +} + +int get_key(const po::variables_map &vm, size_t *arg_index, + std::string *key) { + *key = utils::get_positional_argument(vm, *arg_index); + if (key->empty()) { + std::cerr << "rbd: metadata key was not specified" << std::endl; + return -EINVAL; + } else { + ++(*arg_index); + } + return 0; +} + +const uint32_t MAX_KEYS = 64; + +} // anonymous namespace + +static int do_metadata_list(librbd::Image& image, Formatter *f) +{ + int r; + TextTable tbl; + + size_t count = 0; + std::string last_key; + bool more_results = true; + while (more_results) { + std::map<std::string, bufferlist> pairs; + r = image.metadata_list(last_key, MAX_KEYS, &pairs); + if (r < 0) { + std::cerr << "failed to list metadata of image : " << cpp_strerror(r) + << std::endl; + return r; + } + + more_results = (pairs.size() == MAX_KEYS); + if (!pairs.empty()) { + if (count == 0) { + if (f) { + f->open_object_section("metadatas"); + } else { + tbl.define_column("Key", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Value", TextTable::LEFT, TextTable::LEFT); + } + } + + last_key = pairs.rbegin()->first; + count += pairs.size(); + + for (auto kv : pairs) { + std::string val(kv.second.c_str(), kv.second.length()); + if (f) { + f->dump_string(kv.first.c_str(), val.c_str()); + } else { + tbl << kv.first << val << TextTable::endrow; + } + } + } + } + + if (f == nullptr) { + bool single = (count == 1); + std::cout << "There " << (single ? "is" : "are") << " " << count << " " + << (single ? "metadatum" : "metadata") << " on this image" + << (count == 0 ? "." : ":") << std::endl; + } + + if (count > 0) { + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << std::endl << tbl; + } + } + return 0; +} + +static int do_metadata_set(librbd::Image& image, std::string &key, + std::string &value) +{ + int r = image.metadata_set(key, value); + if (r < 0) { + std::cerr << "failed to set metadata " << key << " of image : " + << cpp_strerror(r) << std::endl; + } + return r; +} + +static int do_metadata_remove(librbd::Image& image, std::string &key) +{ + int r = image.metadata_remove(key); + if (r == -ENOENT) { + std::cerr << "rbd: no existing metadata key " << key << " of image : " + << cpp_strerror(r) << std::endl; + } else if(r < 0) { + std::cerr << "failed to remove metadata " << key << " of image : " + << cpp_strerror(r) << std::endl; + } + return r; +} + +static int do_metadata_get(librbd::Image& image, std::string &key) +{ + std::string s; + int r = image.metadata_get(key, &s); + if (r < 0) { + std::cerr << "failed to get metadata " << key << " of image : " + << cpp_strerror(r) << std::endl; + return r; + } + std::cout << s << std::endl; + return r; +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_list(image, formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_get(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_get(image, key); + if (r < 0) { + std::cerr << "rbd: getting metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_set_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); + positional->add_options() + ("value", "image meta value"); +} + +int execute_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + std::string value = utils::get_positional_argument(vm, arg_index); + if (value.empty()) { + std::cerr << "rbd: metadata value was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_set(image, key, value); + if (r < 0) { + std::cerr << "rbd: setting metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_key_option(positional); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string key; + r = get_key(vm, &arg_index, &key); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_metadata_remove(image, key); + if (r < 0) { + std::cerr << "rbd: removing metadata failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +Shell::Action action_list( + {"image-meta", "list"}, {"image-meta", "ls"}, "Image metadata list keys with values.", "", + &get_list_arguments, &execute_list); +Shell::Action action_get( + {"image-meta", "get"}, {}, + "Image metadata get the value associated with the key.", "", + &get_get_arguments, &execute_get); +Shell::Action action_set( + {"image-meta", "set"}, {}, "Image metadata set key with value.", "", + &get_set_arguments, &execute_set); +Shell::Action action_remove( + {"image-meta", "remove"}, {"image-meta", "rm"}, + "Image metadata remove the key and value associated.", "", + &get_remove_arguments, &execute_remove); + +} // namespace image_meta +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Import.cc b/src/tools/rbd/action/Import.cc new file mode 100644 index 00000000..7397d926 --- /dev/null +++ b/src/tools/rbd/action/Import.cc @@ -0,0 +1,1037 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/Context.h" +#include "common/blkdev.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Throttle.h" +#include "include/compat.h" +#include "include/encoding.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include <iostream> +#include <boost/program_options.hpp> +#include <boost/scoped_ptr.hpp> +#include "include/ceph_assert.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd + +namespace rbd { +namespace action { +namespace import { + +struct ImportDiffContext { + librbd::Image *image; + int fd; + size_t size; + utils::ProgressContext pc; + OrderedThrottle throttle; + uint64_t last_offset; + + ImportDiffContext(librbd::Image *image, int fd, size_t size, bool no_progress) + : image(image), fd(fd), size(size), pc("Importing image diff", no_progress), + throttle((fd == STDIN_FILENO) ? 1 : + g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), + false), + last_offset(0) { + } + + void update_size(size_t new_size) + { + if (fd == STDIN_FILENO) { + size = new_size; + } + } + + void update_progress(uint64_t off) + { + if (size) { + pc.update_progress(off, size); + last_offset = off; + } + } + + void update_progress() + { + uint64_t off = last_offset; + if (fd != STDIN_FILENO) { + off = lseek(fd, 0, SEEK_CUR); + } + + update_progress(off); + } + + void finish(int r) + { + if (r < 0) { + pc.fail(); + } else { + pc.finish(); + } + } +}; + +class C_ImportDiff : public Context { +public: + C_ImportDiff(ImportDiffContext *idiffctx, bufferlist data, uint64_t offset, + uint64_t length, bool write_zeroes) + : m_idiffctx(idiffctx), m_data(data), m_offset(offset), m_length(length), + m_write_zeroes(write_zeroes) { + // use block offset (stdin) or import file position to report + // progress. + if (m_idiffctx->fd == STDIN_FILENO) { + m_prog_offset = offset; + } else { + m_prog_offset = lseek(m_idiffctx->fd, 0, SEEK_CUR); + } + } + + int send() + { + if (m_idiffctx->throttle.pending_error()) { + return m_idiffctx->throttle.wait_for_ret(); + } + + C_OrderedThrottle *ctx = m_idiffctx->throttle.start_op(this); + librbd::RBD::AioCompletion *aio_completion = + new librbd::RBD::AioCompletion(ctx, &utils::aio_context_callback); + + int r; + if (m_write_zeroes) { + r = m_idiffctx->image->aio_write_zeroes(m_offset, m_length, + aio_completion, 0U, + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + } else { + r = m_idiffctx->image->aio_write2(m_offset, m_length, m_data, + aio_completion, + LIBRADOS_OP_FLAG_FADVISE_NOCACHE); + } + + if (r < 0) { + aio_completion->release(); + ctx->complete(r); + } + + return r; + } + + void finish(int r) override + { + m_idiffctx->update_progress(m_prog_offset); + m_idiffctx->throttle.end_op(r); + } + +private: + ImportDiffContext *m_idiffctx; + bufferlist m_data; + uint64_t m_offset; + uint64_t m_length; + bool m_write_zeroes; + uint64_t m_prog_offset; +}; + +static int do_image_snap_from(ImportDiffContext *idiffctx) +{ + int r; + string from; + r = utils::read_string(idiffctx->fd, 4096, &from); // 4k limit to make sure we don't get a garbage string + if (r < 0) { + std::cerr << "rbd: failed to decode start snap name" << std::endl; + return r; + } + + bool exists; + r = idiffctx->image->snap_exists2(from.c_str(), &exists); + if (r < 0) { + std::cerr << "rbd: failed to query start snap state" << std::endl; + return r; + } + + if (!exists) { + std::cerr << "start snapshot '" << from + << "' does not exist in the image, aborting" << std::endl; + return -EINVAL; + } + + idiffctx->update_progress(); + return 0; +} + +static int do_image_snap_to(ImportDiffContext *idiffctx, std::string *tosnap) +{ + int r; + string to; + r = utils::read_string(idiffctx->fd, 4096, &to); // 4k limit to make sure we don't get a garbage string + if (r < 0) { + std::cerr << "rbd: failed to decode end snap name" << std::endl; + return r; + } + + bool exists; + r = idiffctx->image->snap_exists2(to.c_str(), &exists); + if (r < 0) { + std::cerr << "rbd: failed to query end snap state" << std::endl; + return r; + } + + if (exists) { + std::cerr << "end snapshot '" << to << "' already exists, aborting" + << std::endl; + return -EEXIST; + } + + *tosnap = to; + idiffctx->update_progress(); + + return 0; +} + +static int get_snap_protection_status(ImportDiffContext *idiffctx, + bool *is_protected) +{ + int r; + char buf[sizeof(__u8)]; + r = safe_read_exact(idiffctx->fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode snap protection status" << std::endl; + return r; + } + + *is_protected = (buf[0] != 0); + idiffctx->update_progress(); + + return 0; +} + +static int do_image_resize(ImportDiffContext *idiffctx) +{ + int r; + char buf[sizeof(uint64_t)]; + uint64_t end_size; + r = safe_read_exact(idiffctx->fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode image size" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + decode(end_size, p); + + uint64_t cur_size; + idiffctx->image->size(&cur_size); + if (cur_size != end_size) { + idiffctx->image->resize(end_size); + } + + idiffctx->update_size(end_size); + idiffctx->update_progress(); + return 0; +} + +static int do_image_io(ImportDiffContext *idiffctx, bool write_zeroes, + size_t sparse_size) +{ + int r; + char buf[16]; + r = safe_read_exact(idiffctx->fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode IO length" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + + uint64_t image_offset, buffer_length; + decode(image_offset, p); + decode(buffer_length, p); + + if (!write_zeroes) { + bufferptr bp = buffer::create(buffer_length); + r = safe_read_exact(idiffctx->fd, bp.c_str(), buffer_length); + if (r < 0) { + std::cerr << "rbd: failed to decode write data" << std::endl; + return r; + } + + size_t buffer_offset = 0; + while (buffer_offset < buffer_length) { + size_t write_length = 0; + bool zeroed = false; + utils::calc_sparse_extent(bp, sparse_size, buffer_offset, buffer_length, + &write_length, &zeroed); + ceph_assert(write_length > 0); + + bufferlist write_bl; + if (!zeroed) { + bufferptr write_ptr(bp, buffer_offset, write_length); + write_bl.push_back(write_ptr); + ceph_assert(write_bl.length() == write_length); + } + + C_ImportDiff *ctx = new C_ImportDiff(idiffctx, write_bl, + image_offset + buffer_offset, + write_length, zeroed); + r = ctx->send(); + if (r < 0) { + return r; + } + + buffer_offset += write_length; + } + } else { + bufferlist data; + C_ImportDiff *ctx = new C_ImportDiff(idiffctx, data, image_offset, + buffer_length, true); + return ctx->send(); + } + return r; +} + +static int validate_banner(int fd, std::string banner) +{ + int r; + char buf[banner.size() + 1]; + memset(buf, 0, sizeof(buf)); + r = safe_read_exact(fd, buf, banner.size()); + if (r < 0) { + std::cerr << "rbd: failed to decode diff banner" << std::endl; + return r; + } + + buf[banner.size()] = '\0'; + if (strcmp(buf, banner.c_str())) { + std::cerr << "rbd: invalid or unexpected diff banner" << std::endl; + return -EINVAL; + } + + return 0; +} + +static int skip_tag(int fd, uint64_t length) +{ + int r; + + if (fd == STDIN_FILENO) { + // read the appending data out to skip this tag. + char buf[4096]; + uint64_t len = min<uint64_t>(length, sizeof(buf)); + while (len > 0) { + r = safe_read_exact(fd, buf, len); + if (r < 0) { + std::cerr << "rbd: failed to decode skipped tag data" << std::endl; + return r; + } + length -= len; + len = min<uint64_t>(length, sizeof(buf)); + } + } else { + // lseek to skip this tag + off64_t offs = lseek64(fd, length, SEEK_CUR); + if (offs < 0) { + return -errno; + } + } + + return 0; +} + +static int read_tag(int fd, __u8 end_tag, int format, __u8 *tag, uint64_t *readlen) +{ + int r; + __u8 read_tag; + + r = safe_read_exact(fd, &read_tag, sizeof(read_tag)); + if (r < 0) { + std::cerr << "rbd: failed to decode tag" << std::endl; + return r; + } + + *tag = read_tag; + if (read_tag != end_tag && format == 2) { + char buf[sizeof(uint64_t)]; + r = safe_read_exact(fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode tag length" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + decode(*readlen, p); + } + + return 0; +} + +int do_import_diff_fd(librados::Rados &rados, librbd::Image &image, int fd, + bool no_progress, int format, size_t sparse_size) +{ + int r; + + uint64_t size = 0; + bool from_stdin = (fd == STDIN_FILENO); + if (!from_stdin) { + struct stat stat_buf; + r = ::fstat(fd, &stat_buf); + if (r < 0) { + std::cerr << "rbd: failed to stat specified diff file" << std::endl; + return r; + } + size = (uint64_t)stat_buf.st_size; + } + + r = validate_banner(fd, (format == 1 ? utils::RBD_DIFF_BANNER : + utils::RBD_DIFF_BANNER_V2)); + if (r < 0) { + return r; + } + + // begin image import + std::string tosnap; + bool is_protected = false; + ImportDiffContext idiffctx(&image, fd, size, no_progress); + while (r == 0) { + __u8 tag; + uint64_t length = 0; + + r = read_tag(fd, RBD_DIFF_END, format, &tag, &length); + if (r < 0 || tag == RBD_DIFF_END) { + break; + } + + if (tag == RBD_DIFF_FROM_SNAP) { + r = do_image_snap_from(&idiffctx); + } else if (tag == RBD_DIFF_TO_SNAP) { + r = do_image_snap_to(&idiffctx, &tosnap); + } else if (tag == RBD_SNAP_PROTECTION_STATUS) { + r = get_snap_protection_status(&idiffctx, &is_protected); + } else if (tag == RBD_DIFF_IMAGE_SIZE) { + r = do_image_resize(&idiffctx); + } else if (tag == RBD_DIFF_WRITE || tag == RBD_DIFF_ZERO) { + r = do_image_io(&idiffctx, (tag == RBD_DIFF_ZERO), sparse_size); + } else { + std::cerr << "unrecognized tag byte " << (int)tag << " in stream; skipping" + << std::endl; + r = skip_tag(fd, length); + } + } + + int temp_r = idiffctx.throttle.wait_for_ret(); + r = (r < 0) ? r : temp_r; // preserve original error + if (r == 0 && tosnap.length()) { + r = idiffctx.image->snap_create(tosnap.c_str()); + if (r == 0 && is_protected) { + r = idiffctx.image->snap_protect(tosnap.c_str()); + } + } + + idiffctx.finish(r); + return r; +} + +int do_import_diff(librados::Rados &rados, librbd::Image &image, + const char *path, bool no_progress, size_t sparse_size) +{ + int r; + int fd; + + if (strcmp(path, "-") == 0) { + fd = STDIN_FILENO; + } else { + fd = open(path, O_RDONLY); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << path << std::endl; + return r; + } + } + r = do_import_diff_fd(rados, image, fd, no_progress, 1, sparse_size); + + if (fd != 0) + close(fd); + return r; +} + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments_diff(po::options_description *positional, + po::options_description *options) { + at::add_path_options(positional, options, + "import file (or '-' for stdin)"); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_sparse_size_option(options); + at::add_no_progress_option(options); +} + +int execute_diff(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string path; + size_t arg_index = 0; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_import_diff(rados, image, path.c_str(), + vm[at::NO_PROGRESS].as<bool>(), sparse_size); + if (r == -EDOM) { + r = -EBADMSG; + } + if (r < 0) { + cerr << "rbd: import-diff failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action_diff( + {"import-diff"}, {}, "Import an incremental diff.", "", &get_arguments_diff, + &execute_diff); + +class C_Import : public Context { +public: + C_Import(SimpleThrottle &simple_throttle, librbd::Image &image, + bufferlist &bl, uint64_t offset) + : m_throttle(simple_throttle), m_image(image), + m_aio_completion( + new librbd::RBD::AioCompletion(this, &utils::aio_context_callback)), + m_bufferlist(bl), m_offset(offset) + { + } + + void send() + { + m_throttle.start_op(); + + int op_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | + LIBRADOS_OP_FLAG_FADVISE_NOCACHE; + int r = m_image.aio_write2(m_offset, m_bufferlist.length(), m_bufferlist, + m_aio_completion, op_flags); + if (r < 0) { + std::cerr << "rbd: error requesting write to destination image" + << std::endl; + m_aio_completion->release(); + m_throttle.end_op(r); + } + } + + void finish(int r) override + { + if (r < 0) { + std::cerr << "rbd: error writing to destination image at offset " + << m_offset << ": " << cpp_strerror(r) << std::endl; + } + m_throttle.end_op(r); + } + +private: + SimpleThrottle &m_throttle; + librbd::Image &m_image; + librbd::RBD::AioCompletion *m_aio_completion; + bufferlist m_bufferlist; + uint64_t m_offset; +}; + +static int decode_and_set_image_option(int fd, uint64_t imageopt, librbd::ImageOptions& opts) +{ + int r; + char buf[sizeof(uint64_t)]; + + r = safe_read_exact(fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode image option" << std::endl; + return r; + } + + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto it = bl.cbegin(); + + uint64_t val; + decode(val, it); + + if (opts.get(imageopt, &val) != 0) { + opts.set(imageopt, val); + } + + return 0; +} + +static int do_import_metadata(int import_format, librbd::Image& image, + const std::map<std::string, std::string> &imagemetas) +{ + int r = 0; + + //v1 format + if (import_format == 1) { + return 0; + } + + for (std::map<std::string, std::string>::const_iterator it = imagemetas.begin(); + it != imagemetas.end(); ++it) { + r = image.metadata_set(it->first, it->second); + if (r < 0) + return r; + } + + return 0; +} + +static int decode_imagemeta(int fd, uint64_t length, std::map<std::string, std::string>* imagemetas) +{ + int r; + string key; + string value; + + r = utils::read_string(fd, length, &key); + if (r < 0) { + std::cerr << "rbd: failed to decode metadata key" << std::endl; + return r; + } + + r = utils::read_string(fd, length, &value); + if (r < 0) { + std::cerr << "rbd: failed to decode metadata value" << std::endl; + return r; + } + + (*imagemetas)[key] = value; + return 0; +} + +static int do_import_header(int fd, int import_format, librbd::ImageOptions& opts, + std::map<std::string, std::string>* imagemetas) +{ + // There is no header in v1 image. + if (import_format == 1) { + return 0; + } + + int r; + r = validate_banner(fd, utils::RBD_IMAGE_BANNER_V2); + if (r < 0) { + return r; + } + + // As V1 format for image is already deprecated, import image in V2 by default. + uint64_t image_format = 2; + if (opts.get(RBD_IMAGE_OPTION_FORMAT, &image_format) != 0) { + opts.set(RBD_IMAGE_OPTION_FORMAT, image_format); + } + + while (r == 0) { + __u8 tag; + uint64_t length = 0; + r = read_tag(fd, RBD_EXPORT_IMAGE_END, image_format, &tag, &length); + if (r < 0 || tag == RBD_EXPORT_IMAGE_END) { + break; + } + + if (tag == RBD_EXPORT_IMAGE_ORDER) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_ORDER, opts); + } else if (tag == RBD_EXPORT_IMAGE_FEATURES) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_FEATURES, opts); + } else if (tag == RBD_EXPORT_IMAGE_STRIPE_UNIT) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_STRIPE_UNIT, opts); + } else if (tag == RBD_EXPORT_IMAGE_STRIPE_COUNT) { + r = decode_and_set_image_option(fd, RBD_IMAGE_OPTION_STRIPE_COUNT, opts); + } else if (tag == RBD_EXPORT_IMAGE_META) { + r = decode_imagemeta(fd, length, imagemetas); + } else { + std::cerr << "rbd: invalid tag in image properties zone: " << tag << "Skip it." + << std::endl; + r = skip_tag(fd, length); + } + } + + return r; +} + +static int do_import_v2(librados::Rados &rados, int fd, librbd::Image &image, + uint64_t size, size_t imgblklen, + utils::ProgressContext &pc, size_t sparse_size) +{ + int r = 0; + r = validate_banner(fd, utils::RBD_IMAGE_DIFFS_BANNER_V2); + if (r < 0) { + return r; + } + + char buf[sizeof(uint64_t)]; + r = safe_read_exact(fd, buf, sizeof(buf)); + if (r < 0) { + std::cerr << "rbd: failed to decode diff count" << std::endl; + return r; + } + bufferlist bl; + bl.append(buf, sizeof(buf)); + auto p = bl.cbegin(); + uint64_t diff_num; + decode(diff_num, p); + for (size_t i = 0; i < diff_num; i++) { + r = do_import_diff_fd(rados, image, fd, true, 2, sparse_size); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: import-diff failed: " << cpp_strerror(r) << std::endl; + return r; + } + pc.update_progress(i + 1, diff_num); + } + + return r; +} + +static int do_import_v1(int fd, librbd::Image &image, uint64_t size, + size_t imgblklen, utils::ProgressContext &pc, + size_t sparse_size) +{ + int r = 0; + size_t reqlen = imgblklen; // amount requested from read + ssize_t readlen; // amount received from one read + size_t blklen = 0; // amount accumulated from reads to fill blk + char *p = new char[imgblklen]; + uint64_t image_pos = 0; + bool from_stdin = (fd == STDIN_FILENO); + boost::scoped_ptr<SimpleThrottle> throttle; + + if (from_stdin) { + throttle.reset(new SimpleThrottle(1, false)); + } else { + throttle.reset(new SimpleThrottle( + g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), false)); + } + + reqlen = min<uint64_t>(reqlen, size); + // loop body handles 0 return, as we may have a block to flush + while ((readlen = ::read(fd, p + blklen, reqlen)) >= 0) { + if (throttle->pending_error()) { + break; + } + + blklen += readlen; + // if read was short, try again to fill the block before writing + if (readlen && ((size_t)readlen < reqlen)) { + reqlen -= readlen; + continue; + } + if (!from_stdin) + pc.update_progress(image_pos, size); + + bufferptr blkptr(p, blklen); + // resize output image by binary expansion as we go for stdin + if (from_stdin && (image_pos + (size_t)blklen) > size) { + size *= 2; + r = image.resize(size); + if (r < 0) { + std::cerr << "rbd: can't resize image during import" << std::endl; + goto out; + } + } + + // write as much as we got; perhaps less than imgblklen + // but skip writing zeros to create sparse images + size_t buffer_offset = 0; + while (buffer_offset < blklen) { + size_t write_length = 0; + bool zeroed = false; + utils::calc_sparse_extent(blkptr, sparse_size, buffer_offset, blklen, + &write_length, &zeroed); + + if (!zeroed) { + bufferlist write_bl; + bufferptr write_ptr(blkptr, buffer_offset, write_length); + write_bl.push_back(write_ptr); + ceph_assert(write_bl.length() == write_length); + + C_Import *ctx = new C_Import(*throttle, image, write_bl, + image_pos + buffer_offset); + ctx->send(); + } + + buffer_offset += write_length; + } + + // done with whole block, whether written or not + image_pos += blklen; + if (!from_stdin && image_pos >= size) + break; + // if read had returned 0, we're at EOF and should quit + if (readlen == 0) + break; + blklen = 0; + reqlen = imgblklen; + } + r = throttle->wait_for_ret(); + if (r < 0) { + goto out; + } + + if (fd == STDIN_FILENO) { + r = image.resize(image_pos); + if (r < 0) { + std::cerr << "rbd: final image resize failed" << std::endl; + goto out; + } + } +out: + delete[] p; + return r; +} + +static int do_import(librados::Rados &rados, librbd::RBD &rbd, + librados::IoCtx& io_ctx, const char *imgname, + const char *path, librbd::ImageOptions& opts, + bool no_progress, int import_format, size_t sparse_size) +{ + int fd, r; + struct stat stat_buf; + utils::ProgressContext pc("Importing image", no_progress); + std::map<std::string, std::string> imagemetas; + + ceph_assert(imgname); + + uint64_t order; + if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { + order = g_conf().get_val<uint64_t>("rbd_default_order"); + } + + // try to fill whole imgblklen blocks for sparsification + size_t imgblklen = 1 << order; + librbd::Image image; + uint64_t size = 0; + + bool from_stdin = !strcmp(path, "-"); + if (from_stdin) { + fd = STDIN_FILENO; + size = 1ULL << order; + } else { + if ((fd = open(path, O_RDONLY)) < 0) { + r = -errno; + std::cerr << "rbd: error opening " << path << std::endl; + goto done2; + } + + if ((fstat(fd, &stat_buf)) < 0) { + r = -errno; + std::cerr << "rbd: stat error " << path << std::endl; + goto done; + } + if (S_ISDIR(stat_buf.st_mode)) { + r = -EISDIR; + std::cerr << "rbd: cannot import a directory" << std::endl; + goto done; + } + if (stat_buf.st_size) + size = (uint64_t)stat_buf.st_size; + + if (!size) { + int64_t bdev_size = 0; + BlkDev blkdev(fd); + r = blkdev.get_size(&bdev_size); + if (r < 0) { + std::cerr << "rbd: unable to get size of file/block device" + << std::endl; + goto done; + } + ceph_assert(bdev_size >= 0); + size = (uint64_t) bdev_size; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + r = do_import_header(fd, import_format, opts, &imagemetas); + if (r < 0) { + std::cerr << "rbd: import header failed." << std::endl; + goto done; + } + + r = rbd.create4(io_ctx, imgname, size, opts); + if (r < 0) { + std::cerr << "rbd: image creation failed" << std::endl; + goto done; + } + + r = rbd.open(io_ctx, image, imgname); + if (r < 0) { + std::cerr << "rbd: failed to open image" << std::endl; + goto err; + } + + r = do_import_metadata(import_format, image, imagemetas); + if (r < 0) { + std::cerr << "rbd: failed to import image-meta" << std::endl; + goto err; + } + + if (import_format == 1) { + r = do_import_v1(fd, image, size, imgblklen, pc, sparse_size); + } else { + r = do_import_v2(rados, fd, image, size, imgblklen, pc, sparse_size); + } + if (r < 0) { + std::cerr << "rbd: failed to import image" << std::endl; + image.close(); + goto err; + } + + r = image.close(); +err: + if (r < 0) + rbd.remove(io_ctx, imgname); +done: + if (r < 0) + pc.fail(); + else + pc.finish(); + if (!from_stdin) + close(fd); +done2: + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_path_options(positional, options, + "import file (or '-' for stdin)"); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, true); + at::add_sparse_size_option(options); + at::add_no_progress_option(options); + at::add_export_format_option(options); + + // TODO legacy rbd allowed import to accept both 'image'/'dest' and + // 'pool'/'dest-pool' + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE, " (deprecated)"); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE, " (deprecated)"); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string path; + size_t arg_index = 0; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + // odd check to support legacy / deprecated behavior of import + std::string deprecated_pool_name; + if (vm.count(at::POOL_NAME)) { + deprecated_pool_name = vm[at::POOL_NAME].as<std::string>(); + std::cerr << "rbd: --pool is deprecated for import, use --dest-pool" + << std::endl; + } + + std::string deprecated_image_name; + if (vm.count(at::IMAGE_NAME)) { + deprecated_image_name = vm[at::IMAGE_NAME].as<std::string>(); + std::cerr << "rbd: --image is deprecated for import, use --dest" + << std::endl; + } else { + deprecated_image_name = path.substr(path.find_last_of("/") + 1); + } + + std::string deprecated_snap_name; + r = utils::extract_spec(deprecated_image_name, &deprecated_pool_name, + nullptr, &deprecated_image_name, + &deprecated_snap_name, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + + std::string pool_name = deprecated_pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name = deprecated_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, false, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (image_name.empty()) { + image_name = deprecated_image_name; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, true, &opts); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + int format = 1; + if (vm.count("export-format")) + format = vm["export-format"].as<uint64_t>(); + + librbd::RBD rbd; + r = do_import(rados, rbd, io_ctx, image_name.c_str(), path.c_str(), + opts, vm[at::NO_PROGRESS].as<bool>(), format, sparse_size); + if (r < 0) { + std::cerr << "rbd: import failed: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +Shell::Action action( + {"import"}, {}, "Import image from file.", at::get_long_features_help(), + &get_arguments, &execute); + +} // namespace import +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Info.cc b/src/tools/rbd/action/Info.cc new file mode 100644 index 00000000..5adacb92 --- /dev/null +++ b/src/tools/rbd/action/Info.cc @@ -0,0 +1,459 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include <iostream> +#include <boost/program_options.hpp> + +#include "common/Clock.h" + +namespace rbd { +namespace action { +namespace info { + +namespace at = argument_types; +namespace po = boost::program_options; + +static void format_bitmask(Formatter *f, const std::string &name, + const std::map<uint64_t, std::string>& mapping, + uint64_t bitmask) +{ + int count = 0; + std::string group_name(name + "s"); + if (f == NULL) { + std::cout << "\t" << group_name << ": "; + } else { + f->open_array_section(group_name.c_str()); + } + for (std::map<uint64_t, std::string>::const_iterator it = mapping.begin(); + it != mapping.end(); ++it) { + if ((it->first & bitmask) == 0) { + continue; + } + + if (f == NULL) { + if (count++ > 0) { + std::cout << ", "; + } + std::cout << it->second; + } else { + f->dump_string(name.c_str(), it->second); + } + } + if (f == NULL) { + std::cout << std::endl; + } else { + f->close_section(); + } +} + +static void format_features(Formatter *f, uint64_t features) +{ + format_bitmask(f, "feature", at::ImageFeatures::FEATURE_MAPPING, features); +} + +static void format_op_features(Formatter *f, uint64_t op_features) +{ + static std::map<uint64_t, std::string> mapping = { + {RBD_OPERATION_FEATURE_CLONE_PARENT, RBD_OPERATION_FEATURE_NAME_CLONE_PARENT}, + {RBD_OPERATION_FEATURE_CLONE_CHILD, RBD_OPERATION_FEATURE_NAME_CLONE_CHILD}, + {RBD_OPERATION_FEATURE_GROUP, RBD_OPERATION_FEATURE_NAME_GROUP}, + {RBD_OPERATION_FEATURE_SNAP_TRASH, RBD_OPERATION_FEATURE_NAME_SNAP_TRASH}}; + format_bitmask(f, "op_feature", mapping, op_features); +} + +static void format_flags(Formatter *f, uint64_t flags) +{ + std::map<uint64_t, std::string> mapping = { + {RBD_FLAG_OBJECT_MAP_INVALID, "object map invalid"}, + {RBD_FLAG_FAST_DIFF_INVALID, "fast diff invalid"}}; + format_bitmask(f, "flag", mapping, flags); +} + +void format_timestamp(struct timespec timestamp, std::string ×tamp_str) { + if(timestamp.tv_sec != 0) { + time_t ts = timestamp.tv_sec; + timestamp_str = ctime(&ts); + timestamp_str = timestamp_str.substr(0, timestamp_str.length() - 1); + } +} + +static int do_show_info(librados::IoCtx &io_ctx, librbd::Image& image, + const std::string &snapname, Formatter *f) +{ + librbd::image_info_t info; + uint8_t old_format; + uint64_t overlap, features, flags, snap_limit; + bool snap_protected = false; + librbd::mirror_image_info_t mirror_image; + std::vector<librbd::snap_info_t> snaps; + int r; + + std::string imgname; + r = image.get_name(&imgname); + if (r < 0) + return r; + + r = image.snap_list(snaps); + if (r < 0) + return r; + + r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + r = image.old_format(&old_format); + if (r < 0) + return r; + + std::string imgid; + if (!old_format) { + r = image.get_id(&imgid); + if (r < 0) + return r; + } + + std::string data_pool; + if (!old_format) { + int64_t data_pool_id = image.get_data_pool_id(); + if (data_pool_id != io_ctx.get_id()) { + librados::Rados rados(io_ctx); + librados::IoCtx data_io_ctx; + r = rados.ioctx_create2(data_pool_id, data_io_ctx); + if (r < 0) { + data_pool = "<missing data pool " + stringify(data_pool_id) + ">"; + } else { + data_pool = data_io_ctx.get_pool_name(); + } + } + } + + r = image.overlap(&overlap); + if (r < 0) + return r; + + r = image.features(&features); + if (r < 0) + return r; + + uint64_t op_features; + r = image.get_op_features(&op_features); + if (r < 0) { + return r; + } + + r = image.get_flags(&flags); + if (r < 0) { + return r; + } + + if (!snapname.empty()) { + r = image.snap_is_protected(snapname.c_str(), &snap_protected); + if (r < 0) + return r; + } + + if (features & RBD_FEATURE_JOURNALING) { + r = image.mirror_image_get_info(&mirror_image, sizeof(mirror_image)); + if (r < 0) { + return r; + } + } + + r = image.snap_get_limit(&snap_limit); + if (r < 0) + return r; + + std::string prefix = image.get_block_name_prefix(); + + librbd::group_info_t group_info; + r = image.get_group(&group_info, sizeof(group_info)); + if (r < 0) { + return r; + } + + std::string group_string = ""; + if (RBD_GROUP_INVALID_POOL != group_info.pool) { + std::string group_pool; + librados::Rados rados(io_ctx); + librados::IoCtx group_io_ctx; + r = rados.ioctx_create2(group_info.pool, group_io_ctx); + if (r < 0) { + group_pool = "<missing group pool " + stringify(group_info.pool) + ">"; + } else { + group_pool = group_io_ctx.get_pool_name(); + } + + group_string = group_pool + "/"; + if (!io_ctx.get_namespace().empty()) { + group_string += io_ctx.get_namespace() + "/"; + } + group_string += group_info.name; + } + + struct timespec create_timestamp; + image.get_create_timestamp(&create_timestamp); + + std::string create_timestamp_str = ""; + format_timestamp(create_timestamp, create_timestamp_str); + + struct timespec access_timestamp; + image.get_access_timestamp(&access_timestamp); + + std::string access_timestamp_str = ""; + format_timestamp(access_timestamp, access_timestamp_str); + + struct timespec modify_timestamp; + image.get_modify_timestamp(&modify_timestamp); + + std::string modify_timestamp_str = ""; + format_timestamp(modify_timestamp, modify_timestamp_str); + + if (f) { + f->open_object_section("image"); + f->dump_string("name", imgname); + f->dump_string("id", imgid); + f->dump_unsigned("size", info.size); + f->dump_unsigned("objects", info.num_objs); + f->dump_int("order", info.order); + f->dump_unsigned("object_size", info.obj_size); + f->dump_int("snapshot_count", snaps.size()); + if (!data_pool.empty()) { + f->dump_string("data_pool", data_pool); + } + f->dump_string("block_name_prefix", prefix); + f->dump_int("format", (old_format ? 1 : 2)); + } else { + std::cout << "rbd image '" << imgname << "':\n" + << "\tsize " << byte_u_t(info.size) << " in " + << info.num_objs << " objects" + << std::endl + << "\torder " << info.order + << " (" << byte_u_t(info.obj_size) << " objects)" + << std::endl + << "\tsnapshot_count: " << snaps.size() + << std::endl; + if (!imgid.empty()) { + std::cout << "\tid: " << imgid << std::endl; + } + if (!data_pool.empty()) { + std::cout << "\tdata_pool: " << data_pool << std::endl; + } + std::cout << "\tblock_name_prefix: " << prefix + << std::endl + << "\tformat: " << (old_format ? "1" : "2") + << std::endl; + } + + if (!old_format) { + format_features(f, features); + format_op_features(f, op_features); + format_flags(f, flags); + } + + if (!group_string.empty()) { + if (f) { + f->dump_string("group", group_string); + } else { + std::cout << "\tgroup: " << group_string + << std::endl; + } + } + + if (!create_timestamp_str.empty()) { + if (f) { + f->dump_string("create_timestamp", create_timestamp_str); + } else { + std::cout << "\tcreate_timestamp: " << create_timestamp_str + << std::endl; + } + } + + if (!access_timestamp_str.empty()) { + if (f) { + f->dump_string("access_timestamp", access_timestamp_str); + } else { + std::cout << "\taccess_timestamp: " << access_timestamp_str + << std::endl; + } + } + + if (!modify_timestamp_str.empty()) { + if (f) { + f->dump_string("modify_timestamp", modify_timestamp_str); + } else { + std::cout << "\tmodify_timestamp: " << modify_timestamp_str + << std::endl; + } + } + + // snapshot info, if present + if (!snapname.empty()) { + if (f) { + f->dump_string("protected", snap_protected ? "true" : "false"); + } else { + std::cout << "\tprotected: " << (snap_protected ? "True" : "False") + << std::endl; + } + } + + if (snap_limit < UINT64_MAX) { + if (f) { + f->dump_unsigned("snapshot_limit", snap_limit); + } else { + std::cout << "\tsnapshot_limit: " << snap_limit << std::endl; + } + } + + // parent info, if present + librbd::linked_image_spec_t parent_image_spec; + librbd::snap_spec_t parent_snap_spec; + if ((image.get_parent(&parent_image_spec, &parent_snap_spec) == 0) && + (parent_image_spec.image_name.length() > 0)) { + if (f) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image_spec.pool_name); + f->dump_string("pool_namespace", parent_image_spec.pool_namespace); + f->dump_string("image", parent_image_spec.image_name); + f->dump_string("id", parent_image_spec.image_id); + f->dump_string("snapshot", parent_snap_spec.name); + f->dump_bool("trash", parent_image_spec.trash); + f->dump_unsigned("overlap", overlap); + f->close_section(); + } else { + std::cout << "\tparent: " << parent_image_spec.pool_name << "/"; + if (!parent_image_spec.pool_namespace.empty()) { + std::cout << parent_image_spec.pool_namespace << "/"; + } + std::cout << parent_image_spec.image_name << "@" + << parent_snap_spec.name; + if (parent_image_spec.trash) { + std::cout << " (trash " << parent_image_spec.image_id << ")"; + } + std::cout << std::endl; + std::cout << "\toverlap: " << byte_u_t(overlap) << std::endl; + } + } + + // striping info, if feature is set + if (features & RBD_FEATURE_STRIPINGV2) { + if (f) { + f->dump_unsigned("stripe_unit", image.get_stripe_unit()); + f->dump_unsigned("stripe_count", image.get_stripe_count()); + } else { + std::cout << "\tstripe unit: " << byte_u_t(image.get_stripe_unit()) + << std::endl + << "\tstripe count: " << image.get_stripe_count() << std::endl; + } + } + + if (features & RBD_FEATURE_JOURNALING) { + if (f) { + f->dump_string("journal", utils::image_id(image)); + } else { + std::cout << "\tjournal: " << utils::image_id(image) << std::endl; + } + } + + if (features & RBD_FEATURE_JOURNALING) { + if (f) { + f->open_object_section("mirroring"); + f->dump_string("state", + utils::mirror_image_state(mirror_image.state)); + if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) { + f->dump_string("global_id", mirror_image.global_id); + f->dump_bool("primary", mirror_image.primary); + } + f->close_section(); + } else { + std::cout << "\tmirroring state: " + << utils::mirror_image_state(mirror_image.state) << std::endl; + if (mirror_image.state != RBD_MIRROR_IMAGE_DISABLED) { + std::cout << "\tmirroring global id: " << mirror_image.global_id + << std::endl + << "\tmirroring primary: " + << (mirror_image.primary ? "true" : "false") <<std::endl; + } + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + utils::SNAPSHOT_PRESENCE_PERMITTED, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, + image_id, snap_name, true, &rados, &io_ctx, + &image); + if (r < 0) { + return r; + } + + r = do_show_info(io_ctx, image, snap_name, formatter.get()); + if (r < 0) { + std::cerr << "rbd: info: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"info"}, {}, "Show information about image size, striping, etc.", "", + &get_arguments, &execute); + +} // namespace info +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc new file mode 100644 index 00000000..d3a54f94 --- /dev/null +++ b/src/tools/rbd/action/Journal.cc @@ -0,0 +1,1254 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/Cond.h" +#include "common/Formatter.h" +#include "common/ceph_json.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "include/stringify.h" +#include <fstream> +#include <sstream> +#include <boost/program_options.hpp> +#include "cls/rbd/cls_rbd_client.h" +#include "cls/journal/cls_journal_types.h" +#include "cls/journal/cls_journal_client.h" + +#include "journal/Journaler.h" +#include "journal/ReplayEntry.h" +#include "journal/ReplayHandler.h" +#include "journal/Settings.h" +#include "librbd/journal/Types.h" + +namespace rbd { +namespace action { +namespace journal { + +namespace at = argument_types; +namespace po = boost::program_options; + +static const std::string JOURNAL_SPEC("journal-spec"); +static const std::string JOURNAL_NAME("journal"); +static const std::string DEST_JOURNAL_NAME("dest-journal"); + +void add_journal_option(po::options_description *opt, + at::ArgumentModifier modifier) { + std::string name = JOURNAL_NAME; + std::string description = at::get_description_prefix(modifier) + + "journal name"; + switch (modifier) { + case at::ARGUMENT_MODIFIER_NONE: + case at::ARGUMENT_MODIFIER_SOURCE: + break; + case at::ARGUMENT_MODIFIER_DEST: + name = DEST_JOURNAL_NAME; + break; + } + + // TODO add validator + opt->add_options() + (name.c_str(), po::value<std::string>(), description.c_str()); +} + +void add_journal_spec_options(po::options_description *pos, + po::options_description *opt, + at::ArgumentModifier modifier) { + + pos->add_options() + ((get_name_prefix(modifier) + JOURNAL_SPEC).c_str(), + (get_description_prefix(modifier) + "journal specification\n" + + "(example: [<pool-name>/[<namespace>/]]<journal-name>)").c_str()); + add_pool_option(opt, modifier); + add_namespace_option(opt, modifier); + add_image_option(opt, modifier); + add_journal_option(opt, modifier); +} + +int get_pool_journal_names(const po::variables_map &vm, + at::ArgumentModifier mod, + size_t *spec_arg_index, + std::string *pool_name, + std::string *namespace_name, + std::string *journal_name) { + std::string pool_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_POOL_NAME : at::POOL_NAME); + std::string namespace_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_NAMESPACE_NAME : at::NAMESPACE_NAME); + std::string image_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + at::DEST_IMAGE_NAME : at::IMAGE_NAME); + std::string journal_key = (mod == at::ARGUMENT_MODIFIER_DEST ? + DEST_JOURNAL_NAME : JOURNAL_NAME); + + if (vm.count(pool_key) && pool_name != nullptr) { + *pool_name = vm[pool_key].as<std::string>(); + } + if (vm.count(namespace_key) && namespace_name != nullptr) { + *namespace_name = vm[namespace_key].as<std::string>(); + } + if (vm.count(journal_key) && journal_name != nullptr) { + *journal_name = vm[journal_key].as<std::string>(); + } + + std::string image_name; + if (vm.count(image_key)) { + image_name = vm[image_key].as<std::string>(); + } + + int r; + if (journal_name != nullptr && !journal_name->empty()) { + // despite the separate pool option, + // we can also specify them via the journal option + std::string journal_name_copy(*journal_name); + r = extract_spec(journal_name_copy, pool_name, namespace_name, journal_name, + nullptr, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + } + + if (!image_name.empty()) { + // despite the separate pool option, + // we can also specify them via the image option + std::string image_name_copy(image_name); + r = extract_spec(image_name_copy, pool_name, namespace_name, &image_name, + nullptr, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + } + + if (journal_name != nullptr && spec_arg_index != nullptr && + journal_name->empty()) { + std::string spec = utils::get_positional_argument(vm, (*spec_arg_index)++); + if (!spec.empty()) { + r = extract_spec(spec, pool_name, namespace_name, journal_name, nullptr, + utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + } + } + + if (pool_name != nullptr && pool_name->empty()) { + *pool_name = utils::get_default_pool_name(); + } + + if (pool_name != nullptr && namespace_name != nullptr && + journal_name != nullptr && journal_name->empty() && !image_name.empty()) { + // Try to get journal name from image info. + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + int r = utils::init_and_open_image(*pool_name, *namespace_name, image_name, + "", "", true, &rados, &io_ctx, &image); + if (r < 0) { + std::cerr << "rbd: failed to open image " << image_name + << " to get journal name: " << cpp_strerror(r) << std::endl; + return r; + } + + uint64_t features; + r = image.features(&features); + if (r < 0) { + return r; + } + if ((features & RBD_FEATURE_JOURNALING) == 0) { + std::cerr << "rbd: journaling is not enabled for image " << image_name + << std::endl; + return -EINVAL; + } + *journal_name = utils::image_id(image); + } + + if (journal_name != nullptr && journal_name->empty()) { + std::string prefix = at::get_description_prefix(mod); + std::cerr << "rbd: " + << (mod == at::ARGUMENT_MODIFIER_DEST ? prefix : std::string()) + << "journal was not specified" << std::endl; + return -EINVAL; + } + + return 0; +} + +static int do_show_journal_info(librados::Rados& rados, librados::IoCtx& io_ctx, + const std::string& journal_id, Formatter *f) +{ + int r; + C_SaferCond cond; + + std::string header_oid = ::journal::Journaler::header_oid(journal_id); + std::string object_oid_prefix = ::journal::Journaler::object_oid_prefix( + io_ctx.get_id(), journal_id); + uint8_t order; + uint8_t splay_width; + int64_t pool_id; + + cls::journal::client::get_immutable_metadata(io_ctx, header_oid, &order, + &splay_width, &pool_id, &cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "failed to get journal metadata: " << cpp_strerror(r) + << std::endl; + return r; + } + + std::string object_pool_name; + if (pool_id >= 0) { + r = rados.pool_reverse_lookup(pool_id, &object_pool_name); + if (r < 0) { + std::cerr << "error looking up pool name for pool_id=" << pool_id << ": " + << cpp_strerror(r) << std::endl; + } + } + + if (f) { + f->open_object_section("journal"); + f->dump_string("journal_id", journal_id); + f->dump_string("header_oid", header_oid); + f->dump_string("object_oid_prefix", object_oid_prefix); + f->dump_int("order", order); + f->dump_int("splay_width", splay_width); + if (!object_pool_name.empty()) { + f->dump_string("object_pool", object_pool_name); + } + f->close_section(); + f->flush(std::cout); + } else { + std::cout << "rbd journal '" << journal_id << "':" << std::endl; + std::cout << "\theader_oid: " << header_oid << std::endl; + std::cout << "\tobject_oid_prefix: " << object_oid_prefix << std::endl; + std::cout << "\torder: " << static_cast<int>(order) << " (" + << byte_u_t(1ull << order) << " objects)"<< std::endl; + std::cout << "\tsplay_width: " << static_cast<int>(splay_width) << std::endl; + if (!object_pool_name.empty()) { + std::cout << "\tobject_pool: " << object_pool_name << std::endl; + } + } + return 0; +} + +static int do_show_journal_status(librados::IoCtx& io_ctx, + const std::string& journal_id, Formatter *f) +{ + int r; + + C_SaferCond cond; + uint64_t minimum_set; + uint64_t active_set; + std::set<cls::journal::Client> registered_clients; + std::string oid = ::journal::Journaler::header_oid(journal_id); + + cls::journal::client::get_mutable_metadata(io_ctx, oid, &minimum_set, + &active_set, ®istered_clients, + &cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "warning: failed to get journal metadata" << std::endl; + return r; + } + + if (f) { + f->open_object_section("status"); + f->dump_unsigned("minimum_set", minimum_set); + f->dump_unsigned("active_set", active_set); + f->open_array_section("registered_clients"); + for (std::set<cls::journal::Client>::iterator c = + registered_clients.begin(); c != registered_clients.end(); ++c) { + f->open_object_section("client"); + c->dump(f); + f->close_section(); + } + f->close_section(); + f->close_section(); + f->flush(std::cout); + } else { + std::cout << "minimum_set: " << minimum_set << std::endl; + std::cout << "active_set: " << active_set << std::endl; + std::cout << "registered clients: " << std::endl; + for (std::set<cls::journal::Client>::iterator c = + registered_clients.begin(); c != registered_clients.end(); ++c) { + std::cout << "\t" << *c << std::endl; + } + } + return 0; +} + +static int do_reset_journal(librados::IoCtx& io_ctx, + const std::string& journal_id) +{ + // disable/re-enable journaling to delete/re-create the journal + // to properly handle mirroring constraints + std::string image_name; + int r = librbd::cls_client::dir_get_name(&io_ctx, RBD_DIRECTORY, journal_id, + &image_name); + if (r < 0) { + std::cerr << "failed to locate journal's image: " << cpp_strerror(r) + << std::endl; + return r; + } + + librbd::Image image; + r = utils::open_image(io_ctx, image_name, false, &image); + if (r < 0) { + std::cerr << "failed to open image: " << cpp_strerror(r) << std::endl; + return r; + } + + r = image.update_features(RBD_FEATURE_JOURNALING, false); + if (r < 0) { + std::cerr << "failed to disable image journaling: " << cpp_strerror(r) + << std::endl; + return r; + } + + r = image.update_features(RBD_FEATURE_JOURNALING, true); + if (r < 0) { + std::cerr << "failed to re-enable image journaling: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +static int do_disconnect_journal_client(librados::IoCtx& io_ctx, + const std::string& journal_id, + const std::string& client_id) +{ + int r; + + C_SaferCond cond; + uint64_t minimum_set; + uint64_t active_set; + std::set<cls::journal::Client> registered_clients; + std::string oid = ::journal::Journaler::header_oid(journal_id); + + cls::journal::client::get_mutable_metadata(io_ctx, oid, &minimum_set, + &active_set, ®istered_clients, + &cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "warning: failed to get journal metadata" << std::endl; + return r; + } + + static const std::string IMAGE_CLIENT_ID(""); + + bool found = false; + for (auto &c : registered_clients) { + if (c.id == IMAGE_CLIENT_ID || (!client_id.empty() && client_id != c.id)) { + continue; + } + r = cls::journal::client::client_update_state(io_ctx, oid, c.id, + cls::journal::CLIENT_STATE_DISCONNECTED); + if (r < 0) { + std::cerr << "warning: failed to disconnect client " << c.id << ": " + << cpp_strerror(r) << std::endl; + return r; + } + std::cout << "client " << c.id << " disconnected" << std::endl; + found = true; + } + + if (!found) { + if (!client_id.empty()) { + std::cerr << "warning: client " << client_id << " is not registered" + << std::endl; + } else { + std::cerr << "no registered clients to disconnect" << std::endl; + } + return -ENOENT; + } + + bufferlist bl; + r = io_ctx.notify2(oid, bl, 5000, NULL); + if (r < 0) { + std::cerr << "warning: failed to notify state change:" << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +class Journaler : public ::journal::Journaler { +public: + Journaler(librados::IoCtx& io_ctx, const std::string& journal_id, + const std::string &client_id) : + ::journal::Journaler(io_ctx, journal_id, client_id, {}) { + } + + int init() { + int r; + + // TODO register with librbd payload + r = register_client(bufferlist()); + if (r < 0) { + std::cerr << "failed to register client: " << cpp_strerror(r) + << std::endl; + return r; + } + + C_SaferCond cond; + + ::journal::Journaler::init(&cond); + r = cond.wait(); + if (r < 0) { + std::cerr << "failed to initialize journal: " << cpp_strerror(r) + << std::endl; + (void) unregister_client(); + return r; + } + + return 0; + } + + int shut_down() { + int r = unregister_client(); + if (r < 0) { + std::cerr << "rbd: failed to unregister journal client: " + << cpp_strerror(r) << std::endl; + } + ::journal::Journaler::shut_down(); + + return r; + } +}; + +class JournalPlayer { +public: + JournalPlayer(librados::IoCtx& io_ctx, const std::string& journal_id, + const std::string &client_id) : + m_journaler(io_ctx, journal_id, client_id), + m_cond(), + m_r(0) { + } + + virtual ~JournalPlayer() {} + + virtual int exec() { + int r; + + r = m_journaler.init(); + if (r < 0) { + return r; + } + + ReplayHandler replay_handler(this); + + m_journaler.start_replay(&replay_handler); + + r = m_cond.wait(); + if (r < 0) { + std::cerr << "rbd: failed to process journal: " << cpp_strerror(r) + << std::endl; + if (m_r == 0) { + m_r = r; + } + } + return m_r; + } + + int shut_down() { + return m_journaler.shut_down(); + } + +protected: + struct ReplayHandler : public ::journal::ReplayHandler { + JournalPlayer *journal; + explicit ReplayHandler(JournalPlayer *_journal) : journal(_journal) {} + + void get() override {} + void put() override {} + + void handle_entries_available() override { + journal->handle_replay_ready(); + } + void handle_complete(int r) override { + journal->handle_replay_complete(r); + } + }; + + void handle_replay_ready() { + int r = 0; + while (true) { + ::journal::ReplayEntry replay_entry; + uint64_t tag_id; + if (!m_journaler.try_pop_front(&replay_entry, &tag_id)) { + break; + } + + r = process_entry(replay_entry, tag_id); + if (r < 0) { + break; + } + } + } + + virtual int process_entry(::journal::ReplayEntry replay_entry, + uint64_t tag_id) = 0; + + void handle_replay_complete(int r) { + if (m_r == 0 && r < 0) { + m_r = r; + } + m_journaler.stop_replay(&m_cond); + } + + Journaler m_journaler; + C_SaferCond m_cond; + int m_r; +}; + +static int inspect_entry(bufferlist& data, + librbd::journal::EventEntry& event_entry, + bool verbose) { + try { + auto it = data.cbegin(); + decode(event_entry, it); + } catch (const buffer::error &err) { + std::cerr << "failed to decode event entry: " << err.what() << std::endl; + return -EINVAL; + } + if (verbose) { + JSONFormatter f(true); + f.open_object_section("event_entry"); + event_entry.dump(&f); + f.close_section(); + f.flush(std::cout); + } + return 0; +} + +class JournalInspector : public JournalPlayer { +public: + JournalInspector(librados::IoCtx& io_ctx, const std::string& journal_id, + bool verbose) : + JournalPlayer(io_ctx, journal_id, "INSPECT"), + m_verbose(verbose), + m_s() { + } + + int exec() override { + int r = JournalPlayer::exec(); + m_s.print(); + return r; + } + +private: + struct Stats { + Stats() : total(0), error(0) {} + + void print() { + std::cout << "Summary:" << std::endl + << " " << total << " entries inspected, " << error << " errors" + << std::endl; + } + + int total; + int error; + }; + + int process_entry(::journal::ReplayEntry replay_entry, + uint64_t tag_id) override { + m_s.total++; + if (m_verbose) { + std::cout << "Entry: tag_id=" << tag_id << ", commit_tid=" + << replay_entry.get_commit_tid() << std::endl; + } + bufferlist data = replay_entry.get_data(); + librbd::journal::EventEntry event_entry; + int r = inspect_entry(data, event_entry, m_verbose); + if (r < 0) { + m_r = r; + m_s.error++; + } + return 0; + } + + bool m_verbose; + Stats m_s; +}; + +static int do_inspect_journal(librados::IoCtx& io_ctx, + const std::string& journal_id, + bool verbose) { + JournalInspector inspector(io_ctx, journal_id, verbose); + int r = inspector.exec(); + if (r < 0) { + inspector.shut_down(); + return r; + } + + r = inspector.shut_down(); + if (r < 0) { + return r; + } + return 0; +} + +struct ExportEntry { + uint64_t tag_id; + uint64_t commit_tid; + int type; + bufferlist entry; + + ExportEntry() : tag_id(0), commit_tid(0), type(0), entry() {} + + ExportEntry(uint64_t tag_id, uint64_t commit_tid, int type, + const bufferlist& entry) + : tag_id(tag_id), commit_tid(commit_tid), type(type), entry(entry) { + } + + void dump(Formatter *f) const { + ::encode_json("tag_id", tag_id, f); + ::encode_json("commit_tid", commit_tid, f); + ::encode_json("type", type, f); + ::encode_json("entry", entry, f); + } + + void decode_json(JSONObj *obj) { + JSONDecoder::decode_json("tag_id", tag_id, obj); + JSONDecoder::decode_json("commit_tid", commit_tid, obj); + JSONDecoder::decode_json("type", type, obj); + JSONDecoder::decode_json("entry", entry, obj); + } +}; + +class JournalExporter : public JournalPlayer { +public: + JournalExporter(librados::IoCtx& io_ctx, const std::string& journal_id, + int fd, bool no_error, bool verbose) : + JournalPlayer(io_ctx, journal_id, "EXPORT"), + m_journal_id(journal_id), + m_fd(fd), + m_no_error(no_error), + m_verbose(verbose), + m_s() { + } + + int exec() override { + std::string header("# journal_id: " + m_journal_id + "\n"); + int r; + r = safe_write(m_fd, header.c_str(), header.size()); + if (r < 0) { + std::cerr << "rbd: failed to write to export file: " << cpp_strerror(r) + << std::endl; + return r; + } + r = JournalPlayer::exec(); + m_s.print(); + return r; + } + +private: + struct Stats { + Stats() : total(0), error(0) {} + + void print() { + std::cout << total << " entries processed, " << error << " errors" + << std::endl; + } + + int total; + int error; + }; + + int process_entry(::journal::ReplayEntry replay_entry, + uint64_t tag_id) override { + m_s.total++; + int type = -1; + bufferlist entry = replay_entry.get_data(); + librbd::journal::EventEntry event_entry; + int r = inspect_entry(entry, event_entry, m_verbose); + if (r < 0) { + m_s.error++; + m_r = r; + return m_no_error ? 0 : r; + } else { + type = event_entry.get_event_type(); + } + ExportEntry export_entry(tag_id, replay_entry.get_commit_tid(), type, + entry); + JSONFormatter f; + ::encode_json("event_entry", export_entry, &f); + std::ostringstream oss; + f.flush(oss); + std::string objstr = oss.str(); + std::string header = stringify(objstr.size()) + " "; + r = safe_write(m_fd, header.c_str(), header.size()); + if (r == 0) { + r = safe_write(m_fd, objstr.c_str(), objstr.size()); + } + if (r == 0) { + r = safe_write(m_fd, "\n", 1); + } + if (r < 0) { + std::cerr << "rbd: failed to write to export file: " << cpp_strerror(r) + << std::endl; + m_s.error++; + return r; + } + return 0; + } + + std::string m_journal_id; + int m_fd; + bool m_no_error; + bool m_verbose; + Stats m_s; +}; + +static int do_export_journal(librados::IoCtx& io_ctx, + const std::string& journal_id, + const std::string& path, + bool no_error, bool verbose) { + int r; + int fd; + bool to_stdout = path == "-"; + if (to_stdout) { + fd = STDOUT_FILENO; + } else { + fd = open(path.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error creating " << path << std::endl; + return r; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + JournalExporter exporter(io_ctx, journal_id, fd, no_error, verbose); + r = exporter.exec(); + + if (!to_stdout) { + close(fd); + } + + int shut_down_r = exporter.shut_down(); + if (r == 0 && shut_down_r < 0) { + r = shut_down_r; + } + + return r; +} + +class JournalImporter { +public: + JournalImporter(librados::IoCtx& io_ctx, const std::string& journal_id, + int fd, bool no_error, bool verbose) : + m_journaler(io_ctx, journal_id, "IMPORT"), + m_fd(fd), + m_no_error(no_error), + m_verbose(verbose) { + } + + bool read_entry(bufferlist& bl, int& r) { + // Entries are stored in the file using the following format: + // + // # Optional comments + // NNN {json encoded entry} + // ... + // + // Where NNN is the encoded entry size. + bl.clear(); + char buf[80]; + // Skip line feed and comments (lines started with #). + while ((r = safe_read_exact(m_fd, buf, 1)) == 0) { + if (buf[0] == '\n') { + continue; + } else if (buf[0] == '#') { + while ((r = safe_read_exact(m_fd, buf, 1)) == 0) { + if (buf[0] == '\n') { + break; + } + } + } else { + break; + } + } + if (r < 0) { + if (r == -EDOM) { + r = 0; + } + return false; + } + // Read entry size to buf. + if (!isdigit(buf[0])) { + r = -EINVAL; + std::cerr << "rbd: import data invalid format (digit expected)" + << std::endl; + return false; + } + for (size_t i = 1; i < sizeof(buf); i++) { + r = safe_read_exact(m_fd, buf + i, 1); + if (r < 0) { + std::cerr << "rbd: error reading import data" << std::endl; + return false; + } + if (!isdigit(buf[i])) { + if (buf[i] != ' ') { + r = -EINVAL; + std::cerr << "rbd: import data invalid format (space expected)" + << std::endl; + return false; + } + buf[i] = '\0'; + break; + } + } + int entry_size = atoi(buf); + if (entry_size == 0) { + r = -EINVAL; + std::cerr << "rbd: import data invalid format (zero entry size)" + << std::endl; + return false; + } + ceph_assert(entry_size > 0); + // Read entry. + r = bl.read_fd(m_fd, entry_size); + if (r < 0) { + std::cerr << "rbd: error reading from stdin: " << cpp_strerror(r) + << std::endl; + return false; + } + if (r != entry_size) { + std::cerr << "rbd: error reading from stdin: truncated" + << std::endl; + r = -EINVAL; + return false; + } + r = 0; + return true; + } + + int exec() { + int r = m_journaler.init(); + if (r < 0) { + return r; + } + m_journaler.start_append(0); + + int r1 = 0; + bufferlist bl; + int n = 0; + int error_count = 0; + while (read_entry(bl, r)) { + n++; + error_count++; + JSONParser p; + if (!p.parse(bl.c_str(), bl.length())) { + std::cerr << "rbd: error parsing input (entry " << n << ")" + << std::endl; + r = -EINVAL; + if (m_no_error) { + r1 = r; + continue; + } else { + break; + } + } + ExportEntry e; + try { + decode_json_obj(e, &p); + } catch (JSONDecoder::err& err) { + std::cerr << "rbd: error json decoding import data (entry " << n << "):" + << err.message << std::endl; + r = -EINVAL; + if (m_no_error) { + r1 = r; + continue; + } else { + break; + } + } + librbd::journal::EventEntry event_entry; + r = inspect_entry(e.entry, event_entry, m_verbose); + if (r < 0) { + std::cerr << "rbd: corrupted entry " << n << ": tag_tid=" << e.tag_id + << ", commit_tid=" << e.commit_tid << std::endl; + if (m_no_error) { + r1 = r; + continue; + } else { + break; + } + } + m_journaler.append(e.tag_id, e.entry); + error_count--; + } + + std::cout << n << " entries processed, " << error_count << " errors" << std::endl; + + std::cout << "Waiting for journal append to complete..." << std::endl; + + C_SaferCond cond; + m_journaler.stop_append(&cond); + r = cond.wait(); + + if (r < 0) { + std::cerr << "failed to append journal: " << cpp_strerror(r) << std::endl; + } + + if (r1 < 0 && r == 0) { + r = r1; + } + return r; + } + + int shut_down() { + return m_journaler.shut_down(); + } + +private: + Journaler m_journaler; + int m_fd; + bool m_no_error; + bool m_verbose; +}; + +static int do_import_journal(librados::IoCtx& io_ctx, + const std::string& journal_id, + const std::string& path, + bool no_error, bool verbose) { + int r; + + int fd; + bool from_stdin = path == "-"; + if (from_stdin) { + fd = STDIN_FILENO; + } else { + if ((fd = open(path.c_str(), O_RDONLY)) < 0) { + r = -errno; + std::cerr << "rbd: error opening " << path << std::endl; + return r; + } +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + } + + JournalImporter importer(io_ctx, journal_id, fd, no_error, verbose); + r = importer.exec(); + + if (!from_stdin) { + close(fd); + } + + int shut_down_r = importer.shut_down(); + if (r == 0 && shut_down_r < 0) { + r = shut_down_r; + } + + return r; +} + +void get_info_arguments(po::options_description *positional, + po::options_description *options) { + add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_info(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string journal_name; + int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_show_journal_info(rados, io_ctx, journal_name, formatter.get()); + if (r < 0) { + std::cerr << "rbd: journal info: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; + +} + +void get_status_arguments(po::options_description *positional, + po::options_description *options) { + add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_status(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string journal_name; + int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_show_journal_status(io_ctx, journal_name, formatter.get()); + if (r < 0) { + std::cerr << "rbd: journal status: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +void get_reset_arguments(po::options_description *positional, + po::options_description *options) { + add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_reset(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string journal_name; + int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_reset_journal(io_ctx, journal_name); + if (r < 0) { + std::cerr << "rbd: journal reset: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +void get_client_disconnect_arguments(po::options_description *positional, + po::options_description *options) { + add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + options->add_options() + ("client-id", po::value<std::string>(), + "client ID (or leave unspecified to disconnect all)"); +} + +int execute_client_disconnect(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string journal_name; + int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + std::string client_id; + if (vm.count("client-id")) { + client_id = vm["client-id"].as<std::string>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_disconnect_journal_client(io_ctx, journal_name, client_id); + if (r < 0) { + std::cerr << "rbd: journal client disconnect: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_inspect_arguments(po::options_description *positional, + po::options_description *options) { + add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_verbose_option(options); +} + +int execute_inspect(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string journal_name; + int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_inspect_journal(io_ctx, journal_name, vm[at::VERBOSE].as<bool>()); + if (r < 0) { + std::cerr << "rbd: journal inspect: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +void get_export_arguments(po::options_description *positional, + po::options_description *options) { + add_journal_spec_options(positional, options, + at::ARGUMENT_MODIFIER_SOURCE); + at::add_path_options(positional, options, + "export file (or '-' for stdout)"); + at::add_verbose_option(options); + at::add_no_error_option(options); +} + +int execute_export(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string journal_name; + int r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + std::string path; + r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_export_journal(io_ctx, journal_name, path, vm[at::NO_ERROR].as<bool>(), + vm[at::VERBOSE].as<bool>()); + if (r < 0) { + std::cerr << "rbd: journal export: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +void get_import_arguments(po::options_description *positional, + po::options_description *options) { + at::add_path_options(positional, options, + "import file (or '-' for stdin)"); + add_journal_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_verbose_option(options); + at::add_no_error_option(options); +} + +int execute_import(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string path; + size_t arg_index = 0; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + std::string pool_name; + std::string namespace_name; + std::string journal_name; + r = get_pool_journal_names(vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, + &pool_name, &namespace_name, &journal_name); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = do_import_journal(io_ctx, journal_name, path, vm[at::NO_ERROR].as<bool>(), + vm[at::VERBOSE].as<bool>()); + if (r < 0) { + std::cerr << "rbd: journal import: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action_info( + {"journal", "info"}, {}, "Show information about image journal.", "", + &get_info_arguments, &execute_info); + +Shell::Action action_status( + {"journal", "status"}, {}, "Show status of image journal.", "", + &get_status_arguments, &execute_status); + +Shell::Action action_reset( + {"journal", "reset"}, {}, "Reset image journal.", "", + &get_reset_arguments, &execute_reset); + +Shell::Action action_inspect( + {"journal", "inspect"}, {}, "Inspect image journal for structural errors.", "", + &get_inspect_arguments, &execute_inspect); + +Shell::Action action_export( + {"journal", "export"}, {}, "Export image journal.", "", + &get_export_arguments, &execute_export); + +Shell::Action action_import( + {"journal", "import"}, {}, "Import image journal.", "", + &get_import_arguments, &execute_import); + +Shell::Action action_disconnect( + {"journal", "client", "disconnect"}, {}, + "Flag image journal client as disconnected.", "", + &get_client_disconnect_arguments, &execute_client_disconnect); + +} // namespace journal +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Kernel.cc b/src/tools/rbd/action/Kernel.cc new file mode 100644 index 00000000..dc0938eb --- /dev/null +++ b/src/tools/rbd/action/Kernel.cc @@ -0,0 +1,561 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "acconfig.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/krbd.h" +#include "include/stringify.h" +#include "include/uuid.h" +#include "common/config_proxy.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "common/strtol.h" +#include "common/Formatter.h" +#include "msg/msg_types.h" +#include "global/global_context.h" +#include <iostream> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/scope_exit.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace kernel { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +std::map<std::string, std::string> map_options; // used for both map and unmap + +} // anonymous namespace + +static std::string map_option_uuid_cb(const char *value_char) +{ + uuid_d u; + if (!u.parse(value_char)) + return ""; + + return stringify(u); +} + +static std::string map_option_ip_cb(const char *value_char) +{ + entity_addr_t a; + const char *endptr; + if (!a.parse(value_char, &endptr) || + endptr != value_char + strlen(value_char)) { + return ""; + } + + return stringify(a.get_sockaddr()); +} + +static std::string map_option_int_cb(const char *value_char) +{ + std::string err; + int d = strict_strtol(value_char, 10, &err); + if (!err.empty() || d < 0) + return ""; + + return stringify(d); +} + +static std::string map_option_ms_mode_cb(const char *value_char) +{ + if (!strcmp(value_char, "legacy") || !strcmp(value_char, "crc") || + !strcmp(value_char, "secure") || !strcmp(value_char, "prefer-crc") || + !strcmp(value_char, "prefer-secure")) { + return value_char; + } + return ""; +} + +static void put_map_option(const std::string &key, const std::string &val) +{ + map_options[key] = val; +} + +static int put_map_option_value(const std::string &opt, const char *value_char, + std::string (*parse_cb)(const char *)) +{ + if (!value_char || *value_char == '\0') { + std::cerr << "rbd: " << opt << " option requires a value" << std::endl; + return -EINVAL; + } + + std::string value = parse_cb(value_char); + if (value.empty()) { + std::cerr << "rbd: invalid " << opt << " value '" << value_char << "'" + << std::endl; + return -EINVAL; + } + + put_map_option(opt, opt + "=" + value); + return 0; +} + +static int parse_map_options(const std::string &options_string) +{ + char *options = strdup(options_string.c_str()); + BOOST_SCOPE_EXIT(options) { + free(options); + } BOOST_SCOPE_EXIT_END; + + for (char *this_char = strtok(options, ", "); + this_char != NULL; + this_char = strtok(NULL, ",")) { + char *value_char; + + if ((value_char = strchr(this_char, '=')) != NULL) + *value_char++ = '\0'; + + if (!strcmp(this_char, "fsid")) { + if (put_map_option_value("fsid", value_char, map_option_uuid_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "ip")) { + if (put_map_option_value("ip", value_char, map_option_ip_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "share") || !strcmp(this_char, "noshare")) { + put_map_option("share", this_char); + } else if (!strcmp(this_char, "crc") || !strcmp(this_char, "nocrc")) { + put_map_option("crc", this_char); + } else if (!strcmp(this_char, "cephx_require_signatures") || + !strcmp(this_char, "nocephx_require_signatures")) { + put_map_option("cephx_require_signatures", this_char); + } else if (!strcmp(this_char, "tcp_nodelay") || + !strcmp(this_char, "notcp_nodelay")) { + put_map_option("tcp_nodelay", this_char); + } else if (!strcmp(this_char, "cephx_sign_messages") || + !strcmp(this_char, "nocephx_sign_messages")) { + put_map_option("cephx_sign_messages", this_char); + } else if (!strcmp(this_char, "mount_timeout")) { + if (put_map_option_value("mount_timeout", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "osd_request_timeout")) { + if (put_map_option_value("osd_request_timeout", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "lock_timeout")) { + if (put_map_option_value("lock_timeout", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "osdkeepalive")) { + if (put_map_option_value("osdkeepalive", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "osd_idle_ttl")) { + if (put_map_option_value("osd_idle_ttl", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "rw") || !strcmp(this_char, "ro")) { + put_map_option("rw", this_char); + } else if (!strcmp(this_char, "queue_depth")) { + if (put_map_option_value("queue_depth", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "lock_on_read")) { + put_map_option("lock_on_read", this_char); + } else if (!strcmp(this_char, "exclusive")) { + put_map_option("exclusive", this_char); + } else if (!strcmp(this_char, "notrim")) { + put_map_option("notrim", this_char); + } else if (!strcmp(this_char, "abort_on_full")) { + put_map_option("abort_on_full", this_char); + } else if (!strcmp(this_char, "alloc_size")) { + if (put_map_option_value("alloc_size", value_char, map_option_int_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "ms_mode")) { + if (put_map_option_value("ms_mode", value_char, map_option_ms_mode_cb)) + return -EINVAL; + } else if (!strcmp(this_char, "udev") || !strcmp(this_char, "noudev")) { + put_map_option("udev", this_char); + } else { + std::cerr << "rbd: unknown map option '" << this_char << "'" << std::endl; + return -EINVAL; + } + } + + return 0; +} + +static int parse_unmap_options(const std::string &options_string) +{ + char *options = strdup(options_string.c_str()); + BOOST_SCOPE_EXIT(options) { + free(options); + } BOOST_SCOPE_EXIT_END; + + for (char *this_char = strtok(options, ", "); + this_char != NULL; + this_char = strtok(NULL, ",")) { + char *value_char; + + if ((value_char = strchr(this_char, '=')) != NULL) + *value_char++ = '\0'; + + if (!strcmp(this_char, "force")) { + put_map_option("force", this_char); + } else if (!strcmp(this_char, "udev") || !strcmp(this_char, "noudev")) { + put_map_option("udev", this_char); + } else { + std::cerr << "rbd: unknown unmap option '" << this_char << "'" << std::endl; + return -EINVAL; + } + } + + return 0; +} + +static int do_kernel_list(Formatter *f) { +#if defined(WITH_KRBD) + struct krbd_ctx *krbd; + int r; + + r = krbd_create_from_context(g_ceph_context, 0, &krbd); + if (r < 0) + return r; + + r = krbd_showmapped(krbd, f); + + krbd_destroy(krbd); + return r; +#else + std::cerr << "rbd: kernel device is not supported" << std::endl; + return -EOPNOTSUPP; +#endif +} + +static int get_unsupported_features(librbd::Image &image, + uint64_t *unsupported_features) +{ + char buf[20]; + uint64_t features, supported_features; + int r; + + r = safe_read_file("/sys/bus/rbd/", "supported_features", buf, + sizeof(buf) - 1); + if (r < 0) + return r; + + buf[r] = '\0'; + try { + supported_features = std::stoull(buf, nullptr, 16); + } catch (...) { + return -EINVAL; + } + + r = image.features(&features); + if (r < 0) + return r; + + *unsupported_features = features & ~supported_features; + return 0; +} + +/* + * hint user to check syslog for krbd related messages and provide suggestions + * based on errno return by krbd_map(). also note that even if some librbd calls + * fail, we at least dump the "try dmesg..." message to aid debugging. + */ +static void print_error_description(const char *poolname, + const char *nspace_name, + const char *imgname, + const char *snapname, + int maperrno) +{ + int r; + uint8_t oldformat; + librados::Rados rados; + librados::IoCtx ioctx; + librbd::Image image; + + if (maperrno == -ENOENT) + goto done; + + r = utils::init_and_open_image(poolname, nspace_name, imgname, "", snapname, + true, &rados, &ioctx, &image); + if (r < 0) + goto done; + + r = image.old_format(&oldformat); + if (r < 0) + goto done; + + /* + * kernel returns -ENXIO when mapping a V2 image due to unsupported feature + * set - so, hint about that too... + */ + if (!oldformat && (maperrno == -ENXIO)) { + uint64_t unsupported_features; + bool need_terminate = true; + + std::cout << "RBD image feature set mismatch. "; + r = get_unsupported_features(image, &unsupported_features); + if (r == 0 && (unsupported_features & ~RBD_FEATURES_ALL) == 0) { + uint64_t immutable = RBD_FEATURES_ALL & ~(RBD_FEATURES_MUTABLE | + RBD_FEATURES_DISABLE_ONLY); + if (unsupported_features & immutable) { + std::cout << "This image cannot be mapped because the following " + << "immutable features are unsupported by the kernel:"; + unsupported_features &= immutable; + need_terminate = false; + } else { + std::cout << "You can disable features unsupported by the kernel " + << "with \"rbd feature disable "; + if (poolname != utils::get_default_pool_name() || *nspace_name) { + std::cout << poolname << "/"; + } + if (*nspace_name) { + std::cout << nspace_name << "/"; + } + std::cout << imgname; + } + } else { + std::cout << "Try disabling features unsupported by the kernel " + << "with \"rbd feature disable"; + unsupported_features = 0; + } + for (auto it : at::ImageFeatures::FEATURE_MAPPING) { + if (it.first & unsupported_features) { + std::cout << " " << it.second; + } + } + if (need_terminate) + std::cout << "\""; + std::cout << "." << std::endl; + } + + done: + std::cout << "In some cases useful info is found in syslog - try \"dmesg | tail\"." << std::endl; +} + +static int do_kernel_map(const char *poolname, const char *nspace_name, + const char *imgname, const char *snapname) +{ +#if defined(WITH_KRBD) + struct krbd_ctx *krbd; + std::ostringstream oss; + uint32_t flags = 0; + char *devnode; + int r; + + for (auto it = map_options.begin(); it != map_options.end(); ) { + // for compatibility with < 3.7 kernels, assume that rw is on by + // default and omit it even if it was specified by the user + // (see ceph.git commit fb0f1986449b) + if (it->first == "rw" && it->second == "rw") { + it = map_options.erase(it); + } else if (it->first == "udev") { + if (it->second == "noudev") { + flags |= KRBD_CTX_F_NOUDEV; + } + it = map_options.erase(it); + } else { + if (it != map_options.begin()) + oss << ","; + oss << it->second; + ++it; + } + } + + r = krbd_create_from_context(g_ceph_context, flags, &krbd); + if (r < 0) + return r; + + r = krbd_is_mapped(krbd, poolname, nspace_name, imgname, snapname, &devnode); + if (r < 0) { + std::cerr << "rbd: warning: can't get image map information: " + << cpp_strerror(r) << std::endl; + } else if (r > 0) { + std::cerr << "rbd: warning: image already mapped as " << devnode + << std::endl; + free(devnode); + } + + r = krbd_map(krbd, poolname, nspace_name, imgname, snapname, + oss.str().c_str(), &devnode); + if (r < 0) { + print_error_description(poolname, nspace_name, imgname, snapname, r); + goto out; + } + + std::cout << devnode << std::endl; + + free(devnode); +out: + krbd_destroy(krbd); + return r; +#else + std::cerr << "rbd: kernel device is not supported" << std::endl; + return -EOPNOTSUPP; +#endif +} + +static int do_kernel_unmap(const char *dev, const char *poolname, + const char *nspace_name, const char *imgname, + const char *snapname) +{ +#if defined(WITH_KRBD) + struct krbd_ctx *krbd; + std::ostringstream oss; + uint32_t flags = 0; + int r; + + for (auto it = map_options.begin(); it != map_options.end(); ) { + if (it->first == "udev") { + if (it->second == "noudev") { + flags |= KRBD_CTX_F_NOUDEV; + } + it = map_options.erase(it); + } else { + if (it != map_options.begin()) + oss << ","; + oss << it->second; + ++it; + } + } + + r = krbd_create_from_context(g_ceph_context, flags, &krbd); + if (r < 0) + return r; + + if (dev) + r = krbd_unmap(krbd, dev, oss.str().c_str()); + else + r = krbd_unmap_by_spec(krbd, poolname, nspace_name, imgname, snapname, + oss.str().c_str()); + + krbd_destroy(krbd); + return r; +#else + std::cerr << "rbd: kernel device is not supported" << std::endl; + return -EOPNOTSUPP; +#endif +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + at::Format::Formatter formatter; + int r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + utils::init_context(); + + r = do_kernel_list(formatter.get()); + if (r < 0) { + std::cerr << "rbd: device list failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string nspace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + // parse default options first so they can be overwritten by cli options + r = parse_map_options( + g_conf().get_val<std::string>("rbd_default_map_options")); + if (r < 0) { + std::cerr << "rbd: couldn't parse default map options" << std::endl; + return r; + } + + if (vm.count("options")) { + for (auto &options : vm["options"].as<std::vector<std::string>>()) { + r = parse_map_options(options); + if (r < 0) { + std::cerr << "rbd: couldn't parse map options" << std::endl; + return r; + } + } + } + + // parse options common to all device types after parsing krbd-specific + // options so that common options win (in particular "-o rw --read-only" + // should result in read-only mapping) + if (vm["read-only"].as<bool>()) { + put_map_option("rw", "ro"); + } + if (vm["exclusive"].as<bool>()) { + put_map_option("exclusive", "exclusive"); + } + + utils::init_context(); + + r = do_kernel_map(pool_name.c_str(), nspace_name.c_str(), image_name.c_str(), + snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: map failed: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string device_name = utils::get_positional_argument(vm, 0); + if (!boost::starts_with(device_name, "/dev/")) { + device_name.clear(); + } + + size_t arg_index = 0; + std::string pool_name; + std::string nspace_name; + std::string image_name; + std::string snap_name; + int r; + if (device_name.empty()) { + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name, + &image_name, &snap_name, false, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + } + + if (device_name.empty() && image_name.empty()) { + std::cerr << "rbd: unmap requires either image name or device path" + << std::endl; + return -EINVAL; + } + + if (vm.count("options")) { + for (auto &options : vm["options"].as<std::vector<std::string>>()) { + r = parse_unmap_options(options); + if (r < 0) { + std::cerr << "rbd: couldn't parse unmap options" << std::endl; + return r; + } + } + } + + utils::init_context(); + + r = do_kernel_unmap(device_name.empty() ? nullptr : device_name.c_str(), + pool_name.c_str(), nspace_name.c_str(), + image_name.c_str(), snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: unmap failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +} // namespace kernel +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/List.cc b/src/tools/rbd/action/List.cc new file mode 100644 index 00000000..e6025418 --- /dev/null +++ b/src/tools/rbd/action/List.cc @@ -0,0 +1,340 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "include/types.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/bind.hpp> +#include <boost/program_options.hpp> +#include "global/global_context.h" + +namespace rbd { + +namespace action { +namespace list { + +namespace at = argument_types; +namespace po = boost::program_options; + +enum WorkerState { + STATE_IDLE = 0, + STATE_OPENED, + STATE_DONE +} ; + +struct WorkerEntry { + librbd::Image img; + librbd::RBD::AioCompletion* completion; + WorkerState state; + string name; + + WorkerEntry() { + state = STATE_IDLE; + completion = nullptr; + } +}; + + +int list_process_image(librados::Rados* rados, WorkerEntry* w, bool lflag, Formatter *f, TextTable &tbl) +{ + int r = 0; + librbd::image_info_t info; + std::string parent; + + // handle second-nth trips through loop + librbd::linked_image_spec_t parent_image_spec; + librbd::snap_spec_t parent_snap_spec; + r = w->img.get_parent(&parent_image_spec, &parent_snap_spec); + if (r < 0 && r != -ENOENT) { + return r; + } + + bool has_parent = false; + if (r != -ENOENT) { + parent = parent_image_spec.pool_name + "/"; + if (!parent_image_spec.pool_namespace.empty()) { + parent += parent_image_spec.pool_namespace + "/"; + } + parent += parent_image_spec.image_name + "@" + parent_snap_spec.name; + has_parent = true; + } + + if (w->img.stat(info, sizeof(info)) < 0) { + return -EINVAL; + } + + uint8_t old_format; + w->img.old_format(&old_format); + + std::list<librbd::locker_t> lockers; + bool exclusive; + r = w->img.list_lockers(&lockers, &exclusive, NULL); + if (r < 0) + return r; + std::string lockstr; + if (!lockers.empty()) { + lockstr = (exclusive) ? "excl" : "shr"; + } + + if (f) { + f->open_object_section("image"); + f->dump_string("image", w->name); + f->dump_unsigned("size", info.size); + if (has_parent) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image_spec.pool_name); + f->dump_string("pool_namespace", parent_image_spec.pool_namespace); + f->dump_string("image", parent_image_spec.image_name); + f->dump_string("snapshot", parent_snap_spec.name); + f->close_section(); + } + f->dump_int("format", old_format ? 1 : 2); + if (!lockers.empty()) + f->dump_string("lock_type", exclusive ? "exclusive" : "shared"); + f->close_section(); + } else { + tbl << w->name + << stringify(byte_u_t(info.size)) + << parent + << ((old_format) ? '1' : '2') + << "" // protect doesn't apply to images + << lockstr + << TextTable::endrow; + } + + std::vector<librbd::snap_info_t> snaplist; + if (w->img.snap_list(snaplist) >= 0 && !snaplist.empty()) { + snaplist.erase(remove_if(snaplist.begin(), + snaplist.end(), + boost::bind(utils::is_not_user_snap_namespace, &w->img, _1)), + snaplist.end()); + for (std::vector<librbd::snap_info_t>::iterator s = snaplist.begin(); + s != snaplist.end(); ++s) { + bool is_protected; + bool has_parent = false; + parent.clear(); + w->img.snap_set(s->name.c_str()); + r = w->img.snap_is_protected(s->name.c_str(), &is_protected); + if (r < 0) + return r; + if (w->img.get_parent(&parent_image_spec, &parent_snap_spec) >= 0) { + parent = parent_image_spec.pool_name + "/"; + if (!parent_image_spec.pool_namespace.empty()) { + parent += parent_image_spec.pool_namespace + "/"; + } + parent += parent_image_spec.image_name + "@" + parent_snap_spec.name; + has_parent = true; + } + if (f) { + f->open_object_section("snapshot"); + f->dump_string("image", w->name); + f->dump_string("snapshot", s->name); + f->dump_unsigned("size", s->size); + if (has_parent) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image_spec.pool_name); + f->dump_string("pool_namespace", parent_image_spec.pool_namespace); + f->dump_string("image", parent_image_spec.image_name); + f->dump_string("snapshot", parent_snap_spec.name); + f->close_section(); + } + f->dump_int("format", old_format ? 1 : 2); + f->dump_string("protected", is_protected ? "true" : "false"); + f->close_section(); + } else { + tbl << w->name + "@" + s->name + << stringify(byte_u_t(s->size)) + << parent + << ((old_format) ? '1' : '2') + << (is_protected ? "yes" : "") + << "" // locks don't apply to snaps + << TextTable::endrow; + } + } + } + + return 0; +} + +int do_list(const std::string &pool_name, const std::string& namespace_name, + bool lflag, int threads, Formatter *f) { + std::vector<WorkerEntry*> workers; + std::vector<librbd::image_spec_t> images; + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx ioctx; + + if (threads < 1) { + threads = 1; + } + if (threads > 32) { + threads = 32; + } + + int r = utils::init(pool_name, namespace_name, &rados, &ioctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + r = rbd.list2(ioctx, &images); + if (r < 0) + return r; + + if (!lflag) { + if (f) + f->open_array_section("images"); + for (auto& image : images) { + if (f) + f->dump_string("name", image.name); + else + std::cout << image.name << std::endl; + } + if (f) { + f->close_section(); + f->flush(std::cout); + } + return 0; + } + + TextTable tbl; + + if (f) { + f->open_array_section("images"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("PARENT", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("FMT", TextTable::LEFT, TextTable::RIGHT); + tbl.define_column("PROT", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("LOCK", TextTable::LEFT, TextTable::LEFT); + } + + for (size_t left = 0; left < std::min<size_t>(threads, images.size()); + left++) { + workers.push_back(new WorkerEntry()); + } + + auto i = images.begin(); + while (true) { + size_t workers_idle = 0; + for (auto comp : workers) { + switch (comp->state) { + case STATE_DONE: + comp->completion->wait_for_complete(); + comp->state = STATE_IDLE; + comp->completion->release(); + comp->completion = nullptr; + // we want it to fall through in this case + case STATE_IDLE: + if (i == images.end()) { + workers_idle++; + continue; + } + comp->name = i->name; + comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr); + r = rbd.aio_open_read_only(ioctx, comp->img, i->name.c_str(), nullptr, + comp->completion); + i++; + comp->state = STATE_OPENED; + break; + case STATE_OPENED: + comp->completion->wait_for_complete(); + // image might disappear between rbd.list() and rbd.open(); ignore + // that, warn about other possible errors (EPERM, say, for opening + // an old-format image, because you need execute permission for the + // class method) + r = comp->completion->get_return_value(); + comp->completion->release(); + if (r < 0) { + std::cerr << "rbd: error opening " << comp->name << ": " + << cpp_strerror(r) << std::endl; + + // in any event, continue to next image + comp->state = STATE_IDLE; + continue; + } + r = list_process_image(&rados, comp, lflag, f, tbl); + if (r < 0) { + std::cerr << "rbd: error processing image " << comp->name << ": " + << cpp_strerror(r) << std::endl; + } + comp->completion = new librbd::RBD::AioCompletion(nullptr, nullptr); + r = comp->img.aio_close(comp->completion); + comp->state = STATE_DONE; + break; + } + } + if (workers_idle == workers.size()) { + break; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else if (!images.empty()) { + std::cout << tbl; + } + + rados.shutdown(); + + for (auto comp : workers) { + delete comp; + } + + return r < 0 ? r : 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("long,l", po::bool_switch(), "long listing format"); + at::add_pool_options(positional, options, true); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + r = do_list(pool_name, namespace_name, vm["long"].as<bool>(), + g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), + formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing images failed: " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +Shell::SwitchArguments switched_arguments({"long", "l"}); +Shell::Action action( + {"list"}, {"ls"}, "List rbd images.", "", &get_arguments, &execute); + +} // namespace list +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Lock.cc b/src/tools/rbd/action/Lock.cc new file mode 100644 index 00000000..754cb384 --- /dev/null +++ b/src/tools/rbd/action/Lock.cc @@ -0,0 +1,279 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace lock { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +void add_id_option(po::options_description *positional) { + positional->add_options() + ("lock-id", "unique lock id"); +} + +int get_id(const po::variables_map &vm, size_t *arg_index, + std::string *id) { + *id = utils::get_positional_argument(vm, *arg_index); + if (id->empty()) { + std::cerr << "rbd: lock id was not specified" << std::endl; + return -EINVAL; + } else { + ++(*arg_index); + } + return 0; +} + +} // anonymous namespace + +static int do_lock_list(librbd::Image& image, Formatter *f) +{ + std::list<librbd::locker_t> lockers; + bool exclusive; + std::string tag; + TextTable tbl; + int r; + + r = image.list_lockers(&lockers, &exclusive, &tag); + if (r < 0) + return r; + + if (f) { + f->open_array_section("locks"); + } else { + tbl.define_column("Locker", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("Address", TextTable::LEFT, TextTable::LEFT); + } + + if (lockers.size()) { + bool one = (lockers.size() == 1); + + if (!f) { + std::cout << "There " << (one ? "is " : "are ") << lockers.size() + << (exclusive ? " exclusive" : " shared") + << " lock" << (one ? "" : "s") << " on this image.\n"; + if (!exclusive) + std::cout << "Lock tag: " << tag << "\n"; + } + + for (std::list<librbd::locker_t>::const_iterator it = lockers.begin(); + it != lockers.end(); ++it) { + if (f) { + f->open_object_section("lock"); + f->dump_string("id", it->cookie); + f->dump_string("locker", it->client); + f->dump_string("address", it->address); + f->close_section(); + } else { + tbl << it->client << it->cookie << it->address << TextTable::endrow; + } + } + if (!f) + std::cout << tbl; + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } + return 0; +} + +static int do_lock_add(librbd::Image& image, const char *cookie, + const char *tag) +{ + if (tag) + return image.lock_shared(cookie, tag); + else + return image.lock_exclusive(cookie); +} + +static int do_lock_remove(librbd::Image& image, const char *client, + const char *cookie) +{ + return image.break_lock(client, cookie); +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_lock_list(image, formatter.get()); + if (r < 0) { + std::cerr << "rbd: listing locks failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +void get_add_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_id_option(positional); + options->add_options() + ("shared", po::value<std::string>(), "shared lock tag"); +} + +int execute_add(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string lock_cookie; + r = get_id(vm, &arg_index, &lock_cookie); + if (r < 0) { + return r; + } + + std::string lock_tag; + if (vm.count("shared")) { + lock_tag = vm["shared"].as<std::string>(); + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_lock_add(image, lock_cookie.c_str(), + lock_tag.empty() ? nullptr : lock_tag.c_str()); + if (r < 0) { + if (r == -EBUSY || r == -EEXIST) { + if (!lock_tag.empty()) { + std::cerr << "rbd: lock is already held by someone else" + << " with a different tag" << std::endl; + } else { + std::cerr << "rbd: lock is already held by someone else" << std::endl; + } + } else { + std::cerr << "rbd: taking lock failed: " << cpp_strerror(r) << std::endl; + } + return r; + } + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + add_id_option(positional); + positional->add_options() + ("locker", "locker client"); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string lock_cookie; + r = get_id(vm, &arg_index, &lock_cookie); + if (r < 0) { + return r; + } + + std::string lock_client = utils::get_positional_argument(vm, arg_index); + if (lock_client.empty()) { + std::cerr << "rbd: locker was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_lock_remove(image, lock_client.c_str(), lock_cookie.c_str()); + if (r < 0) { + std::cerr << "rbd: releasing lock failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action_list( + {"lock", "list"}, {"lock", "ls"}, "Show locks held on an image.", "", + &get_list_arguments, &execute_list); +Shell::Action action_add( + {"lock", "add"}, {}, "Take a lock on an image.", "", + &get_add_arguments, &execute_add); +Shell::Action action_remove( + {"lock", "remove"}, {"lock", "rm"}, "Release a lock on an image.", "", + &get_remove_arguments, &execute_remove); + +} // namespace lock +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/MergeDiff.cc b/src/tools/rbd/action/MergeDiff.cc new file mode 100644 index 00000000..406b23b4 --- /dev/null +++ b/src/tools/rbd/action/MergeDiff.cc @@ -0,0 +1,454 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#define _LARGEFILE64_SOURCE +#include <sys/types.h> +#include <unistd.h> + +#include "include/compat.h" +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/safe_io.h" +#include "common/debug.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd + +namespace rbd { +namespace action { +namespace merge_diff { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int parse_diff_header(int fd, __u8 *tag, string *from, string *to, uint64_t *size) +{ + int r; + + {//header + char buf[utils::RBD_DIFF_BANNER.size() + 1]; + r = safe_read_exact(fd, buf, utils::RBD_DIFF_BANNER.size()); + if (r < 0) + return r; + + buf[utils::RBD_DIFF_BANNER.size()] = '\0'; + if (strcmp(buf, utils::RBD_DIFF_BANNER.c_str())) { + std::cerr << "invalid banner '" << buf << "', expected '" + << utils::RBD_DIFF_BANNER << "'" << std::endl; + return -EINVAL; + } + } + + while (true) { + r = safe_read_exact(fd, tag, 1); + if (r < 0) + return r; + + if (*tag == RBD_DIFF_FROM_SNAP) { + r = utils::read_string(fd, 4096, from); // 4k limit to make sure we don't get a garbage string + if (r < 0) + return r; + dout(2) << " from snap " << *from << dendl; + } else if (*tag == RBD_DIFF_TO_SNAP) { + r = utils::read_string(fd, 4096, to); // 4k limit to make sure we don't get a garbage string + if (r < 0) + return r; + dout(2) << " to snap " << *to << dendl; + } else if (*tag == RBD_DIFF_IMAGE_SIZE) { + char buf[8]; + r = safe_read_exact(fd, buf, 8); + if (r < 0) + return r; + + bufferlist bl; + bl.append(buf, 8); + auto p = bl.cbegin(); + decode(*size, p); + } else { + break; + } + } + + return 0; +} + +static int parse_diff_body(int fd, __u8 *tag, uint64_t *offset, uint64_t *length) +{ + int r; + + if (!(*tag)) { + r = safe_read_exact(fd, tag, 1); + if (r < 0) + return r; + } + + if (*tag == RBD_DIFF_END) { + offset = 0; + length = 0; + return 0; + } + + if (*tag != RBD_DIFF_WRITE && *tag != RBD_DIFF_ZERO) + return -ENOTSUP; + + char buf[16]; + r = safe_read_exact(fd, buf, 16); + if (r < 0) + return r; + + bufferlist bl; + bl.append(buf, 16); + auto p = bl.cbegin(); + decode(*offset, p); + decode(*length, p); + + if (!(*length)) + return -ENOTSUP; + + return 0; +} + +/* + * fd: the diff file to read from + * pd: the diff file to be written into + */ +static int accept_diff_body(int fd, int pd, __u8 tag, uint64_t offset, uint64_t length) +{ + if (tag == RBD_DIFF_END) + return 0; + + bufferlist bl; + encode(tag, bl); + encode(offset, bl); + encode(length, bl); + int r; + r = bl.write_fd(pd); + if (r < 0) + return r; + + if (tag == RBD_DIFF_WRITE) { + bufferptr bp = buffer::create(length); + r = safe_read_exact(fd, bp.c_str(), length); + if (r < 0) + return r; + bufferlist data; + data.append(bp); + r = data.write_fd(pd); + if (r < 0) + return r; + } + + return 0; +} + +/* + * Merge two diff files into one single file + * Note: It does not do the merging work if + * either of the source diff files is stripped, + * since which complicates the process and is + * rarely used + */ +static int do_merge_diff(const char *first, const char *second, + const char *path, bool no_progress) +{ + utils::ProgressContext pc("Merging image diff", no_progress); + int fd = -1, sd = -1, pd = -1, r; + + string f_from, f_to; + string s_from, s_to; + uint64_t f_size = 0; + uint64_t s_size = 0; + uint64_t pc_size; + + __u8 f_tag = 0, s_tag = 0; + uint64_t f_off = 0, f_len = 0; + uint64_t s_off = 0, s_len = 0; + bool f_end = false, s_end = false; + + bool first_stdin = !strcmp(first, "-"); + if (first_stdin) { + fd = STDIN_FILENO; + } else { + fd = open(first, O_RDONLY); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << first << std::endl; + goto done; + } + } + + sd = open(second, O_RDONLY); + if (sd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << second << std::endl; + goto done; + } + + if (strcmp(path, "-") == 0) { + pd = 1; + } else { + pd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0644); + if (pd < 0) { + r = -errno; + std::cerr << "rbd: error create " << path << std::endl; + goto done; + } + } + + //We just handle the case like 'banner, [ftag], [ttag], stag, [wztag]*,etag', + // and the (offset,length) in wztag must be ascending order. + r = parse_diff_header(fd, &f_tag, &f_from, &f_to, &f_size); + if (r < 0) { + std::cerr << "rbd: failed to parse first diff header" << std::endl; + goto done; + } + + r = parse_diff_header(sd, &s_tag, &s_from, &s_to, &s_size); + if (r < 0) { + std::cerr << "rbd: failed to parse second diff header" << std::endl; + goto done; + } + + if (f_to != s_from) { + r = -EINVAL; + std::cerr << "The first TO snapshot must be equal with the second FROM " + << "snapshot, aborting" << std::endl; + goto done; + } + + { + // header + bufferlist bl; + bl.append(utils::RBD_DIFF_BANNER); + + __u8 tag; + if (f_from.size()) { + tag = RBD_DIFF_FROM_SNAP; + encode(tag, bl); + encode(f_from, bl); + } + + if (s_to.size()) { + tag = RBD_DIFF_TO_SNAP; + encode(tag, bl); + encode(s_to, bl); + } + + tag = RBD_DIFF_IMAGE_SIZE; + encode(tag, bl); + encode(s_size, bl); + + r = bl.write_fd(pd); + if (r < 0) { + std::cerr << "rbd: failed to write merged diff header" << std::endl; + goto done; + } + } + if (f_size > s_size) + pc_size = f_size << 1; + else + pc_size = s_size << 1; + + //data block + while (!f_end || !s_end) { + // progress through input + pc.update_progress(f_off + s_off, pc_size); + + if (!f_end && !f_len) { + uint64_t last_off = f_off; + + r = parse_diff_body(fd, &f_tag, &f_off, &f_len); + dout(2) << "first diff data chunk: tag=" << f_tag << ", " + << "off=" << f_off << ", " + << "len=" << f_len << dendl; + if (r < 0) { + std::cerr << "rbd: failed to read first diff data chunk header" + << std::endl; + goto done; + } + + if (f_tag == RBD_DIFF_END) { + f_end = true; + f_tag = RBD_DIFF_ZERO; + f_off = f_size; + if (f_size < s_size) + f_len = s_size - f_size; + else + f_len = 0; + } + + if (last_off > f_off) { + r = -ENOTSUP; + std::cerr << "rbd: out-of-order offset from first diff (" + << last_off << " > " << f_off << ")" << std::endl; + goto done; + } + } + + if (!s_end && !s_len) { + uint64_t last_off = s_off; + + r = parse_diff_body(sd, &s_tag, &s_off, &s_len); + dout(2) << "second diff data chunk: tag=" << s_tag << ", " + << "off=" << s_off << ", " + << "len=" << s_len << dendl; + if (r < 0) { + std::cerr << "rbd: failed to read second diff data chunk header" + << std::endl; + goto done; + } + + if (s_tag == RBD_DIFF_END) { + s_end = true; + s_off = s_size; + if (s_size < f_size) + s_len = f_size - s_size; + else + s_len = 0; + } + + if (last_off > s_off) { + r = -ENOTSUP; + std::cerr << "rbd: out-of-order offset from second diff (" + << last_off << " > " << s_off << ")" << std::endl; + goto done; + } + } + + if (f_off < s_off && f_len) { + uint64_t delta = s_off - f_off; + if (delta > f_len) + delta = f_len; + r = accept_diff_body(fd, pd, f_tag, f_off, delta); + if (r < 0) { + std::cerr << "rbd: failed to merge diff chunk" << std::endl; + goto done; + } + f_off += delta; + f_len -= delta; + + if (!f_len) { + f_tag = 0; + continue; + } + } + ceph_assert(f_off >= s_off); + + if (f_off < s_off + s_len && f_len) { + uint64_t delta = s_off + s_len - f_off; + if (delta > f_len) + delta = f_len; + if (f_tag == RBD_DIFF_WRITE) { + if (first_stdin) { + bufferptr bp = buffer::create(delta); + r = safe_read_exact(fd, bp.c_str(), delta); + } else { + off64_t l = lseek64(fd, delta, SEEK_CUR); + r = l < 0 ? -errno : 0; + } + if (r < 0) { + std::cerr << "rbd: failed to skip first diff data" << std::endl; + goto done; + } + } + f_off += delta; + f_len -= delta; + + if (!f_len) { + f_tag = 0; + continue; + } + } + ceph_assert(f_off >= s_off + s_len); + if (s_len) { + r = accept_diff_body(sd, pd, s_tag, s_off, s_len); + if (r < 0) { + std::cerr << "rbd: failed to merge diff chunk" << std::endl; + goto done; + } + s_off += s_len; + s_len = 0; + s_tag = 0; + } else { + ceph_assert(f_end && s_end); + } + continue; + } + + {//tail + __u8 tag = RBD_DIFF_END; + bufferlist bl; + encode(tag, bl); + r = bl.write_fd(pd); + } + +done: + if (pd > 2) + close(pd); + if (sd > 2) + close(sd); + if (fd > 2) + close(fd); + + if(r < 0) { + pc.fail(); + if (pd > 2) + unlink(path); + } else + pc.finish(); + + return r; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + ("diff1-path", "path to first diff (or '-' for stdin)") + ("diff2-path", "path to second diff"); + at::add_path_options(positional, options, + "path to merged diff (or '-' for stdout)"); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string first_diff = utils::get_positional_argument(vm, 0); + if (first_diff.empty()) { + std::cerr << "rbd: first diff was not specified" << std::endl; + return -EINVAL; + } + + std::string second_diff = utils::get_positional_argument(vm, 1); + if (second_diff.empty()) { + std::cerr << "rbd: second diff was not specified" << std::endl; + return -EINVAL; + } + + std::string path; + size_t arg_index = 2; + int r = utils::get_path(vm, &arg_index, &path); + if (r < 0) { + return r; + } + + r = do_merge_diff(first_diff.c_str(), second_diff.c_str(), path.c_str(), + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + cerr << "rbd: merge-diff error" << std::endl; + return -r; + } + + return 0; +} + +Shell::Action action( + {"merge-diff"}, {}, "Merge two diff exports together.", "", + &get_arguments, &execute); + +} // namespace merge_diff +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Migration.cc b/src/tools/rbd/action/Migration.cc new file mode 100644 index 00000000..bb05e376 --- /dev/null +++ b/src/tools/rbd/action/Migration.cc @@ -0,0 +1,338 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/errno.h" + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" + +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace migration { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_prepare(librados::IoCtx& io_ctx, const std::string &image_name, + librados::IoCtx& dest_io_ctx, + const std::string &dest_image_name, + librbd::ImageOptions& opts) { + int r = librbd::RBD().migration_prepare(io_ctx, image_name.c_str(), + dest_io_ctx, dest_image_name.c_str(), + opts); + if (r < 0) { + std::cerr << "rbd: preparing migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +static int do_execute(librados::IoCtx& io_ctx, const std::string &image_name, + bool no_progress) { + utils::ProgressContext pc("Image migration", no_progress); + int r = librbd::RBD().migration_execute_with_progress(io_ctx, + image_name.c_str(), pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: migration failed: " << cpp_strerror(r) << std::endl; + return r; + } + pc.finish(); + return 0; +} + +static int do_abort(librados::IoCtx& io_ctx, const std::string &image_name, + bool no_progress) { + utils::ProgressContext pc("Abort image migration", no_progress); + int r = librbd::RBD().migration_abort_with_progress(io_ctx, + image_name.c_str(), pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: aborting migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + pc.finish(); + return 0; +} + +static int do_commit(librados::IoCtx& io_ctx, const std::string &image_name, + bool force, bool no_progress) { + librbd::image_migration_status_t migration_status; + int r = librbd::RBD().migration_status(io_ctx, image_name.c_str(), + &migration_status, + sizeof(migration_status)); + if (r < 0) { + std::cerr << "rbd: getting migration status failed: " << cpp_strerror(r) + << std::endl; + return r; + } + + librados::IoCtx dst_io_ctx; + r = librados::Rados(io_ctx).ioctx_create2(migration_status.dest_pool_id, dst_io_ctx); + if (r < 0) { + std::cerr << "rbd: accessing source pool id=" + << migration_status.dest_pool_id << " failed: " + << cpp_strerror(r) << std::endl; + return r; + } + + r = utils::set_namespace(migration_status.dest_pool_namespace, &dst_io_ctx); + if (r < 0) { + return r; + } + + librbd::Image image; + r = utils::open_image_by_id(dst_io_ctx, migration_status.dest_image_id, + true, &image); + if (r < 0) { + return r; + } + + std::vector<librbd::linked_image_spec_t> children; + r = image.list_descendants(&children); + if (r < 0) { + std::cerr << "rbd: listing descendants failed: " << cpp_strerror(r) + << std::endl; + return r; + } + + if (children.size() > 0) { + std::cerr << "rbd: the image has " + << (children.size() == 1 ? "a descendant" : "descendants") << ": " + << std::endl; + for (auto& child : children) { + std::cerr << " " << child.pool_name << "/"; + if (!child.pool_namespace.empty()) { + std::cerr << child.pool_namespace << "/"; + } + std::cerr << child.image_name; + if (child.trash) { + std::cerr << " (trash " << child.image_id << ")"; + } + std::cerr << std::endl; + } + std::cerr << "Warning: in-use, read-only descendant images" + << " will not detect the parent update." << std::endl; + if (force) { + std::cerr << "Proceeding anyway due to force flag set." << std::endl; + } else { + std::cerr << "Ensure no descendant images are opened read-only" + << " and run again with force flag." << std::endl; + return -EBUSY; + } + } + + utils::ProgressContext pc("Commit image migration", no_progress); + r = librbd::RBD().migration_commit_with_progress(io_ctx, image_name.c_str(), + pc); + if (r < 0) { + pc.fail(); + std::cerr << "rbd: committing migration failed: " << cpp_strerror(r) + << std::endl; + return r; + } + pc.finish(); + return 0; +} + +void get_prepare_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); + at::add_create_image_options(options, true); + at::add_flatten_option(options); +} + +int execute_prepare(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + io_ctx.set_osdmap_full_try(); + + std::string dest_pool_name; + std::string dest_namespace_name; + std::string dest_image_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dest_pool_name, + &dest_namespace_name, &dest_image_name, nullptr, false, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + librbd::ImageOptions opts; + r = utils::get_image_options(vm, true, &opts); + if (r < 0) { + return r; + } + + librados::IoCtx dest_io_ctx; + if (!dest_pool_name.empty()) { + r = utils::init_io_ctx(rados, dest_pool_name, dest_namespace_name, + &dest_io_ctx); + if (r < 0) { + return r; + } + } + + r = do_prepare(io_ctx, image_name, dest_pool_name.empty() ? io_ctx : + dest_io_ctx, dest_image_name, opts); + if (r < 0) { + return r; + } + + return 0; +} + +void get_execute_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute_execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + io_ctx.set_osdmap_full_try(); + + r = do_execute(io_ctx, image_name, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + return r; + } + + return 0; +} + +void get_abort_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute_abort(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + io_ctx.set_osdmap_full_try(); + + r = do_abort(io_ctx, image_name, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + return r; + } + + return 0; +} + +void get_commit_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); + options->add_options() + ("force", po::bool_switch(), "proceed even if the image has children"); +} + +int execute_commit(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, nullptr, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + io_ctx.set_osdmap_full_try(); + + r = do_commit(io_ctx, image_name, vm["force"].as<bool>(), + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + return r; + } + + return 0; +} + +Shell::Action action_prepare( + {"migration", "prepare"}, {}, "Prepare image migration.", + at::get_long_features_help(), &get_prepare_arguments, &execute_prepare); + +Shell::Action action_execute( + {"migration", "execute"}, {}, "Execute image migration.", "", + &get_execute_arguments, &execute_execute); + +Shell::Action action_abort( + {"migration", "abort"}, {}, "Cancel interrupted image migration.", "", + &get_abort_arguments, &execute_abort); + +Shell::Action action_commit( + {"migration", "commit"}, {}, "Commit image migration.", "", + &get_commit_arguments, &execute_commit); + +} // namespace migration +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/MirrorImage.cc b/src/tools/rbd/action/MirrorImage.cc new file mode 100644 index 00000000..a250b694 --- /dev/null +++ b/src/tools/rbd/action/MirrorImage.cc @@ -0,0 +1,360 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/MirrorDaemonServiceInfo.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/config.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "global/global_context.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace mirror_image { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +int validate_mirroring_enabled(librbd::Image& image) { + librbd::mirror_image_info_t mirror_image; + int r = image.mirror_image_get_info(&mirror_image, sizeof(mirror_image)); + if (r < 0) { + std::cerr << "rbd: failed to retrieve mirror mode: " + << cpp_strerror(r) << std::endl; + return r; + } + + if (mirror_image.state != RBD_MIRROR_IMAGE_ENABLED) { + std::cerr << "rbd: mirroring not enabled on the image" << std::endl; + return -EINVAL; + } + return 0; +} + +} // anonymous namespace + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +void get_arguments_disable(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("force", po::bool_switch(), "disable even if not primary"); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_enable_disable(const po::variables_map &vm, bool enable, + bool force) { + size_t arg_index = 0; + std::string pool_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, nullptr, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, "", image_name, "", "", false, + &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = enable ? image.mirror_image_enable() : image.mirror_image_disable(force); + if (r < 0) { + return r; + } + + std::cout << (enable ? "Mirroring enabled" : "Mirroring disabled") + << std::endl; + + return 0; +} + +int execute_disable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return execute_enable_disable(vm, false, vm["force"].as<bool>()); +} + +int execute_enable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + return execute_enable_disable(vm, true, false); +} + +void get_arguments_promote(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("force", po::bool_switch(), "promote even if not cleanly demoted by remote cluster"); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_promote(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, nullptr, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + bool force = vm["force"].as<bool>(); + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, "", image_name, "", "", false, + &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(image); + if (r < 0) { + return r; + } + + r = image.mirror_image_promote(force); + if (r < 0) { + std::cerr << "rbd: error promoting image to primary" << std::endl; + return r; + } + + std::cout << "Image promoted to primary" << std::endl; + return 0; +} + +int execute_demote(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, nullptr, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, "", image_name, "", "", false, + &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(image); + if (r < 0) { + return r; + } + + r = image.mirror_image_demote(); + if (r < 0) { + std::cerr << "rbd: error demoting image to non-primary" << std::endl; + return r; + } + + std::cout << "Image demoted to non-primary" << std::endl; + return 0; +} + +int execute_resync(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, nullptr, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, "", image_name, "", "", false, + &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(image); + if (r < 0) { + return r; + } + + r = image.mirror_image_resync(); + if (r < 0) { + std::cerr << "rbd: error flagging image resync" << std::endl; + return r; + } + + std::cout << "Flagged image for resync from primary" << std::endl; + return 0; +} + +void get_status_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute_status(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + at::Format::Formatter formatter; + int r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + size_t arg_index = 0; + std::string pool_name; + std::string image_name; + std::string snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, nullptr, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, "", image_name, "", "", false, + &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(image); + if (r < 0) { + return r; + } + + librbd::mirror_image_status_t status; + r = image.mirror_image_get_status(&status, sizeof(status)); + if (r < 0) { + std::cerr << "rbd: failed to get status for image " << image_name << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + std::string instance_id; + MirrorDaemonServiceInfo daemon_service_info(io_ctx); + + if (status.up) { + r = image.mirror_image_get_instance_id(&instance_id); + if (r == -EOPNOTSUPP) { + std::cerr << "rbd: newer release of Ceph OSDs required to map image " + << "to rbd-mirror daemon instance" << std::endl; + // not fatal + } else if (r < 0 && r != -ENOENT) { + std::cerr << "rbd: failed to get service id for image " + << image_name << ": " << cpp_strerror(r) << std::endl; + // not fatal + } else if (!instance_id.empty()) { + daemon_service_info.init(); + } + } + + std::string state = utils::mirror_image_status_state(status); + std::string last_update = ( + status.last_update == 0 ? "" : utils::timestr(status.last_update)); + + if (formatter != nullptr) { + formatter->open_object_section("image"); + formatter->dump_string("name", image_name); + formatter->dump_string("global_id", status.info.global_id); + formatter->dump_string("state", state); + formatter->dump_string("description", status.description); + daemon_service_info.dump(instance_id, formatter); + formatter->dump_string("last_update", last_update); + formatter->close_section(); // image + formatter->flush(std::cout); + } else { + std::cout << image_name << ":\n" + << " global_id: " << status.info.global_id << "\n" + << " state: " << state << "\n" + << " description: " << status.description << "\n"; + if (!instance_id.empty()) { + std::cout << " service: " << + daemon_service_info.get_description(instance_id) << "\n"; + } + std::cout << " last_update: " << last_update << std::endl; + } + + return 0; +} + +Shell::Action action_enable( + {"mirror", "image", "enable"}, {}, + "Enable RBD mirroring for an image.", "", + &get_arguments, &execute_enable); +Shell::Action action_disable( + {"mirror", "image", "disable"}, {}, + "Disable RBD mirroring for an image.", "", + &get_arguments_disable, &execute_disable); +Shell::Action action_promote( + {"mirror", "image", "promote"}, {}, + "Promote an image to primary for RBD mirroring.", "", + &get_arguments_promote, &execute_promote); +Shell::Action action_demote( + {"mirror", "image", "demote"}, {}, + "Demote an image to non-primary for RBD mirroring.", "", + &get_arguments, &execute_demote); +Shell::Action action_resync( + {"mirror", "image", "resync"}, {}, + "Force resync to primary image for RBD mirroring.", "", + &get_arguments, &execute_resync); +Shell::Action action_status( + {"mirror", "image", "status"}, {}, + "Show RBD mirroring status for an image.", "", + &get_status_arguments, &execute_status); + +} // namespace mirror_image +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/MirrorPool.cc b/src/tools/rbd/action/MirrorPool.cc new file mode 100644 index 00000000..ff7c3031 --- /dev/null +++ b/src/tools/rbd/action/MirrorPool.cc @@ -0,0 +1,1537 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/MirrorDaemonServiceInfo.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "include/rbd/librbd.hpp" +#include "common/ceph_json.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/Throttle.h" +#include "global/global_context.h" +#include <fstream> +#include <functional> +#include <iostream> +#include <regex> +#include <set> +#include <boost/program_options.hpp> +#include "include/ceph_assert.h" + +#include <atomic> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::action::MirrorPool: " + +namespace rbd { +namespace action { +namespace mirror_pool { + +namespace at = argument_types; +namespace po = boost::program_options; + +static const std::string ALL_NAME("all"); +static const std::string SITE_NAME("site-name"); + +namespace { + +void add_site_name_optional(po::options_description *options) { + options->add_options() + (SITE_NAME.c_str(), po::value<std::string>(), "local site name"); +} + +int set_site_name(librados::Rados& rados, const std::string& site_name) { + librbd::RBD rbd; + int r = rbd.mirror_site_name_set(rados, site_name); + if (r == -EOPNOTSUPP) { + std::cerr << "rbd: cluster does not support site names" << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: failed to set site name" << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +struct MirrorPeerDirection {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + MirrorPeerDirection *target_type, int) { + po::validators::check_first_occurrence(v); + const std::string &s = po::validators::get_single_string(values); + + if (s == "rx-only") { + v = boost::any(RBD_MIRROR_PEER_DIRECTION_RX); + } else if (s == "rx-tx") { + v = boost::any(RBD_MIRROR_PEER_DIRECTION_RX_TX); + } else { + throw po::validation_error(po::validation_error::invalid_option_value); + } +} + +int validate_mirroring_enabled(librados::IoCtx& io_ctx) { + librbd::RBD rbd; + rbd_mirror_mode_t mirror_mode; + int r = rbd.mirror_mode_get(io_ctx, &mirror_mode); + if (r < 0) { + std::cerr << "rbd: failed to retrieve mirror mode: " + << cpp_strerror(r) << std::endl; + return r; + } + + if (mirror_mode == RBD_MIRROR_MODE_DISABLED) { + std::cerr << "rbd: mirroring not enabled on the pool" << std::endl; + return -EINVAL; + } + return 0; +} + +int validate_uuid(const std::string &uuid) { + std::regex pattern("^[A-F0-9]{8}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{12}$", + std::regex::icase); + std::smatch match; + if (!std::regex_match(uuid, match, pattern)) { + std::cerr << "rbd: invalid uuid '" << uuid << "'" << std::endl; + return -EINVAL; + } + return 0; +} + +int read_key_file(std::string path, std::string* key) { + std::ifstream key_file; + key_file.open(path); + if (key_file.fail()) { + std::cerr << "rbd: failed to open " << path << std::endl; + return -EINVAL; + } + + std::getline(key_file, *key); + if (key_file.bad()) { + std::cerr << "rbd: failed to read key from " << path << std::endl; + return -EINVAL; + } + + key_file.close(); + return 0; +} + +void add_uuid_option(po::options_description *positional) { + positional->add_options() + ("uuid", po::value<std::string>(), "peer uuid"); +} + +int get_uuid(const po::variables_map &vm, size_t arg_index, + std::string *uuid) { + *uuid = utils::get_positional_argument(vm, arg_index); + if (uuid->empty()) { + std::cerr << "rbd: must specify peer uuid" << std::endl; + return -EINVAL; + } + return validate_uuid(*uuid); +} + +int get_remote_cluster_spec(const po::variables_map &vm, + const std::string &spec, + std::string *remote_client_name, + std::string *remote_cluster, + std::map<std::string, std::string>* attributes) { + if (vm.count("remote-client-name")) { + *remote_client_name = vm["remote-client-name"].as<std::string>(); + } + if (vm.count("remote-cluster")) { + *remote_cluster = vm["remote-cluster"].as<std::string>(); + } + if (vm.count("remote-mon-host")) { + (*attributes)["mon_host"] = vm["remote-mon-host"].as<std::string>(); + } + if (vm.count("remote-key-file")) { + std::string key; + int r = read_key_file(vm["remote-key-file"].as<std::string>(), &key); + if (r < 0) { + return r; + } + (*attributes)["key"] = key; + } + + if (!spec.empty()) { + std::regex pattern("^(?:(client\\.[^@]+)@)?([^/@]+)$"); + std::smatch match; + if (!std::regex_match(spec, match, pattern)) { + std::cerr << "rbd: invalid spec '" << spec << "'" << std::endl; + return -EINVAL; + } + if (match[1].matched) { + *remote_client_name = match[1]; + } + *remote_cluster = match[2]; + } + + if (remote_cluster->empty()) { + std::cerr << "rbd: remote cluster was not specified" << std::endl; + return -EINVAL; + } + return 0; +} + +int set_peer_config_key(librados::IoCtx& io_ctx, const std::string& peer_uuid, + std::map<std::string, std::string>&& attributes) { + librbd::RBD rbd; + int r = rbd.mirror_peer_set_attributes(io_ctx, peer_uuid, attributes); + if (r == -EPERM) { + std::cerr << "rbd: permission denied attempting to set peer " + << "config-key secrets in the monitor" << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: failed to update mirroring peer config: " + << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +int get_peer_config_key(librados::IoCtx& io_ctx, const std::string& peer_uuid, + std::map<std::string, std::string>* attributes) { + librbd::RBD rbd; + int r = rbd.mirror_peer_get_attributes(io_ctx, peer_uuid, attributes); + if (r == -ENOENT) { + return r; + } else if (r == -EPERM) { + std::cerr << "rbd: permission denied attempting to access peer " + << "config-key secrets from the monitor" << std::endl; + return r; + } else if (r == -EINVAL) { + std::cerr << "rbd: corrupt mirroring peer config" << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: error reading mirroring peer config: " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +int update_peer_config_key(librados::IoCtx& io_ctx, + const std::string& peer_uuid, + const std::string& key, + const std::string& value) { + std::map<std::string, std::string> attributes; + int r = get_peer_config_key(io_ctx, peer_uuid, &attributes); + if (r == -ENOENT) { + return set_peer_config_key(io_ctx, peer_uuid, {{key, value}}); + } else if (r < 0) { + return r; + } + + if (value.empty()) { + attributes.erase(key); + } else { + attributes[key] = value; + } + return set_peer_config_key(io_ctx, peer_uuid, std::move(attributes)); +} + +int format_mirror_peers(librados::IoCtx& io_ctx, + at::Format::Formatter formatter, + const std::vector<librbd::mirror_peer_t> &peers, + bool config_key) { + TextTable tbl; + if (formatter != nullptr) { + formatter->open_array_section("peers"); + } else { + std::cout << "Peers: "; + if (peers.empty()) { + std::cout << "none" << std::endl; + } else { + tbl.define_column("", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("UUID", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("CLIENT", TextTable::LEFT, TextTable::LEFT); + if (config_key) { + tbl.define_column("MON_HOST", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("KEY", TextTable::LEFT, TextTable::LEFT); + } + } + } + + for (auto &peer : peers) { + std::map<std::string, std::string> attributes; + if (config_key) { + int r = get_peer_config_key(io_ctx, peer.uuid, &attributes); + if (r < 0 && r != -ENOENT) { + return r; + } + } + + if (formatter != nullptr) { + formatter->open_object_section("peer"); + formatter->dump_string("uuid", peer.uuid); + formatter->dump_string("cluster_name", peer.cluster_name); + formatter->dump_string("client_name", peer.client_name); + for (auto& pair : attributes) { + formatter->dump_string(pair.first.c_str(), pair.second); + } + formatter->close_section(); + } else { + tbl << " " + << peer.uuid + << peer.cluster_name + << peer.client_name; + if (config_key) { + tbl << attributes["mon_host"] + << attributes["key"]; + } + tbl << TextTable::endrow; + } + } + + if (formatter != nullptr) { + formatter->close_section(); + } else { + std::cout << std::endl << tbl; + } + return 0; +} + +class ImageRequestBase { +public: + void send() { + dout(20) << this << " " << __func__ << ": image_name=" << m_image_name + << dendl; + + auto ctx = new FunctionContext([this](int r) { + handle_finalize(r); + }); + + // will pause here until slots are available + m_finalize_ctx = m_throttle.start_op(ctx); + + open_image(); + } + +protected: + ImageRequestBase(librados::IoCtx &io_ctx, OrderedThrottle &throttle, + const std::string &image_name) + : m_io_ctx(io_ctx), m_throttle(throttle), m_image_name(image_name) { + } + virtual ~ImageRequestBase() { + } + + virtual bool skip_get_info() const { + return false; + } + virtual void get_info(librbd::Image &image, librbd::mirror_image_info_t *info, + librbd::RBD::AioCompletion *aio_comp) { + image.aio_mirror_image_get_info(info, sizeof(librbd::mirror_image_info_t), + aio_comp); + } + + virtual bool skip_action(const librbd::mirror_image_info_t &info) const { + return false; + } + virtual void execute_action(librbd::Image &image, + librbd::RBD::AioCompletion *aio_comp) = 0; + virtual void handle_execute_action(int r) { + dout(20) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + std::cerr << "rbd: failed to " << get_action_type() << " image " + << m_image_name << ": " << cpp_strerror(r) << std::endl; + m_ret_val = r; + } + + close_image(); + } + + virtual void finalize_action() { + } + virtual std::string get_action_type() const = 0; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * GET_INFO + * | + * v + * EXECUTE_ACTION + * | + * v + * CLOSE_IMAGE + * | + * v + * FINALIZE_ACTION + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + OrderedThrottle &m_throttle; + const std::string m_image_name; + + librbd::Image m_image; + Context *m_finalize_ctx = nullptr; + + librbd::mirror_image_info_t m_mirror_image_info; + + int m_ret_val = 0; + + void open_image() { + dout(20) << this << " " << __func__ << dendl; + + librbd::RBD rbd; + auto aio_completion = utils::create_aio_completion< + ImageRequestBase, &ImageRequestBase::handle_open_image>(this); + rbd.aio_open(m_io_ctx, m_image, m_image_name.c_str(), nullptr, + aio_completion); + } + + void handle_open_image(int r) { + dout(20) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0) { + std::cerr << "rbd: failed to open image " + << m_image_name << ": " << cpp_strerror(r) << std::endl; + m_finalize_ctx->complete(r); + return; + } + + get_info(); + } + + void get_info() { + if (skip_get_info()) { + execute_action(); + return; + } + dout(20) << this << " " << __func__ << dendl; + + auto aio_completion = utils::create_aio_completion< + ImageRequestBase, &ImageRequestBase::handle_get_info>(this); + get_info(m_image, &m_mirror_image_info, aio_completion); + } + + void handle_get_info(int r) { + dout(20) << this << " " << __func__ << ": r=" << r << dendl; + + if (r == -ENOENT) { + close_image(); + return; + } else if (r < 0) { + std::cerr << "rbd: failed to retrieve mirror image info for " + << m_image_name << ": " << cpp_strerror(r) << std::endl; + m_ret_val = r; + close_image(); + return; + } + + execute_action(); + } + + void execute_action() { + if (skip_action(m_mirror_image_info)) { + close_image(); + return; + } + dout(20) << this << " " << __func__ << dendl; + + auto aio_completion = utils::create_aio_completion< + ImageRequestBase, &ImageRequestBase::handle_execute_action>(this); + execute_action(m_image, aio_completion); + } + + void close_image() { + dout(20) << this << " " << __func__ << dendl; + + auto aio_completion = utils::create_aio_completion< + ImageRequestBase, &ImageRequestBase::handle_close_image>(this); + m_image.aio_close(aio_completion); + } + + void handle_close_image(int r) { + dout(20) << this << " " << __func__ << ": r=" << r << dendl; + + if (r < 0) { + std::cerr << "rbd: failed to close image " + << m_image_name << ": " << cpp_strerror(r) << std::endl; + } + + m_finalize_ctx->complete(r); + } + + void handle_finalize(int r) { + dout(20) << this << " " << __func__ << ": r=" << r << dendl; + + if (r == 0 && m_ret_val < 0) { + r = m_ret_val; + } + if (r >= 0) { + finalize_action(); + } + m_throttle.end_op(r); + delete this; + } + +}; + +class PromoteImageRequest : public ImageRequestBase { +public: + PromoteImageRequest(librados::IoCtx &io_ctx, OrderedThrottle &throttle, + const std::string &image_name, std::atomic<unsigned> *counter, + bool force) + : ImageRequestBase(io_ctx, throttle, image_name), m_counter(counter), + m_force(force) { + } + +protected: + bool skip_action(const librbd::mirror_image_info_t &info) const override { + return (info.state != RBD_MIRROR_IMAGE_ENABLED || info.primary); + } + + void execute_action(librbd::Image &image, + librbd::RBD::AioCompletion *aio_comp) override { + image.aio_mirror_image_promote(m_force, aio_comp); + } + + void handle_execute_action(int r) override { + if (r >= 0) { + (*m_counter)++; + } + ImageRequestBase::handle_execute_action(r); + } + + std::string get_action_type() const override { + return "promote"; + } + +private: + std::atomic<unsigned> *m_counter = nullptr; + bool m_force; +}; + +class DemoteImageRequest : public ImageRequestBase { +public: + DemoteImageRequest(librados::IoCtx &io_ctx, OrderedThrottle &throttle, + const std::string &image_name, std::atomic<unsigned> *counter) + : ImageRequestBase(io_ctx, throttle, image_name), m_counter(counter) { + } + +protected: + bool skip_action(const librbd::mirror_image_info_t &info) const override { + return (info.state != RBD_MIRROR_IMAGE_ENABLED || !info.primary); + } + + void execute_action(librbd::Image &image, + librbd::RBD::AioCompletion *aio_comp) override { + image.aio_mirror_image_demote(aio_comp); + } + void handle_execute_action(int r) override { + if (r >= 0) { + (*m_counter)++; + } + ImageRequestBase::handle_execute_action(r); + } + + std::string get_action_type() const override { + return "demote"; + } + +private: + std::atomic<unsigned> *m_counter = nullptr; +}; + +class StatusImageRequest : public ImageRequestBase { +public: + StatusImageRequest( + librados::IoCtx &io_ctx, OrderedThrottle &throttle, + const std::string &image_name, + const std::map<std::string, std::string> &instance_ids, + const MirrorDaemonServiceInfo &daemon_service_info, + at::Format::Formatter formatter) + : ImageRequestBase(io_ctx, throttle, image_name), + m_instance_ids(instance_ids), m_daemon_service_info(daemon_service_info), + m_formatter(formatter) { + } + +protected: + bool skip_get_info() const override { + return true; + } + + void execute_action(librbd::Image &image, + librbd::RBD::AioCompletion *aio_comp) override { + image.get_id(&m_image_id); + image.aio_mirror_image_get_status(&m_mirror_image_status, + sizeof(m_mirror_image_status), aio_comp); + } + + void finalize_action() override { + if (m_mirror_image_status.info.global_id.empty()) { + return; + } + + std::string state = utils::mirror_image_status_state(m_mirror_image_status); + std::string instance_id = (m_mirror_image_status.up && + m_instance_ids.count(m_image_id)) ? + m_instance_ids.find(m_image_id)->second : ""; + std::string last_update = ( + m_mirror_image_status.last_update == 0 ? + "" : utils::timestr(m_mirror_image_status.last_update)); + + if (m_formatter != nullptr) { + m_formatter->open_object_section("image"); + m_formatter->dump_string("name", m_mirror_image_status.name); + m_formatter->dump_string("global_id", + m_mirror_image_status.info.global_id); + m_formatter->dump_string("state", state); + m_formatter->dump_string("description", + m_mirror_image_status.description); + m_daemon_service_info.dump(instance_id, m_formatter); + m_formatter->dump_string("last_update", last_update); + m_formatter->close_section(); // image + } else { + std::cout << "\n" << m_mirror_image_status.name << ":\n" + << " global_id: " + << m_mirror_image_status.info.global_id << "\n" + << " state: " << state << "\n" + << " description: " + << m_mirror_image_status.description << "\n"; + if (!instance_id.empty()) { + std::cout << " service: " + << m_daemon_service_info.get_description(instance_id) << "\n"; + } + std::cout << " last_update: " << last_update << std::endl; + } + } + + std::string get_action_type() const override { + return "status"; + } + +private: + const std::map<std::string, std::string> &m_instance_ids; + const MirrorDaemonServiceInfo &m_daemon_service_info; + at::Format::Formatter m_formatter; + std::string m_image_id; + librbd::mirror_image_status_t m_mirror_image_status; +}; + +template <typename RequestT> +class ImageRequestAllocator { +public: + template <class... Args> + RequestT *operator()(librados::IoCtx &io_ctx, OrderedThrottle &throttle, + const std::string &image_name, Args&&... args) { + return new RequestT(io_ctx, throttle, image_name, + std::forward<Args>(args)...); + } +}; + +template <typename RequestT> +class ImageRequestGenerator { +public: + template <class... Args> + ImageRequestGenerator(librados::IoCtx &io_ctx, Args&&... args) + : m_io_ctx(io_ctx), + m_factory(std::bind(ImageRequestAllocator<RequestT>(), + std::ref(m_io_ctx), std::ref(m_throttle), + std::placeholders::_1, std::forward<Args>(args)...)), + m_throttle(g_conf().get_val<uint64_t>("rbd_concurrent_management_ops"), + true) { + } + + int execute() { + // use the alphabetical list of image names for pool-level + // mirror image operations + librbd::RBD rbd; + int r = rbd.list2(m_io_ctx, &m_images); + if (r < 0 && r != -ENOENT) { + std::cerr << "rbd: failed to list images within pool" << std::endl; + return r; + } + + for (auto &image : m_images) { + auto request = m_factory(image.name); + request->send(); + } + + return m_throttle.wait_for_ret(); + } +private: + typedef std::function<RequestT*(const std::string&)> Factory; + + librados::IoCtx &m_io_ctx; + Factory m_factory; + + OrderedThrottle m_throttle; + + std::vector<librbd::image_spec_t> m_images; + +}; + +} // anonymous namespace + +void get_peer_bootstrap_create_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + options->add_options() + (SITE_NAME.c_str(), po::value<std::string>(), "local site name"); +} + +int execute_peer_bootstrap_create( + const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + if (vm.count(SITE_NAME)) { + r = set_site_name(rados, vm[SITE_NAME].as<std::string>()); + if (r < 0) { + return r; + } + } + + librbd::RBD rbd; + std::string token; + r = rbd.mirror_peer_bootstrap_create(io_ctx, &token); + if (r == -EEXIST) { + std::cerr << "rbd: mismatch with pre-existing RBD mirroring peer user caps" + << std::endl; + } else if (r < 0) { + std::cerr << "rbd: failed to create mirroring bootstrap token: " + << cpp_strerror(r) << std::endl; + return r; + } + + std::cout << token << std::endl; + return 0; +} + +void get_peer_bootstrap_import_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + options->add_options() + (SITE_NAME.c_str(), po::value<std::string>(), "local site name"); + positional->add_options() + ("token-path", po::value<std::string>(), + "bootstrap token file (or '-' for stdin)"); + options->add_options() + ("token-path", po::value<std::string>(), + "bootstrap token file (or '-' for stdin)") + ("direction", po::value<MirrorPeerDirection>(), + "mirroring direction (rx-only, rx-tx)\n" + "[default: rx-tx]"); +} + +int execute_peer_bootstrap_import( + const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + std::string token_path; + if (vm.count("token-path")) { + token_path = vm["token-path"].as<std::string>(); + } else { + token_path = utils::get_positional_argument(vm, arg_index++); + } + + if (token_path.empty()) { + std::cerr << "rbd: token path was not specified" << std::endl; + return -EINVAL; + } + + rbd_mirror_peer_direction_t mirror_peer_direction = + RBD_MIRROR_PEER_DIRECTION_RX_TX; + if (vm.count("direction")) { + mirror_peer_direction = vm["direction"].as<rbd_mirror_peer_direction_t>(); + } + + int fd = STDIN_FILENO; + if (token_path != "-") { + fd = open(token_path.c_str(), O_RDONLY); + if (fd < 0) { + r = -errno; + std::cerr << "rbd: error opening " << token_path << std::endl; + return r; + } + } + + char token[1024]; + memset(token, 0, sizeof(token)); + r = safe_read(fd, token, sizeof(token) - 1); + if (fd != STDIN_FILENO) { + VOID_TEMP_FAILURE_RETRY(close(fd)); + } + + if (r < 0) { + std::cerr << "rbd: error reading token file: " << cpp_strerror(r) + << std::endl; + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + if (vm.count(SITE_NAME)) { + r = set_site_name(rados, vm[SITE_NAME].as<std::string>()); + if (r < 0) { + return r; + } + } + + librbd::RBD rbd; + r = rbd.mirror_peer_bootstrap_import(io_ctx, mirror_peer_direction, token); + if (r == -ENOSYS) { + std::cerr << "rbd: mirroring is not enabled on remote peer" << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: failed to import peer bootstrap token" << std::endl; + return r; + } + + return 0; +} + +void get_peer_add_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + positional->add_options() + ("remote-cluster-spec", "remote cluster spec\n" + "(example: [<client name>@]<cluster name>)"); + options->add_options() + ("remote-client-name", po::value<std::string>(), "remote client name") + ("remote-cluster", po::value<std::string>(), "remote cluster name") + ("remote-mon-host", po::value<std::string>(), "remote mon host(s)") + ("remote-key-file", po::value<std::string>(), + "path to file containing remote key"); +} + +int execute_peer_add(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + std::string remote_client_name = g_ceph_context->_conf->name.to_str(); + std::string remote_cluster; + std::map<std::string, std::string> attributes; + r = get_remote_cluster_spec( + vm, utils::get_positional_argument(vm, arg_index), + &remote_client_name, &remote_cluster, &attributes); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + // TODO: temporary restriction to prevent adding multiple peers + // until rbd-mirror daemon can properly handle the scenario + librbd::RBD rbd; + std::vector<librbd::mirror_peer_t> mirror_peers; + r = rbd.mirror_peer_list(io_ctx, &mirror_peers); + if (r < 0) { + std::cerr << "rbd: failed to list mirror peers" << std::endl; + return r; + } + if (!mirror_peers.empty()) { + std::cerr << "rbd: multiple peers are not currently supported" << std::endl; + return -EINVAL; + } + + std::string uuid; + r = rbd.mirror_peer_add(io_ctx, &uuid, remote_cluster, remote_client_name); + if (r < 0) { + std::cerr << "rbd: error adding mirror peer" << std::endl; + return r; + } + + if (!attributes.empty()) { + r = set_peer_config_key(io_ctx, uuid, std::move(attributes)); + if (r < 0) { + return r; + } + } + + std::cout << uuid << std::endl; + return 0; +} + +void get_peer_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + add_uuid_option(positional); +} + +int execute_peer_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + std::string uuid; + r = get_uuid(vm, arg_index, &uuid); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.mirror_peer_remove(io_ctx, uuid); + if (r < 0) { + std::cerr << "rbd: error removing mirror peer" << std::endl; + return r; + } + return 0; +} + +void get_peer_set_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + add_uuid_option(positional); + positional->add_options() + ("key", "peer parameter [client, cluster, mon-host, key-file]") + ("value", "new value for specified key"); +} + +int execute_peer_set(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + std::string uuid; + r = get_uuid(vm, arg_index++, &uuid); + if (r < 0) { + return r; + } + + std::set<std::string> valid_keys{{"client", "cluster", "mon-host", + "key-file"}}; + std::string key = utils::get_positional_argument(vm, arg_index++); + if (valid_keys.find(key) == valid_keys.end()) { + std::cerr << "rbd: must specify "; + for (auto& valid_key : valid_keys) { + std::cerr << "'" << valid_key << "'"; + if (&valid_key != &(*valid_keys.rbegin())) { + std::cerr << ", "; + } + } + std::cerr << " key." << std::endl; + return -EINVAL; + } + + std::string value = utils::get_positional_argument(vm, arg_index++); + if (value.empty() && (key == "client" || key == "cluster")) { + std::cerr << "rbd: must specify new " << key << " value." << std::endl; + } else if (key == "key-file") { + key = "key"; + r = read_key_file(value, &value); + if (r < 0) { + return r; + } + } else if (key == "mon-host") { + key = "mon_host"; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + if (key == "client") { + r = rbd.mirror_peer_set_client(io_ctx, uuid.c_str(), value.c_str()); + } else if (key == "cluster") { + r = rbd.mirror_peer_set_cluster(io_ctx, uuid.c_str(), value.c_str()); + } else { + r = update_peer_config_key(io_ctx, uuid, key, value); + if (r == -ENOENT) { + std::cerr << "rbd: mirror peer " << uuid << " does not exist" + << std::endl; + } + } + + if (r < 0) { + return r; + } + return 0; +} + +void get_disable_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); +} + +void get_enable_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + positional->add_options() + ("mode", "mirror mode [image or pool]"); + add_site_name_optional(options); +} + +int execute_enable_disable(librados::IoCtx& io_ctx, + rbd_mirror_mode_t next_mirror_mode, + const std::string &mode, bool ignore_no_update) { + librbd::RBD rbd; + rbd_mirror_mode_t current_mirror_mode; + int r = rbd.mirror_mode_get(io_ctx, ¤t_mirror_mode); + if (r < 0) { + std::cerr << "rbd: failed to retrieve mirror mode: " + << cpp_strerror(r) << std::endl; + return r; + } + + if (current_mirror_mode == next_mirror_mode) { + if (!ignore_no_update) { + if (mode == "disabled") { + std::cout << "rbd: mirroring is already " << mode << std::endl; + } else { + std::cout << "rbd: mirroring is already configured for " + << mode << " mode" << std::endl; + } + } + return 0; + } else if (next_mirror_mode == RBD_MIRROR_MODE_IMAGE && + current_mirror_mode == RBD_MIRROR_MODE_POOL) { + std::cout << "note: changing mirroring mode from pool to image" + << std::endl; + } else if (next_mirror_mode == RBD_MIRROR_MODE_POOL && + current_mirror_mode == RBD_MIRROR_MODE_IMAGE) { + std::cout << "note: changing mirroring mode from image to pool" + << std::endl; + } + + r = rbd.mirror_mode_set(io_ctx, next_mirror_mode); + if (r < 0) { + return r; + } + return 0; +} + +int execute_disable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + // TODO support namespaces + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + return execute_enable_disable(io_ctx, RBD_MIRROR_MODE_DISABLED, "disabled", + false); +} + +int execute_enable(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + rbd_mirror_mode_t mirror_mode; + std::string mode = utils::get_positional_argument(vm, arg_index++); + if (mode == "image") { + mirror_mode = RBD_MIRROR_MODE_IMAGE; + } else if (mode == "pool") { + mirror_mode = RBD_MIRROR_MODE_POOL; + } else { + std::cerr << "rbd: must specify 'image' or 'pool' mode." << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + + // TODO support namespaces + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + bool updated = false; + if (vm.count(SITE_NAME)) { + librbd::RBD rbd; + + auto site_name = vm[SITE_NAME].as<std::string>(); + std::string original_site_name; + r = rbd.mirror_site_name_get(rados, &original_site_name); + updated = (r >= 0 && site_name != original_site_name); + + r = set_site_name(rados, site_name); + if (r < 0) { + return r; + } + } + + return execute_enable_disable(io_ctx, mirror_mode, mode, updated); +} + +void get_info_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + at::add_format_options(options); + options->add_options() + (ALL_NAME.c_str(), po::bool_switch(), "list all attributes"); +} + +int execute_info(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + rbd_mirror_mode_t mirror_mode; + r = rbd.mirror_mode_get(io_ctx, &mirror_mode); + if (r < 0) { + return r; + } + + std::string site_name; + r = rbd.mirror_site_name_get(rados, &site_name); + if (r < 0 && r != -EOPNOTSUPP) { + return r; + } + + std::vector<librbd::mirror_peer_t> mirror_peers; + r = rbd.mirror_peer_list(io_ctx, &mirror_peers); + if (r < 0) { + return r; + } + + std::string mirror_mode_desc; + switch (mirror_mode) { + case RBD_MIRROR_MODE_DISABLED: + mirror_mode_desc = "disabled"; + break; + case RBD_MIRROR_MODE_IMAGE: + mirror_mode_desc = "image"; + break; + case RBD_MIRROR_MODE_POOL: + mirror_mode_desc = "pool"; + break; + default: + mirror_mode_desc = "unknown"; + break; + } + + if (formatter != nullptr) { + formatter->open_object_section("mirror"); + formatter->dump_string("mode", mirror_mode_desc); + } else { + std::cout << "Mode: " << mirror_mode_desc << std::endl; + } + + if (mirror_mode != RBD_MIRROR_MODE_DISABLED) { + if (formatter != nullptr) { + formatter->dump_string("site_name", site_name); + } else { + std::cout << "Site Name: " << site_name << std::endl; + } + + r = format_mirror_peers(io_ctx, formatter, mirror_peers, + vm[ALL_NAME].as<bool>()); + if (r < 0) { + return r; + } + } + if (formatter != nullptr) { + formatter->close_section(); + formatter->flush(std::cout); + } + return 0; +} + +void get_status_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + at::add_format_options(options); + at::add_verbose_option(options); +} + +int execute_status(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + bool verbose = vm[at::VERBOSE].as<bool>(); + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + + std::map<librbd::mirror_image_status_state_t, int> states; + r = rbd.mirror_image_status_summary(io_ctx, &states); + if (r < 0) { + std::cerr << "rbd: failed to get status summary for mirrored images: " + << cpp_strerror(r) << std::endl; + return r; + } + + if (formatter != nullptr) { + formatter->open_object_section("status"); + } + + enum Health {Ok = 0, Warning = 1, Error = 2} health = Ok; + const char *names[] = {"OK", "WARNING", "ERROR"}; + int total = 0; + + for (auto &it : states) { + auto &state = it.first; + if (health < Warning && + (state != MIRROR_IMAGE_STATUS_STATE_REPLAYING && + state != MIRROR_IMAGE_STATUS_STATE_STOPPED)) { + health = Warning; + } + if (health < Error && + state == MIRROR_IMAGE_STATUS_STATE_ERROR) { + health = Error; + } + total += it.second; + } + + if (formatter != nullptr) { + formatter->open_object_section("summary"); + formatter->dump_string("health", names[health]); + formatter->open_object_section("states"); + for (auto &it : states) { + std::string state_name = utils::mirror_image_status_state(it.first); + formatter->dump_int(state_name.c_str(), it.second); + } + formatter->close_section(); // states + formatter->close_section(); // summary + } else { + std::cout << "health: " << names[health] << std::endl; + std::cout << "images: " << total << " total" << std::endl; + for (auto &it : states) { + std::cout << " " << it.second << " " + << utils::mirror_image_status_state(it.first) << std::endl; + } + } + + int ret = 0; + + if (verbose) { + if (formatter != nullptr) { + formatter->open_array_section("images"); + } + + std::map<std::string, std::string> instance_ids; + MirrorDaemonServiceInfo daemon_service_info(io_ctx); + + std::string start_image_id; + while (true) { + std::map<std::string, std::string> ids; + r = rbd.mirror_image_instance_id_list(io_ctx, start_image_id, 1024, &ids); + if (r < 0) { + if (r == -EOPNOTSUPP) { + std::cerr << "rbd: newer release of Ceph OSDs required to map image " + << "to rbd-mirror daemon instance" << std::endl; + } else { + std::cerr << "rbd: failed to get instance id list: " + << cpp_strerror(r) << std::endl; + } + // not fatal + break; + } + if (ids.empty()) { + break; + } + instance_ids.insert(ids.begin(), ids.end()); + start_image_id = ids.rbegin()->first; + } + + if (!instance_ids.empty()) { + daemon_service_info.init(); + } + + ImageRequestGenerator<StatusImageRequest> generator( + io_ctx, instance_ids, daemon_service_info, formatter); + ret = generator.execute(); + + if (formatter != nullptr) { + formatter->close_section(); // images + } + } + + if (formatter != nullptr) { + formatter->close_section(); // status + formatter->flush(std::cout); + } + + return ret; +} + +void get_promote_arguments(po::options_description *positional, + po::options_description *options) { + options->add_options() + ("force", po::bool_switch(), + "promote even if not cleanly demoted by remote cluster"); + at::add_pool_options(positional, options, false); +} + +int execute_promote(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + std::atomic<unsigned> counter = { 0 }; + ImageRequestGenerator<PromoteImageRequest> generator(io_ctx, &counter, + vm["force"].as<bool>()); + r = generator.execute(); + + std::cout << "Promoted " << counter.load() << " mirrored images" << std::endl; + return r; +} + +void get_demote_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); +} + +int execute_demote(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + // TODO support namespaces + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + r = validate_mirroring_enabled(io_ctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + std::atomic<unsigned> counter { 0 }; + ImageRequestGenerator<DemoteImageRequest> generator(io_ctx, &counter); + r = generator.execute(); + + std::cout << "Demoted " << counter.load() << " mirrored images" << std::endl; + return r; +} + +Shell::Action action_bootstrap_create( + {"mirror", "pool", "peer", "bootstrap", "create"}, {}, + "Create a peer bootstrap token to import in a remote cluster", "", + &get_peer_bootstrap_create_arguments, &execute_peer_bootstrap_create); +Shell::Action action_bootstreap_import( + {"mirror", "pool", "peer", "bootstrap", "import"}, {}, + "Import a peer bootstrap token created from a remote cluster", "", + &get_peer_bootstrap_import_arguments, &execute_peer_bootstrap_import); + +Shell::Action action_add( + {"mirror", "pool", "peer", "add"}, {}, + "Add a mirroring peer to a pool.", "", + &get_peer_add_arguments, &execute_peer_add); +Shell::Action action_remove( + {"mirror", "pool", "peer", "remove"}, {}, + "Remove a mirroring peer from a pool.", "", + &get_peer_remove_arguments, &execute_peer_remove); +Shell::Action action_set( + {"mirror", "pool", "peer", "set"}, {}, + "Update mirroring peer settings.", "", + &get_peer_set_arguments, &execute_peer_set); + +Shell::Action action_disable( + {"mirror", "pool", "disable"}, {}, + "Disable RBD mirroring by default within a pool.", "", + &get_disable_arguments, &execute_disable); +Shell::Action action_enable( + {"mirror", "pool", "enable"}, {}, + "Enable RBD mirroring by default within a pool.", "", + &get_enable_arguments, &execute_enable); +Shell::Action action_info( + {"mirror", "pool", "info"}, {}, + "Show information about the pool mirroring configuration.", {}, + &get_info_arguments, &execute_info); +Shell::Action action_status( + {"mirror", "pool", "status"}, {}, + "Show status for all mirrored images in the pool.", {}, + &get_status_arguments, &execute_status); +Shell::Action action_promote( + {"mirror", "pool", "promote"}, {}, + "Promote all non-primary images in the pool.", {}, + &get_promote_arguments, &execute_promote); +Shell::Action action_demote( + {"mirror", "pool", "demote"}, {}, + "Demote all primary images in the pool.", {}, + &get_demote_arguments, &execute_demote); + +} // namespace mirror_pool +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Namespace.cc b/src/tools/rbd/action/Namespace.cc new file mode 100644 index 00000000..746ab40c --- /dev/null +++ b/src/tools/rbd/action/Namespace.cc @@ -0,0 +1,191 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <algorithm> +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace ns { + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_create_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +int execute_create(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + if (namespace_name.empty()) { + std::cerr << "rbd: namespace name was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.namespace_create(io_ctx, namespace_name.c_str()); + if (r < 0) { + std::cerr << "rbd: failed to created namespace: " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + if (namespace_name.empty()) { + std::cerr << "rbd: namespace name was not specified" << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.namespace_remove(io_ctx, namespace_name.c_str()); + if (r == -EBUSY) { + std::cerr << "rbd: namespace contains images which must be deleted first." + << std::endl; + return r; + } else if (r == -ENOENT) { + std::cerr << "rbd: namespace does not exist." << std::endl; + return r; + } else if (r < 0) { + std::cerr << "rbd: failed to remove namespace: " << cpp_strerror(r) + << std::endl; + return r; + } + + return 0; +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, true, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::vector<std::string> names; + r = rbd.namespace_list(io_ctx, &names); + if (r < 0 && r != -ENOENT) { + std::cerr << "rbd: failed to list namespaces: " << cpp_strerror(r) + << std::endl; + return r; + } + + std::sort(names.begin(), names.end()); + + TextTable tbl; + if (formatter) { + formatter->open_array_section("namespaces"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + } + + for (auto& name : names) { + if (formatter) { + formatter->open_object_section("namespace"); + formatter->dump_string("name", name); + formatter->close_section(); + } else { + tbl << name << TextTable::endrow; + } + } + + if (formatter) { + formatter->close_section(); + formatter->flush(std::cout); + } else if (!names.empty()) { + std::cout << tbl; + } + + return 0; +} + +Shell::Action action_create( + {"namespace", "create"}, {}, + "Create an RBD image namespace.", "", + &get_create_arguments, &execute_create); + +Shell::Action action_remove( + {"namespace", "remove"}, {"namespace", "rm"}, + "Remove an RBD image namespace.", "", + &get_remove_arguments, &execute_remove); + +Shell::Action action_list( + {"namespace", "list"}, {"namespace", "ls"}, "List RBD image namespaces.", "", + &get_list_arguments, &execute_list); + +} // namespace ns +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Nbd.cc b/src/tools/rbd/action/Nbd.cc new file mode 100644 index 00000000..5c55adea --- /dev/null +++ b/src/tools/rbd/action/Nbd.cc @@ -0,0 +1,286 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/SubProcess.h" +#include <iostream> +#include <boost/algorithm/string.hpp> +#include <boost/algorithm/string/predicate.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace nbd { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int call_nbd_cmd(const po::variables_map &vm, + const std::vector<std::string> &args, + const std::vector<std::string> &ceph_global_init_args) { + char exe_path[PATH_MAX]; + ssize_t exe_path_bytes = readlink("/proc/self/exe", exe_path, + sizeof(exe_path) - 1); + if (exe_path_bytes < 0) { + strcpy(exe_path, "rbd-nbd"); + } else { + if (snprintf(exe_path + exe_path_bytes, + sizeof(exe_path) - exe_path_bytes, + "-nbd") < 0) { + return -EOVERFLOW; + } + } + + SubProcess process(exe_path, SubProcess::KEEP, SubProcess::KEEP, SubProcess::KEEP); + + for (auto &arg : ceph_global_init_args) { + process.add_cmd_arg(arg.c_str()); + } + + for (auto &arg : args) { + process.add_cmd_arg(arg.c_str()); + } + + if (process.spawn()) { + std::cerr << "rbd: failed to run rbd-nbd: " << process.err() << std::endl; + return -EINVAL; + } else if (process.join()) { + std::cerr << "rbd: rbd-nbd failed with error: " << process.err() << std::endl; + return -EINVAL; + } + + return 0; +} + +int get_image_or_snap_spec(const po::variables_map &vm, std::string *spec) { + size_t arg_index = 0; + std::string pool_name; + std::string nspace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &nspace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + spec->append(pool_name); + spec->append("/"); + if (!nspace_name.empty()) { + spec->append(nspace_name); + spec->append("/"); + } + spec->append(image_name); + if (!snap_name.empty()) { + spec->append("@"); + spec->append(snap_name); + } + + return 0; +} + +int parse_options(const std::vector<std::string> &options, + std::vector<std::string> *args) { + for (auto &opts : options) { + std::vector<std::string> args_; + boost::split(args_, opts, boost::is_any_of(",")); + for (auto &o : args_) { + args->push_back("--" + o); + } + } + + return 0; +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#endif + std::vector<std::string> args; + + args.push_back("list-mapped"); + + if (vm.count("format")) { + args.push_back("--format"); + args.push_back(vm["format"].as<at::Format>().value); + } + if (vm["pretty-format"].as<bool>()) { + args.push_back("--pretty-format"); + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +} + +int execute_map(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#endif + std::vector<std::string> args; + + args.push_back("map"); + std::string img; + int r = get_image_or_snap_spec(vm, &img); + if (r < 0) { + return r; + } + args.push_back(img); + + if (vm["read-only"].as<bool>()) { + args.push_back("--read-only"); + } + + if (vm["exclusive"].as<bool>()) { + args.push_back("--exclusive"); + } + + if (vm.count("options")) { + r = parse_options(vm["options"].as<std::vector<std::string>>(), &args); + if (r < 0) { + return r; + } + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +} + +int execute_unmap(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { +#if defined(__FreeBSD__) + std::cerr << "rbd: nbd device is not supported" << std::endl; + return -EOPNOTSUPP; +#endif + std::string device_name = utils::get_positional_argument(vm, 0); + if (!boost::starts_with(device_name, "/dev/")) { + device_name.clear(); + } + + std::string image_name; + if (device_name.empty()) { + int r = get_image_or_snap_spec(vm, &image_name); + if (r < 0) { + return r; + } + } + + if (device_name.empty() && image_name.empty()) { + std::cerr << "rbd: unmap requires either image name or device path" + << std::endl; + return -EINVAL; + } + + std::vector<std::string> args; + + args.push_back("unmap"); + args.push_back(device_name.empty() ? image_name : device_name); + + if (vm.count("options")) { + int r = parse_options(vm["options"].as<std::vector<std::string>>(), &args); + if (r < 0) { + return r; + } + } + + return call_nbd_cmd(vm, args, ceph_global_init_args); +} + +void get_list_arguments_deprecated(po::options_description *positional, + po::options_description *options) { + at::add_format_options(options); +} + +int execute_list_deprecated(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args) { + std::cerr << "rbd: 'nbd list' command is deprecated, " + << "use 'device list -t nbd' instead" << std::endl; + return execute_list(vm, ceph_global_args); +} + +void get_map_arguments_deprecated(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + options->add_options() + ("read-only", po::bool_switch(), "map read-only") + ("exclusive", po::bool_switch(), "forbid writes by other clients") + ("device", po::value<std::string>(), "specify nbd device") + ("nbds_max", po::value<std::string>(), "override module param nbds_max") + ("max_part", po::value<std::string>(), "override module param max_part") + ("timeout", po::value<std::string>(), "set nbd request timeout (seconds)"); +} + +int execute_map_deprecated(const po::variables_map &vm_deprecated, + const std::vector<std::string> &ceph_global_args) { + std::cerr << "rbd: 'nbd map' command is deprecated, " + << "use 'device map -t nbd' instead" << std::endl; + + po::options_description options; + options.add_options() + ("options,o", po::value<std::vector<std::string>>() + ->default_value(std::vector<std::string>(), ""), ""); + + po::variables_map vm = vm_deprecated; + po::store(po::command_line_parser({}).options(options).run(), vm); + + std::vector<std::string> opts; + if (vm_deprecated.count("device")) { + opts.push_back("device=" + vm_deprecated["device"].as<std::string>()); + } + if (vm_deprecated.count("nbds_max")) { + opts.push_back("nbds_max=" + vm_deprecated["nbds_max"].as<std::string>()); + } + if (vm_deprecated.count("max_part")) { + opts.push_back("max_part=" + vm_deprecated["max_part"].as<std::string>()); + } + if (vm_deprecated.count("timeout")) { + opts.push_back("timeout=" + vm_deprecated["timeout"].as<std::string>()); + } + + vm.at("options").value() = boost::any(opts); + + return execute_map(vm, ceph_global_args); +} + +void get_unmap_arguments_deprecated(po::options_description *positional, + po::options_description *options) { + positional->add_options() + ("image-or-snap-or-device-spec", + "image, snapshot, or device specification\n" + "[<pool-name>/]<image-name>[@<snapshot-name>] or <device-path>"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_unmap_deprecated(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_args) { + std::cerr << "rbd: 'nbd unmap' command is deprecated, " + << "use 'device unmap -t nbd' instead" << std::endl; + return execute_unmap(vm, ceph_global_args); +} + +Shell::SwitchArguments switched_arguments({"read-only", "exclusive"}); + +Shell::Action action_show_deprecated( + {"nbd", "list"}, {"nbd", "ls"}, "List the nbd devices already used.", "", + &get_list_arguments_deprecated, &execute_list_deprecated, false); + +Shell::Action action_map_deprecated( + {"nbd", "map"}, {}, "Map image to a nbd device.", "", + &get_map_arguments_deprecated, &execute_map_deprecated, false); + +Shell::Action action_unmap_deprecated( + {"nbd", "unmap"}, {}, "Unmap a nbd device.", "", + &get_unmap_arguments_deprecated, &execute_unmap_deprecated, false); + +} // namespace nbd +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/ObjectMap.cc b/src/tools/rbd/action/ObjectMap.cc new file mode 100644 index 00000000..40ee2d47 --- /dev/null +++ b/src/tools/rbd/action/ObjectMap.cc @@ -0,0 +1,131 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace object_map { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_object_map_rebuild(librbd::Image &image, bool no_progress) +{ + utils::ProgressContext pc("Object Map Rebuild", no_progress); + int r = image.rebuild_object_map(pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_rebuild_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute_rebuild(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_object_map_rebuild(image, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: rebuilding object map failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +static int do_object_map_check(librbd::Image &image, bool no_progress) +{ + utils::ProgressContext pc("Object Map Check", no_progress); + int r = image.check_object_map(pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_check_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_or_snap_spec_options(positional, options, + at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute_check(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_PERMITTED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_object_map_check(image, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: checking object map failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +Shell::Action action_rebuild( + {"object-map", "rebuild"}, {}, "Rebuild an invalid object map.", "", + &get_rebuild_arguments, &execute_rebuild); +Shell::Action action_check( + {"object-map", "check"}, {}, "Verify the object map is correct.", "", + &get_check_arguments, &execute_check); + +} // namespace object_map +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Perf.cc b/src/tools/rbd/action/Perf.cc new file mode 100644 index 00000000..8f76e85f --- /dev/null +++ b/src/tools/rbd/action/Perf.cc @@ -0,0 +1,699 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/ceph_context.h" +#include "common/ceph_json.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "global/global_context.h" +#include <ncurses.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/types.h> +#include <iostream> +#include <vector> +#include <boost/algorithm/string.hpp> +#include <boost/assign.hpp> +#include <boost/bimap.hpp> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace perf { + +namespace at = argument_types; +namespace po = boost::program_options; + +namespace { + +enum class StatDescriptor { + WRITE_OPS = 0, + READ_OPS, + WRITE_BYTES, + READ_BYTES, + WRITE_LATENCY, + READ_LATENCY +}; + +typedef boost::bimap<StatDescriptor, std::string> StatDescriptors; + +static const StatDescriptors STAT_DESCRIPTORS = + boost::assign::list_of<StatDescriptors::relation> + (StatDescriptor::WRITE_OPS, "write_ops") + (StatDescriptor::READ_OPS, "read_ops") + (StatDescriptor::WRITE_BYTES, "write_bytes") + (StatDescriptor::READ_BYTES, "read_bytes") + (StatDescriptor::WRITE_LATENCY, "write_latency") + (StatDescriptor::READ_LATENCY, "read_latency"); + +std::ostream& operator<<(std::ostream& os, const StatDescriptor& val) { + auto it = STAT_DESCRIPTORS.left.find(val); + if (it == STAT_DESCRIPTORS.left.end()) { + os << "unknown (" << static_cast<int>(val) << ")"; + } else { + os << it->second; + } + return os; +} + +void validate(boost::any& v, const std::vector<std::string>& values, + StatDescriptor *target_type, int) { + po::validators::check_first_occurrence(v); + std::string s = po::validators::get_single_string(values); + boost::replace_all(s, "_", " "); + boost::replace_all(s, "-", "_"); + + auto it = STAT_DESCRIPTORS.right.find(s); + if (it == STAT_DESCRIPTORS.right.end()) { + throw po::validation_error(po::validation_error::invalid_option_value); + } + v = boost::any(it->second); +} + +struct ImageStat { + ImageStat(const std::string& pool_name, const std::string& pool_namespace, + const std::string& image_name) + : pool_name(pool_name), pool_namespace(pool_namespace), + image_name(image_name) { + stats.resize(STAT_DESCRIPTORS.size()); + } + + std::string pool_name; + std::string pool_namespace; + std::string image_name; + std::vector<double> stats; +}; + +typedef std::vector<ImageStat> ImageStats; + +typedef std::pair<std::string, std::string> SpecPair; + +std::string format_pool_spec(const std::string& pool, + const std::string& pool_namespace) { + std::string pool_spec{pool}; + if (!pool_namespace.empty()) { + pool_spec += "/" + pool_namespace; + } + return pool_spec; +} + +int query_iostats(librados::Rados& rados, const std::string& pool_spec, + StatDescriptor sort_by, ImageStats* image_stats, + std::ostream& err_os) { + auto sort_by_str = STAT_DESCRIPTORS.left.find(sort_by)->second; + + std::string cmd = R"( + { + "prefix": "rbd perf image stats", + "pool_spec": ")" + pool_spec + R"(", + "sort_by": ")" + sort_by_str + R"(", + "format": "json" + }")"; + + bufferlist in_bl; + bufferlist out_bl; + std::string outs; + int r = rados.mgr_command(cmd, in_bl, &out_bl, &outs); + if (r == -EOPNOTSUPP) { + err_os << "rbd: 'rbd_support' mgr module is not enabled." + << std::endl << std::endl + << "Use 'ceph mgr module enable rbd_support' to enable." + << std::endl; + return r; + } else if (r < 0) { + err_os << "rbd: mgr command failed: " << cpp_strerror(r); + if (!outs.empty()) { + err_os << ": " << outs; + } + err_os << std::endl; + return r; + } + + json_spirit::mValue json_root; + if (!json_spirit::read(out_bl.to_str(), json_root)) { + err_os << "rbd: error parsing perf stats" << std::endl; + return -EINVAL; + } + + image_stats->clear(); + try { + auto& root = json_root.get_obj(); + + // map JSON stat descriptor order to our internal order + std::map<uint32_t, uint32_t> json_to_internal_stats; + auto& json_stat_descriptors = root["stat_descriptors"].get_array(); + for (size_t idx = 0; idx < json_stat_descriptors.size(); ++idx) { + auto it = STAT_DESCRIPTORS.right.find( + json_stat_descriptors[idx].get_str()); + if (it == STAT_DESCRIPTORS.right.end()) { + continue; + } + json_to_internal_stats[idx] = static_cast<uint32_t>(it->second); + } + + // cache a mapping from pool descriptors back to pool-specs + std::map<std::string, SpecPair> json_to_internal_pools; + auto& pool_descriptors = root["pool_descriptors"].get_obj(); + for (auto& pool : pool_descriptors) { + auto& pool_spec = pool.second.get_str(); + auto pos = pool_spec.rfind("/"); + + SpecPair pair{pool_spec.substr(0, pos), ""}; + if (pos != std::string::npos) { + pair.second = pool_spec.substr(pos + 1); + } + + json_to_internal_pools[pool.first] = pair; + } + + auto& stats = root["stats"].get_array(); + for (auto& stat : stats) { + auto& stat_obj = stat.get_obj(); + if (!stat_obj.empty()) { + auto& image_spec = stat_obj.begin()->first; + + auto pos = image_spec.find("/"); + SpecPair pair{image_spec.substr(0, pos), ""}; + if (pos != std::string::npos) { + pair.second = image_spec.substr(pos + 1); + } + + const auto pool_it = json_to_internal_pools.find(pair.first); + if (pool_it == json_to_internal_pools.end()) { + continue; + } + + image_stats->emplace_back( + pool_it->second.first, pool_it->second.second, pair.second); + + auto& image_stat = image_stats->back(); + auto& data = stat_obj.begin()->second.get_array(); + for (auto& indexes : json_to_internal_stats) { + image_stat.stats[indexes.second] = data[indexes.first].get_real(); + } + } + } + } catch (std::runtime_error &e) { + err_os << "rbd: error parsing perf stats: " << e.what() << std::endl; + return -EINVAL; + } + + return 0; +} + +void format_stat(StatDescriptor stat_descriptor, double stat, + std::ostream& os) { + switch (stat_descriptor) { + case StatDescriptor::WRITE_OPS: + case StatDescriptor::READ_OPS: + os << si_u_t(stat) << "/s"; + break; + case StatDescriptor::WRITE_BYTES: + case StatDescriptor::READ_BYTES: + os << byte_u_t(stat) << "/s"; + break; + case StatDescriptor::WRITE_LATENCY: + case StatDescriptor::READ_LATENCY: + os << std::fixed << std::setprecision(2); + if (stat >= 1000000000) { + os << (stat / 1000000000) << " s"; + } else if (stat >= 1000000) { + os << (stat / 1000000) << " ms"; + } else if (stat >= 1000) { + os << (stat / 1000) << " us"; + } else { + os << stat << " ns"; + } + break; + default: + ceph_assert(false); + break; + } +} + +} // anonymous namespace + +namespace iostat { + +struct Iterations {}; + +void validate(boost::any& v, const std::vector<std::string>& values, + Iterations *target_type, int) { + po::validators::check_first_occurrence(v); + auto& s = po::validators::get_single_string(values); + + try { + auto iterations = boost::lexical_cast<uint32_t>(s); + if (iterations > 0) { + v = boost::any(iterations); + return; + } + } catch (const boost::bad_lexical_cast &) { + } + throw po::validation_error(po::validation_error::invalid_option_value); +} + +void format(const ImageStats& image_stats, Formatter* f, bool global_search) { + TextTable tbl; + if (f) { + f->open_array_section("images"); + } else { + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + for (auto& stat : STAT_DESCRIPTORS.left) { + std::string title; + switch (stat.first) { + case StatDescriptor::WRITE_OPS: + title = "WR "; + break; + case StatDescriptor::READ_OPS: + title = "RD "; + break; + case StatDescriptor::WRITE_BYTES: + title = "WR_BYTES "; + break; + case StatDescriptor::READ_BYTES: + title = "RD_BYTES "; + break; + case StatDescriptor::WRITE_LATENCY: + title = "WR_LAT "; + break; + case StatDescriptor::READ_LATENCY: + title = "RD_LAT "; + break; + default: + ceph_assert(false); + break; + } + tbl.define_column(title, TextTable::RIGHT, TextTable::RIGHT); + } + } + + for (auto& image_stat : image_stats) { + if (f) { + f->open_object_section("image"); + f->dump_string("pool", image_stat.pool_name); + f->dump_string("pool_namespace", image_stat.pool_namespace); + f->dump_string("image", image_stat.image_name); + for (auto& pair : STAT_DESCRIPTORS.left) { + f->dump_float(pair.second.c_str(), + image_stat.stats[static_cast<size_t>(pair.first)]); + } + f->close_section(); + } else { + std::string name; + if (global_search) { + name += image_stat.pool_name + "/"; + if (!image_stat.pool_namespace.empty()) { + name += image_stat.pool_namespace + "/"; + } + } + name += image_stat.image_name; + + tbl << name; + for (auto& pair : STAT_DESCRIPTORS.left) { + std::stringstream str; + format_stat(pair.first, + image_stat.stats[static_cast<size_t>(pair.first)], str); + str << ' '; + tbl << str.str(); + } + tbl << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else { + std::cout << tbl << std::endl; + } +} + +} // namespace iostat + +namespace iotop { + +class MainWindow { +public: + MainWindow(librados::Rados& rados, const std::string& pool_spec) + : m_rados(rados), m_pool_spec(pool_spec) { + initscr(); + curs_set(0); + cbreak(); + noecho(); + keypad(stdscr, TRUE); + nodelay(stdscr, TRUE); + + init_columns(); + } + + int run() { + redraw(); + + int r = 0; + std::stringstream err_str; + while (true) { + r = query_iostats(m_rados, m_pool_spec, m_sort_by, &m_image_stats, + err_str); + if (r < 0) { + break; + return r; + } + + redraw(); + wait_for_key_or_delay(); + + int ch = getch(); + if (ch == 'q' || ch == 'Q') { + break; + } else if (ch == '<' || ch == KEY_LEFT) { + auto it = STAT_DESCRIPTORS.left.find(m_sort_by); + if (it != STAT_DESCRIPTORS.left.begin()) { + m_sort_by = (--it)->first; + } + } else if (ch == '>' || ch == KEY_RIGHT) { + auto it = STAT_DESCRIPTORS.left.find(m_sort_by); + if (it != STAT_DESCRIPTORS.left.end() && + ++it != STAT_DESCRIPTORS.left.end()) { + m_sort_by = it->first; + } + } + } + + endwin(); + + if (r < 0) { + std::cerr << err_str.str() << std::endl; + } + return r; + } + +private: + static const size_t STAT_COLUMN_WIDTH = 12; + + librados::Rados& m_rados; + std::string m_pool_spec; + + ImageStats m_image_stats; + StatDescriptor m_sort_by = StatDescriptor::WRITE_OPS; + + bool m_pending_win_opened = false; + WINDOW* m_pending_win = nullptr; + + int m_height = 1; + int m_width = 1; + + std::map<StatDescriptor, std::string> m_columns; + + void init_columns() { + m_columns.clear(); + for (auto& pair : STAT_DESCRIPTORS.left) { + std::string title; + switch (pair.first) { + case StatDescriptor::WRITE_OPS: + title = "WRITES OPS"; + break; + case StatDescriptor::READ_OPS: + title = "READS OPS"; + break; + case StatDescriptor::WRITE_BYTES: + title = "WRITE BYTES"; + break; + case StatDescriptor::READ_BYTES: + title = "READ BYTES"; + break; + case StatDescriptor::WRITE_LATENCY: + title = "WRITE LAT"; + break; + case StatDescriptor::READ_LATENCY: + title = "READ LAT"; + break; + default: + ceph_assert(false); + break; + } + m_columns[pair.first] = (title); + } + } + + void redraw() { + getmaxyx(stdscr, m_height, m_width); + + redraw_main_window(); + redraw_pending_window(); + + doupdate(); + } + + void redraw_main_window() { + werase(stdscr); + mvhline(0, 0, ' ' | A_REVERSE, m_width); + + // print header for all metrics + int remaining_cols = m_width; + std::stringstream str; + for (auto& pair : m_columns) { + int attr = A_REVERSE; + std::string title; + if (pair.first == m_sort_by) { + title += '>'; + attr |= A_BOLD; + } else { + title += ' '; + } + title += pair.second; + + str.str(""); + str << std::right << std::setfill(' ') + << std::setw(STAT_COLUMN_WIDTH) + << title << ' '; + + attrset(attr); + addstr(str.str().c_str()); + remaining_cols -= title.size(); + } + + attrset(A_REVERSE); + addstr("IMAGE"); + attrset(A_NORMAL); + + // print each image (one per line) + int row = 1; + int remaining_lines = m_height - 1; + for (auto& image_stat : m_image_stats) { + if (remaining_lines <= 0) { + break; + } + --remaining_lines; + + move(row++, 0); + for (auto& pair : m_columns) { + str.str(""); + format_stat(pair.first, + image_stat.stats[static_cast<size_t>(pair.first)], str); + auto value = str.str().substr(0, STAT_COLUMN_WIDTH); + + str.str(""); + str << std::right << std::setfill(' ') + << std::setw(STAT_COLUMN_WIDTH) + << value << ' '; + addstr(str.str().c_str()); + } + + std::string image; + if (m_pool_spec.empty()) { + image = format_pool_spec(image_stat.pool_name, + image_stat.pool_namespace) + "/"; + } + image += image_stat.image_name; + addstr(image.substr(0, remaining_cols).c_str()); + } + + wnoutrefresh(stdscr); + } + + void redraw_pending_window() { + // draw a "please by patient" window while waiting + const char* msg = "Waiting for initial stats"; + int height = 5; + int width = strlen(msg) + 4;; + int starty = (m_height - height) / 2; + int startx = (m_width - width) / 2; + + if (m_image_stats.empty() && !m_pending_win_opened) { + m_pending_win_opened = true; + m_pending_win = newwin(height, width, starty, startx); + } + + if (m_pending_win != nullptr) { + if (m_image_stats.empty()) { + box(m_pending_win, 0 , 0); + mvwaddstr(m_pending_win, 2, 2, msg); + wnoutrefresh(m_pending_win); + } else { + delwin(m_pending_win); + m_pending_win = nullptr; + } + } + } + + void wait_for_key_or_delay() { + fd_set fds; + FD_ZERO(&fds); + FD_SET(STDIN_FILENO, &fds); + + // no point to refreshing faster than the stats period + struct timeval tval; + tval.tv_sec = std::min<uint32_t>( + 10, g_conf().get_val<int64_t>("mgr_stats_period")); + tval.tv_usec = 0; + + select(STDIN_FILENO + 1, &fds, NULL, NULL, &tval); + } +}; + +} // namespace iotop + + +void get_arguments_iostat(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + options->add_options() + ("iterations", po::value<iostat::Iterations>(), + "iterations of metric collection [> 0]") + ("sort-by", po::value<StatDescriptor>()->default_value(StatDescriptor::WRITE_OPS), + "sort-by IO metric " + "(write-ops, read-ops, write-bytes, read-bytes, write-latency, read-latency) " + "[default: write-ops]"); + at::add_format_options(options); +} + +int execute_iostat(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool; + std::string pool_namespace; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, false, &pool, + &pool_namespace, &arg_index); + if (r < 0) { + return r; + } + + uint32_t iterations = 0; + if (vm.count("iterations")) { + iterations = vm["iterations"].as<uint32_t>(); + } + auto sort_by = vm["sort-by"].as<StatDescriptor>(); + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + auto f = formatter.get(); + if (iterations > 1 && f != nullptr) { + std::cerr << "rbd: specifing iterations is not valid with formatted output" + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + r = rados.wait_for_latest_osdmap(); + if (r < 0) { + std::cerr << "rbd: failed to retrieve OSD map" << std::endl; + return r; + } + + std::string pool_spec = format_pool_spec(pool, pool_namespace); + + // no point to refreshing faster than the stats period + auto delay = std::min<uint32_t>(10, g_conf().get_val<int64_t>("mgr_stats_period")); + + ImageStats image_stats; + uint32_t count = 0; + bool printed_notice = false; + while (count++ < iterations || iterations == 0) { + r = query_iostats(rados, pool_spec, sort_by, &image_stats, std::cerr); + if (r < 0) { + return r; + } + + if (count == 1 && image_stats.empty()) { + count = 0; + if (!printed_notice) { + std::cerr << "rbd: waiting for initial image stats" + << std::endl << std::endl;; + printed_notice = true; + } + } else { + iostat::format(image_stats, f, pool_spec.empty()); + if (f != nullptr) { + break; + } + } + + sleep(delay); + } + + return 0; +} + +void get_arguments_iotop(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); +} + +int execute_iotop(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool; + std::string pool_namespace; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, false, false, &pool, + &pool_namespace, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + r = utils::init_rados(&rados); + if (r < 0) { + return r; + } + + r = rados.wait_for_latest_osdmap(); + if (r < 0) { + std::cerr << "rbd: failed to retrieve OSD map" << std::endl; + return r; + } + + iotop::MainWindow mainWindow(rados, format_pool_spec(pool, pool_namespace)); + r = mainWindow.run(); + if (r < 0) { + return r; + } + + return 0; +} + +Shell::Action stat_action( + {"perf", "image", "iostat"}, {}, "Display image IO statistics.", "", + &get_arguments_iostat, &execute_iostat); +Shell::Action top_action( + {"perf", "image", "iotop"}, {}, "Display a top-like IO monitor.", "", + &get_arguments_iotop, &execute_iotop); + +} // namespace perf +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Pool.cc b/src/tools/rbd/action/Pool.cc new file mode 100644 index 00000000..f1718eb1 --- /dev/null +++ b/src/tools/rbd/action/Pool.cc @@ -0,0 +1,162 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace pool { + +namespace at = argument_types; +namespace po = boost::program_options; + +void get_arguments_init(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, false); + options->add_options() + ("force", po::bool_switch(), + "force initialize pool for RBD use if registered by another application"); +} + +int execute_init(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + nullptr, &arg_index); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, "", &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = rbd.pool_init(io_ctx, vm["force"].as<bool>()); + if (r == -EOPNOTSUPP) { + std::cerr << "rbd: luminous or later release required." << std::endl; + } else if (r == -EPERM) { + std::cerr << "rbd: pool already registered to a different application." + << std::endl; + } else if (r < 0) { + std::cerr << "rbd: error registered application: " << cpp_strerror(r) + << std::endl; + } + + return 0; +} + +void get_arguments_stats(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_format_options(options); +} + +int execute_stats(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + uint64_t image_count; + uint64_t provisioned_bytes; + uint64_t snap_count; + uint64_t trash_count; + uint64_t trash_provisioned_bytes; + uint64_t trash_snap_count; + + librbd::PoolStats pool_stats; + pool_stats.add(RBD_POOL_STAT_OPTION_IMAGES, &image_count); + pool_stats.add(RBD_POOL_STAT_OPTION_IMAGE_MAX_PROVISIONED_BYTES, + &provisioned_bytes); + pool_stats.add(RBD_POOL_STAT_OPTION_IMAGE_SNAPSHOTS, &snap_count); + pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_IMAGES, &trash_count); + pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_MAX_PROVISIONED_BYTES, + &trash_provisioned_bytes); + pool_stats.add(RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS, &trash_snap_count); + + r = rbd.pool_stats_get(io_ctx, &pool_stats); + if (r < 0) { + std::cerr << "rbd: failed to query pool stats: " << cpp_strerror(r) + << std::endl; + return r; + } + + if (formatter) { + formatter->open_object_section("stats"); + formatter->open_object_section("images"); + formatter->dump_unsigned("count", image_count); + formatter->dump_unsigned("provisioned_bytes", provisioned_bytes); + formatter->dump_unsigned("snap_count", snap_count); + formatter->close_section(); + formatter->open_object_section("trash"); + formatter->dump_unsigned("count", trash_count); + formatter->dump_unsigned("provisioned_bytes", trash_provisioned_bytes); + formatter->dump_unsigned("snap_count", trash_snap_count); + formatter->close_section(); + formatter->close_section(); + formatter->flush(std::cout); + } else { + std::cout << "Total Images: " << image_count; + if (trash_count > 0) { + std::cout << " (" << trash_count << " in trash)"; + } + std::cout << std::endl; + + std::cout << "Total Snapshots: " << snap_count; + if (trash_count > 0) { + std::cout << " (" << trash_snap_count << " in trash)"; + } + std::cout << std::endl; + + std::cout << "Provisioned Size: " << byte_u_t(provisioned_bytes); + if (trash_count > 0) { + std::cout << " (" << byte_u_t(trash_provisioned_bytes) << " in trash)"; + } + std::cout << std::endl; + } + + return 0; +} + +Shell::Action init_action( + {"pool", "init"}, {}, "Initialize pool for use by RBD.", "", + &get_arguments_init, &execute_init); +Shell::Action stat_action( + {"pool", "stats"}, {}, "Display pool statistics.", + "Note: legacy v1 images are not included in stats", + &get_arguments_stats, &execute_stats); + +} // namespace pool +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Remove.cc b/src/tools/rbd/action/Remove.cc new file mode 100644 index 00000000..337d42be --- /dev/null +++ b/src/tools/rbd/action/Remove.cc @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "include/stringify.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace remove { + +namespace { + +bool is_auto_delete_snapshot(librbd::Image* image, + const librbd::snap_info_t &snap_info) { + librbd::snap_namespace_type_t namespace_type; + int r = image->snap_get_namespace_type(snap_info.id, &namespace_type); + if (r < 0) { + return false; + } + + switch (namespace_type) { + case RBD_SNAP_NAMESPACE_TYPE_TRASH: + return true; + default: + return false; + } +} + +} // anonymous namespace + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_delete(librbd::RBD &rbd, librados::IoCtx& io_ctx, + const char *imgname, bool no_progress) +{ + utils::ProgressContext pc("Removing image", no_progress); + int r = rbd.remove_with_progress(io_ctx, imgname, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_osdmap_full_try(); + + librbd::RBD rbd; + r = do_delete(rbd, io_ctx, image_name.c_str(), + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + if (r == -ENOTEMPTY) { + librbd::Image image; + std::vector<librbd::snap_info_t> snaps; + int image_r = utils::open_image(io_ctx, image_name, true, &image); + if (image_r >= 0) { + image_r = image.snap_list(snaps); + } + if (image_r >= 0) { + snaps.erase(std::remove_if(snaps.begin(), snaps.end(), + [&image](const librbd::snap_info_t& snap) { + return is_auto_delete_snapshot(&image, + snap); + }), + snaps.end()); + } + + if (!snaps.empty()) { + std::cerr << "rbd: image has snapshots - these must be deleted" + << " with 'rbd snap purge' before the image can be removed." + << std::endl; + } else { + std::cerr << "rbd: image has snapshots with linked clones - these must " + << "be deleted or flattened before the image can be removed." + << std::endl; + } + } else if (r == -EBUSY) { + std::cerr << "rbd: error: image still has watchers" + << std::endl + << "This means the image is still open or the client using " + << "it crashed. Try again after closing/unmapping it or " + << "waiting 30s for the crashed client to timeout." + << std::endl; + } else if (r == -EMLINK) { + librbd::Image image; + int image_r = utils::open_image(io_ctx, image_name, true, &image); + librbd::group_info_t group_info; + if (image_r == 0) { + image_r = image.get_group(&group_info, sizeof(group_info)); + } + if (image_r == 0) { + std::string pool_name = ""; + librados::Rados rados(io_ctx); + librados::IoCtx pool_io_ctx; + image_r = rados.ioctx_create2(group_info.pool, pool_io_ctx); + if (image_r < 0) { + pool_name = "<missing group pool " + stringify(group_info.pool) + ">"; + } else { + pool_name = pool_io_ctx.get_pool_name(); + } + std::cerr << "rbd: error: image belongs to a group " + << pool_name << "/"; + if (!io_ctx.get_namespace().empty()) { + std::cerr << io_ctx.get_namespace() << "/"; + } + std::cerr << group_info.name; + } else + std::cerr << "rbd: error: image belongs to a group"; + + std::cerr << std::endl + << "Remove the image from the group and try again." + << std::endl; + image.close(); + } else { + std::cerr << "rbd: delete error: " << cpp_strerror(r) << std::endl; + } + return r; + } + return 0; +} + +Shell::Action action( + {"remove"}, {"rm"}, "Delete an image.", "", &get_arguments, &execute); + +} // namespace remove +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Rename.cc b/src/tools/rbd/action/Rename.cc new file mode 100644 index 00000000..b4954bcb --- /dev/null +++ b/src/tools/rbd/action/Rename.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace rename { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_rename(librbd::RBD &rbd, librados::IoCtx& io_ctx, + const char *imgname, const char *destname) +{ + int r = rbd.rename(io_ctx, imgname, destname); + if (r < 0) + return r; + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + std::string dst_image_name; + std::string dst_snap_name; + std::string dst_pool_name = pool_name; + std::string dst_namespace_name = namespace_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, + &dst_namespace_name, &dst_image_name, &dst_snap_name, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + if (pool_name != dst_pool_name) { + std::cerr << "rbd: mv/rename across pools not supported" << std::endl + << "source pool: " << pool_name << " dest pool: " << dst_pool_name + << std::endl; + return -EINVAL; + } else if (namespace_name != dst_namespace_name) { + std::cerr << "rbd: mv/rename across namespaces not supported" << std::endl + << "source namespace: " << namespace_name << " dest namespace: " + << dst_namespace_name << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + r = do_rename(rbd, io_ctx, image_name.c_str(), dst_image_name.c_str()); + if (r < 0) { + std::cerr << "rbd: rename error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"rename"}, {"mv"}, "Rename image within pool.", "", &get_arguments, + &execute); + +} // namespace rename +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Resize.cc b/src/tools/rbd/action/Resize.cc new file mode 100644 index 00000000..60c16429 --- /dev/null +++ b/src/tools/rbd/action/Resize.cc @@ -0,0 +1,106 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace resize { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_resize(librbd::Image& image, uint64_t size, bool allow_shrink, bool no_progress) +{ + utils::ProgressContext pc("Resizing image", no_progress); + int r = image.resize2(size, allow_shrink, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_size_option(options); + options->add_options() + ("allow-shrink", po::bool_switch(), "permit shrinking"); + at::add_no_progress_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + uint64_t size; + r = utils::get_image_size(vm, &size); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", + snap_name, false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + librbd::image_info_t info; + r = image.stat(info, sizeof(info)); + if (r < 0) { + std::cerr << "rbd: resize error: " << cpp_strerror(r) << std::endl; + return r; + } + + if (info.size == size) { + std::cerr << "rbd: new size is equal to original size " << std::endl; + return -EINVAL; + } + + if (info.size > size && !vm["allow-shrink"].as<bool>()) { + r = -EINVAL; + } else { + r = do_resize(image, size, vm["allow-shrink"].as<bool>(), vm[at::NO_PROGRESS].as<bool>()); + } + + if (r < 0) { + if (r == -EINVAL && !vm["allow-shrink"].as<bool>()) { + std::cerr << "rbd: shrinking an image is only allowed with the " + << "--allow-shrink flag" << std::endl; + return r; + } + std::cerr << "rbd: resize error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::SwitchArguments switched_arguments({"allow-shrink"}); +Shell::Action action( + {"resize"}, {}, "Resize (expand or shrink) image.", "", &get_arguments, + &execute); + +} // namespace resize +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Snap.cc b/src/tools/rbd/action/Snap.cc new file mode 100644 index 00000000..70cf62da --- /dev/null +++ b/src/tools/rbd/action/Snap.cc @@ -0,0 +1,889 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include <iostream> +#include <boost/program_options.hpp> +#include <boost/bind.hpp> + +namespace rbd { +namespace action { +namespace snap { + +static const std::string ALL_NAME("all"); + +namespace at = argument_types; +namespace po = boost::program_options; + +int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados::Rados& rados) +{ + std::vector<librbd::snap_info_t> snaps; + TextTable t; + int r; + + r = image.snap_list(snaps); + if (r < 0) { + std::cerr << "rbd: unable to list snapshots" << std::endl; + return r; + } + + if (!all_snaps) { + snaps.erase(remove_if(snaps.begin(), + snaps.end(), + boost::bind(utils::is_not_user_snap_namespace, &image, _1)), + snaps.end()); + } + + if (f) { + f->open_array_section("snapshots"); + } else { + t.define_column("SNAPID", TextTable::LEFT, TextTable::RIGHT); + t.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + t.define_column("SIZE", TextTable::LEFT, TextTable::RIGHT); + t.define_column("PROTECTED", TextTable::LEFT, TextTable::LEFT); + t.define_column("TIMESTAMP", TextTable::LEFT, TextTable::RIGHT); + if (all_snaps) { + t.define_column("NAMESPACE", TextTable::LEFT, TextTable::LEFT); + } + } + + std::list<std::pair<int64_t, std::string>> pool_list; + rados.pool_list2(pool_list); + std::map<int64_t, std::string> pool_map(pool_list.begin(), pool_list.end()); + + for (std::vector<librbd::snap_info_t>::iterator s = snaps.begin(); + s != snaps.end(); ++s) { + struct timespec timestamp; + bool snap_protected = false; + image.snap_get_timestamp(s->id, ×tamp); + string tt_str = ""; + if(timestamp.tv_sec != 0) { + time_t tt = timestamp.tv_sec; + tt_str = ctime(&tt); + tt_str = tt_str.substr(0, tt_str.length() - 1); + } + + librbd::snap_namespace_type_t snap_namespace; + r = image.snap_get_namespace_type(s->id, &snap_namespace); + if (r < 0) { + std::cerr << "rbd: unable to retrieve snap namespace" << std::endl; + return r; + } + + std::string snap_namespace_name = "Unknown"; + switch (snap_namespace) { + case RBD_SNAP_NAMESPACE_TYPE_USER: + snap_namespace_name = "user"; + break; + case RBD_SNAP_NAMESPACE_TYPE_GROUP: + snap_namespace_name = "group"; + break; + case RBD_SNAP_NAMESPACE_TYPE_TRASH: + snap_namespace_name = "trash"; + break; + } + + int get_trash_res = -ENOENT; + std::string trash_original_name; + int get_group_res = -ENOENT; + librbd::snap_group_namespace_t group_snap; + if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_GROUP) { + get_group_res = image.snap_get_group_namespace(s->id, &group_snap, + sizeof(group_snap)); + } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_TRASH) { + get_trash_res = image.snap_get_trash_namespace( + s->id, &trash_original_name); + } + + std::string protected_str = ""; + if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_USER) { + r = image.snap_is_protected(s->name.c_str(), &snap_protected); + if (r < 0) { + std::cerr << "rbd: unable to retrieve snap protection" << std::endl; + return r; + } + } + + if (f) { + protected_str = snap_protected ? "true" : "false"; + f->open_object_section("snapshot"); + f->dump_unsigned("id", s->id); + f->dump_string("name", s->name); + f->dump_unsigned("size", s->size); + f->dump_string("protected", protected_str); + f->dump_string("timestamp", tt_str); + if (all_snaps) { + f->open_object_section("namespace"); + f->dump_string("type", snap_namespace_name); + if (get_group_res == 0) { + std::string pool_name = pool_map[group_snap.group_pool]; + f->dump_string("pool", pool_name); + f->dump_string("group", group_snap.group_name); + f->dump_string("group snap", group_snap.group_snap_name); + } else if (get_trash_res == 0) { + f->dump_string("original_name", trash_original_name); + } + f->close_section(); + } + f->close_section(); + } else { + protected_str = snap_protected ? "yes" : ""; + t << s->id << s->name << stringify(byte_u_t(s->size)) << protected_str << tt_str; + + if (all_snaps) { + ostringstream oss; + oss << snap_namespace_name; + + if (get_group_res == 0) { + std::string pool_name = pool_map[group_snap.group_pool]; + oss << " (" << pool_name << "/" + << group_snap.group_name << "@" + << group_snap.group_snap_name << ")"; + } else if (get_trash_res == 0) { + oss << " (" << trash_original_name << ")"; + } + + t << oss.str(); + } + t << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else if (snaps.size()) { + std::cout << t; + } + + return 0; +} + +int do_add_snap(librbd::Image& image, const char *snapname) +{ + int r = image.snap_create(snapname); + if (r < 0) + return r; + + return 0; +} + +int do_remove_snap(librbd::Image& image, const char *snapname, bool force, + bool no_progress) +{ + uint32_t flags = force? RBD_SNAP_REMOVE_FORCE : 0; + int r = 0; + utils::ProgressContext pc("Removing snap", no_progress); + + r = image.snap_remove2(snapname, flags, pc); + if (r < 0) { + pc.fail(); + return r; + } + + pc.finish(); + return 0; +} + +int do_rollback_snap(librbd::Image& image, const char *snapname, + bool no_progress) +{ + utils::ProgressContext pc("Rolling back to snapshot", no_progress); + int r = image.snap_rollback_with_progress(snapname, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +int do_purge_snaps(librbd::Image& image, bool no_progress) +{ + utils::ProgressContext pc("Removing all snapshots", no_progress); + std::vector<librbd::snap_info_t> snaps; + bool is_protected = false; + int r = image.snap_list(snaps); + if (r < 0) { + pc.fail(); + return r; + } else if (0 == snaps.size()) { + return 0; + } else { + list<std::string> protect; + snaps.erase(remove_if(snaps.begin(), + snaps.end(), + boost::bind(utils::is_not_user_snap_namespace, &image, _1)), + snaps.end()); + for (auto it = snaps.begin(); it != snaps.end();) { + r = image.snap_is_protected(it->name.c_str(), &is_protected); + if (r < 0) { + pc.fail(); + return r; + } else if (is_protected == true) { + protect.push_back(it->name.c_str()); + snaps.erase(it); + } else { + ++it; + } + } + + if (!protect.empty()) { + std::cout << "rbd: error removing snapshot(s) '" << protect << "', which " + << (1 == protect.size() ? "is" : "are") + << " protected - these must be unprotected with " + << "`rbd snap unprotect`." + << std::endl; + } + for (size_t i = 0; i < snaps.size(); ++i) { + r = image.snap_remove(snaps[i].name.c_str()); + if (r < 0) { + pc.fail(); + return r; + } + pc.update_progress(i + 1, snaps.size() + protect.size()); + } + + if (!protect.empty()) { + pc.fail(); + } else if (snaps.size() > 0) { + pc.finish(); + } + + return 0; + } +} + +int do_protect_snap(librbd::Image& image, const char *snapname) +{ + int r = image.snap_protect(snapname); + if (r < 0) + return r; + + return 0; +} + +int do_unprotect_snap(librbd::Image& image, const char *snapname) +{ + int r = image.snap_unprotect(snapname); + if (r < 0) + return r; + + return 0; +} + +int do_set_limit(librbd::Image& image, uint64_t limit) +{ + return image.snap_set_limit(limit); +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_format_options(options); + + std::string name = ALL_NAME + ",a"; + + options->add_options() + (name.c_str(), po::bool_switch(), "list snapshots from all namespaces"); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, + image_id, "", true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + bool all_snaps = vm[ALL_NAME].as<bool>(); + r = do_list_snaps(image, formatter.get(), all_snaps, rados); + if (r < 0) { + cerr << "rbd: failed to list snapshots: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_create_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_create(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_SNAP); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_add_snap(image, snap_name.c_str()); + if (r < 0) { + cerr << "rbd: failed to create snapshot: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_snap_id_option(options); + at::add_no_progress_option(options); + + options->add_options() + ("force", po::bool_switch(), "flatten children and unprotect snapshot if needed."); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + uint64_t snap_id = CEPH_NOSNAP; + bool force = vm["force"].as<bool>(); + bool no_progress = vm[at::NO_PROGRESS].as<bool>(); + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + if (vm.count(at::SNAPSHOT_ID)) { + snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + (snap_id == CEPH_NOSNAP ? utils::SNAPSHOT_PRESENCE_REQUIRED : + utils::SNAPSHOT_PRESENCE_PERMITTED), + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id." + << std::endl; + return -EINVAL; + } else if (!snap_name.empty() && snap_id != CEPH_NOSNAP) { + std::cerr << "rbd: trying to access snapshot using both name and id." + << std::endl; + return -EINVAL; + } else if ((force || no_progress) && snap_id != CEPH_NOSNAP) { + std::cerr << "rbd: force and no-progress options not permitted when " + << "removing by id." << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_osdmap_full_try(); + if (image_id.empty()) { + r = utils::open_image(io_ctx, image_name, false, &image); + } else { + r = utils::open_image_by_id(io_ctx, image_id, false, &image); + } + if (r < 0) { + return r; + } + + if (!snap_name.empty()) { + r = do_remove_snap(image, snap_name.c_str(), force, no_progress); + } else { + r = image.snap_remove_by_id(snap_id); + } + + if (r < 0) { + if (r == -EBUSY) { + std::cerr << "rbd: snapshot " + << (snap_name.empty() ? std::string("id ") + stringify(snap_id) : + std::string("'") + snap_name + "'") + << " is protected from removal." << std::endl; + } else { + std::cerr << "rbd: failed to remove snapshot: " << cpp_strerror(r) + << std::endl; + } + return r; + } + return 0; +} + +void get_purge_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_no_progress_option(options); +} + +int execute_purge(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_osdmap_full_try(); + if (image_id.empty()) { + r = utils::open_image(io_ctx, image_name, false, &image); + } else { + r = utils::open_image_by_id(io_ctx, image_id, false, &image); + } + if (r < 0) { + return r; + } + + r = do_purge_snaps(image, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + if (r != -EBUSY) { + std::cerr << "rbd: removing snaps failed: " << cpp_strerror(r) + << std::endl; + } + return r; + } + return 0; +} + +void get_rollback_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); +} + +int execute_rollback(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_rollback_snap(image, snap_name.c_str(), + vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: rollback failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +void get_protect_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_protect(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + bool is_protected = false; + r = image.snap_is_protected(snap_name.c_str(), &is_protected); + if (r < 0) { + std::cerr << "rbd: protecting snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } else if (is_protected) { + std::cerr << "rbd: snap is already protected" << std::endl; + return -EBUSY; + } + + r = do_protect_snap(image, snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: protecting snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_unprotect_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); +} + +int execute_unprotect(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + std::string image_id; + + if (vm.count(at::IMAGE_ID)) { + image_id = vm[at::IMAGE_ID].as<std::string>(); + } + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, image_id.empty(), + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (!image_id.empty() && !image_name.empty()) { + std::cerr << "rbd: trying to access image using both name and id. " + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_osdmap_full_try(); + if (image_id.empty()) { + r = utils::open_image(io_ctx, image_name, false, &image); + } else { + r = utils::open_image_by_id(io_ctx, image_id, false, &image); + } + if (r < 0) { + return r; + } + + bool is_protected = false; + r = image.snap_is_protected(snap_name.c_str(), &is_protected); + if (r < 0) { + std::cerr << "rbd: unprotecting snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } else if (!is_protected) { + std::cerr << "rbd: snap is already unprotected" << std::endl; + return -EINVAL; + } + + r = do_unprotect_snap(image, snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: unprotecting snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_set_limit_arguments(po::options_description *pos, + po::options_description *opt) { + at::add_image_spec_options(pos, opt, at::ARGUMENT_MODIFIER_NONE); + at::add_limit_option(opt); +} + +int execute_set_limit(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + uint64_t limit; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + if (vm.count(at::LIMIT)) { + limit = vm[at::LIMIT].as<uint64_t>(); + } else { + std::cerr << "rbd: must specify --limit <num>" << std::endl; + return -ERANGE; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_set_limit(image, limit); + if (r < 0) { + std::cerr << "rbd: setting snapshot limit failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_clear_limit_arguments(po::options_description *pos, + po::options_description *opt) { + at::add_image_spec_options(pos, opt, at::ARGUMENT_MODIFIER_NONE); +} + +int execute_clear_limit(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_set_limit(image, UINT64_MAX); + if (r < 0) { + std::cerr << "rbd: clearing snapshot limit failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +void get_rename_arguments(po::options_description *positional, + po::options_description *options) { + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); +} + +int execute_rename(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string src_snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, + &image_name, &src_snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return -r; + } + + std::string dest_pool_name(pool_name); + std::string dest_namespace_name(namespace_name); + std::string dest_image_name; + std::string dest_snap_name; + r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dest_pool_name, + &dest_namespace_name, &dest_image_name, &dest_snap_name, true, + utils::SNAPSHOT_PRESENCE_REQUIRED, utils::SPEC_VALIDATION_SNAP); + if (r < 0) { + return -r; + } + + if (pool_name != dest_pool_name) { + std::cerr << "rbd: source and destination pool must be the same" + << std::endl; + return -EINVAL; + } else if (namespace_name != dest_namespace_name) { + std::cerr << "rbd: source and destination namespace must be the same" + << std::endl; + return -EINVAL; + } else if (image_name != dest_image_name) { + std::cerr << "rbd: source and destination image name must be the same" + << std::endl; + return -EINVAL; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = image.snap_rename(src_snap_name.c_str(), dest_snap_name.c_str()); + if (r < 0) { + std::cerr << "rbd: renaming snap failed: " << cpp_strerror(r) + << std::endl; + return r; + } + return 0; +} + +Shell::Action action_list( + {"snap", "list"}, {"snap", "ls"}, "Dump list of image snapshots.", "", + &get_list_arguments, &execute_list); +Shell::Action action_create( + {"snap", "create"}, {"snap", "add"}, "Create a snapshot.", "", + &get_create_arguments, &execute_create); +Shell::Action action_remove( + {"snap", "remove"}, {"snap", "rm"}, "Delete a snapshot.", "", + &get_remove_arguments, &execute_remove); +Shell::Action action_purge( + {"snap", "purge"}, {}, "Delete all unprotected snapshots.", "", + &get_purge_arguments, &execute_purge); +Shell::Action action_rollback( + {"snap", "rollback"}, {"snap", "revert"}, "Rollback image to snapshot.", "", + &get_rollback_arguments, &execute_rollback); +Shell::Action action_protect( + {"snap", "protect"}, {}, "Prevent a snapshot from being deleted.", "", + &get_protect_arguments, &execute_protect); +Shell::Action action_unprotect( + {"snap", "unprotect"}, {}, "Allow a snapshot to be deleted.", "", + &get_unprotect_arguments, &execute_unprotect); +Shell::Action action_set_limit( + {"snap", "limit", "set"}, {}, "Limit the number of snapshots.", "", + &get_set_limit_arguments, &execute_set_limit); +Shell::Action action_clear_limit( + {"snap", "limit", "clear"}, {}, "Remove snapshot limit.", "", + &get_clear_limit_arguments, &execute_clear_limit); +Shell::Action action_rename( + {"snap", "rename"}, {}, "Rename a snapshot.", "", + &get_rename_arguments, &execute_rename); + +} // namespace snap +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Sparsify.cc b/src/tools/rbd/action/Sparsify.cc new file mode 100644 index 00000000..a345f920 --- /dev/null +++ b/src/tools/rbd/action/Sparsify.cc @@ -0,0 +1,82 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace sparsify { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_sparsify(librbd::Image& image, size_t sparse_size, + bool no_progress) +{ + utils::ProgressContext pc("Image sparsify", no_progress); + int r = image.sparsify_with_progress(sparse_size, pc); + if (r < 0) { + pc.fail(); + return r; + } + pc.finish(); + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_no_progress_option(options); + at::add_sparse_size_option(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + false, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + size_t sparse_size = utils::RBD_DEFAULT_SPARSE_SIZE; + if (vm.count(at::IMAGE_SPARSE_SIZE)) { + sparse_size = vm[at::IMAGE_SPARSE_SIZE].as<size_t>(); + } + + r = do_sparsify(image, sparse_size, vm[at::NO_PROGRESS].as<bool>()); + if (r < 0) { + std::cerr << "rbd: sparsify error: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"sparsify"}, {}, + "Reclaim space for zeroed image extents.", "", + &get_arguments, &execute); + +} // namespace sparsify +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Status.cc b/src/tools/rbd/action/Status.cc new file mode 100644 index 00000000..0a599e7f --- /dev/null +++ b/src/tools/rbd/action/Status.cc @@ -0,0 +1,214 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd_types.h" +#include "include/stringify.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace status { + +namespace at = argument_types; +namespace po = boost::program_options; + +static int do_show_status(librados::IoCtx& io_ctx, const std::string &image_name, + librbd::Image &image, Formatter *f) +{ + int r; + std::list<librbd::image_watcher_t> watchers; + + r = image.list_watchers(watchers); + if (r < 0) + return r; + + uint64_t features; + r = image.features(&features); + if (r < 0) { + return r; + } + + librbd::image_migration_status_t migration_status; + std::string source_pool_name; + std::string dest_pool_name; + std::string migration_state; + if ((features & RBD_FEATURE_MIGRATING) != 0) { + r = librbd::RBD().migration_status(io_ctx, image_name.c_str(), + &migration_status, + sizeof(migration_status)); + if (r < 0) { + std::cerr << "rbd: getting migration status failed: " << cpp_strerror(r) + << std::endl; + // not fatal + } else { + librados::IoCtx src_io_ctx; + r = librados::Rados(io_ctx).ioctx_create2(migration_status.source_pool_id, src_io_ctx); + if (r < 0) { + source_pool_name = stringify(migration_status.source_pool_id); + } else { + source_pool_name = src_io_ctx.get_pool_name(); + } + + librados::IoCtx dst_io_ctx; + r = librados::Rados(io_ctx).ioctx_create2(migration_status.dest_pool_id, dst_io_ctx); + if (r < 0) { + dest_pool_name = stringify(migration_status.dest_pool_id); + } else { + dest_pool_name = dst_io_ctx.get_pool_name(); + } + + switch (migration_status.state) { + case RBD_IMAGE_MIGRATION_STATE_ERROR: + migration_state = "error"; + break; + case RBD_IMAGE_MIGRATION_STATE_PREPARING: + migration_state = "preparing"; + break; + case RBD_IMAGE_MIGRATION_STATE_PREPARED: + migration_state = "prepared"; + break; + case RBD_IMAGE_MIGRATION_STATE_EXECUTING: + migration_state = "executing"; + break; + case RBD_IMAGE_MIGRATION_STATE_EXECUTED: + migration_state = "executed"; + break; + case RBD_IMAGE_MIGRATION_STATE_ABORTING: + migration_state = "aborting"; + break; + default: + migration_state = "unknown"; + } + } + } + + if (f) + f->open_object_section("status"); + + if (f) { + f->open_array_section("watchers"); + for (auto &watcher : watchers) { + f->open_object_section("watcher"); + f->dump_string("address", watcher.addr); + f->dump_unsigned("client", watcher.id); + f->dump_unsigned("cookie", watcher.cookie); + f->close_section(); + } + f->close_section(); // watchers + if (!migration_state.empty()) { + f->open_object_section("migration"); + f->dump_string("source_pool_name", source_pool_name); + f->dump_string("source_pool_namespace", + migration_status.source_pool_namespace); + f->dump_string("source_image_name", migration_status.source_image_name); + f->dump_string("source_image_id", migration_status.source_image_id); + f->dump_string("dest_pool_name", dest_pool_name); + f->dump_string("dest_pool_namespace", + migration_status.dest_pool_namespace); + f->dump_string("dest_image_name", migration_status.dest_image_name); + f->dump_string("dest_image_id", migration_status.dest_image_id); + f->dump_string("state", migration_state); + f->dump_string("state_description", migration_status.state_description); + f->close_section(); // migration + } + } else { + if (watchers.size()) { + std::cout << "Watchers:" << std::endl; + for (auto &watcher : watchers) { + std::cout << "\twatcher=" << watcher.addr << " client." << watcher.id + << " cookie=" << watcher.cookie << std::endl; + } + } else { + std::cout << "Watchers: none" << std::endl; + } + if (!migration_state.empty()) { + if (!migration_status.source_pool_namespace.empty()) { + source_pool_name += ("/" + migration_status.source_pool_namespace); + } + if (!migration_status.dest_pool_namespace.empty()) { + dest_pool_name += ("/" + migration_status.dest_pool_namespace); + } + + std::cout << "Migration:" << std::endl; + std::cout << "\tsource: " << source_pool_name << "/" + << migration_status.source_image_name; + if (!migration_status.source_image_id.empty()) { + std::cout << " (" << migration_status.source_image_id << ")"; + } + std::cout << std::endl; + std::cout << "\tdestination: " << dest_pool_name << "/" + << migration_status.dest_image_name << " (" + << migration_status.dest_image_id << ")" << std::endl; + std::cout << "\tstate: " << migration_state; + if (!migration_status.state_description.empty()) { + std::cout << " (" << migration_status.state_description << ")"; + } + std::cout << std::endl; + } + } + + if (f) { + f->close_section(); // status + f->flush(std::cout); + } + + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + at::add_format_options(options); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_show_status(io_ctx, image_name, image, formatter.get()); + if (r < 0) { + std::cerr << "rbd: show status failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"status"}, {}, "Show the status of this image.", "", &get_arguments, + &execute); + +} // namespace status +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Trash.cc b/src/tools/rbd/action/Trash.cc new file mode 100644 index 00000000..327b20ba --- /dev/null +++ b/src/tools/rbd/action/Trash.cc @@ -0,0 +1,525 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/Clock.h" +#include <iostream> +#include <sstream> +#include <boost/program_options.hpp> +#include <boost/bind.hpp> + +namespace rbd { +namespace action { +namespace trash { + +namespace at = argument_types; +namespace po = boost::program_options; + +//Optional arguments used only by this set of commands (rbd trash *) +static const std::string EXPIRES_AT("expires-at"); +static const std::string EXPIRED_BEFORE("expired-before"); +static const std::string THRESHOLD("threshold"); + +static bool is_not_trash_user(const librbd::trash_image_info_t &trash_info) { + return trash_info.source != RBD_TRASH_IMAGE_SOURCE_USER; +} + +void get_move_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); + options->add_options() + (EXPIRES_AT.c_str(), po::value<std::string>()->default_value("now"), + "set the expiration time of an image so it can be purged when it is stale"); +} + +int execute_move(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + utime_t now = ceph_clock_now(); + utime_t exp_time = now; + std::string expires_at; + if (vm.find(EXPIRES_AT) != vm.end()) { + expires_at = vm[EXPIRES_AT].as<std::string>(); + r = utime_t::invoke_date(expires_at, &exp_time); + if (r < 0) { + std::cerr << "rbd: error calling /bin/date: " << cpp_strerror(r) + << std::endl; + return r; + } + } + + time_t dt = (exp_time - now).sec(); + if(dt < 0) { + std::cerr << "rbd: cannot use a date in the past as an expiration date" + << std::endl; + return -EINVAL; + } + + librbd::RBD rbd; + r = rbd.trash_move(io_ctx, image_name.c_str(), dt); + if (r < 0) { + std::cerr << "rbd: deferred delete error: " << cpp_strerror(r) + << std::endl; + } + + return r; +} + +void get_remove_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + (at::IMAGE_ID.c_str(), "image id\n(example: [<pool-name>/[<namespace>/]]<image-id>)"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + + at::add_no_progress_option(options); + options->add_options() + ("force", po::bool_switch(), "force remove of non-expired delayed images"); +} + +int execute_remove(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_id; + int r = utils::get_pool_image_id(vm, &arg_index, &pool_name, &namespace_name, + &image_id); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_osdmap_full_try(); + librbd::RBD rbd; + + utils::ProgressContext pc("Removing image", vm[at::NO_PROGRESS].as<bool>()); + r = rbd.trash_remove_with_progress(io_ctx, image_id.c_str(), + vm["force"].as<bool>(), pc); + if (r < 0) { + if (r == -ENOTEMPTY) { + std::cerr << "rbd: image has snapshots - these must be deleted" + << " with 'rbd snap purge' before the image can be removed." + << std::endl; + } else if (r == -EBUSY) { + std::cerr << "rbd: error: image still has watchers" + << std::endl + << "This means the image is still open or the client using " + << "it crashed. Try again after closing/unmapping it or " + << "waiting 30s for the crashed client to timeout." + << std::endl; + } else if (r == -EMLINK) { + std::cerr << std::endl + << "Remove the image from the group and try again." + << std::endl; + } else if (r == -EPERM) { + std::cerr << std::endl + << "Deferment time has not expired, please use --force if you " + << "really want to remove the image" + << std::endl; + } else { + std::cerr << "rbd: remove error: " << cpp_strerror(r) << std::endl; + } + pc.fail(); + return r; + } + + pc.finish(); + + return r; +} + +std::string delete_status(time_t deferment_end_time) { + time_t now = time(nullptr); + + std::string time_str = ctime(&deferment_end_time); + time_str = time_str.substr(0, time_str.length() - 1); + + std::stringstream ss; + if (now < deferment_end_time) { + ss << "protected until " << time_str; + } else { + ss << "expired at " << time_str; + } + + return ss.str(); +} + +int do_list(librbd::RBD &rbd, librados::IoCtx& io_ctx, bool long_flag, + bool all_flag, Formatter *f) { + std::vector<librbd::trash_image_info_t> trash_entries; + int r = rbd.trash_list(io_ctx, trash_entries); + if (r < 0) { + return r; + } + + if (!all_flag) { + trash_entries.erase(remove_if(trash_entries.begin(), + trash_entries.end(), + boost::bind(is_not_trash_user, _1)), + trash_entries.end()); + } + + if (!long_flag) { + if (f) { + f->open_array_section("trash"); + } + for (const auto& entry : trash_entries) { + if (f) { + f->open_object_section("image"); + f->dump_string("id", entry.id); + f->dump_string("name", entry.name); + f->close_section(); + } else { + std::cout << entry.id << " " << entry.name << std::endl; + } + } + if (f) { + f->close_section(); + f->flush(std::cout); + } + return 0; + } + + TextTable tbl; + + if (f) { + f->open_array_section("trash"); + } else { + tbl.define_column("ID", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("NAME", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("SOURCE", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("DELETED_AT", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("STATUS", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("PARENT", TextTable::LEFT, TextTable::LEFT); + } + + for (const auto& entry : trash_entries) { + librbd::Image im; + + r = rbd.open_by_id_read_only(io_ctx, im, entry.id.c_str(), NULL); + // image might disappear between rbd.list() and rbd.open(); ignore + // that, warn about other possible errors (EPERM, say, for opening + // an old-format image, because you need execute permission for the + // class method) + if (r < 0) { + if (r != -ENOENT) { + std::cerr << "rbd: error opening " << entry.id << ": " + << cpp_strerror(r) << std::endl; + } + // in any event, continue to next image + continue; + } + + std::string del_source; + switch (entry.source) { + case RBD_TRASH_IMAGE_SOURCE_USER: + del_source = "USER"; + break; + case RBD_TRASH_IMAGE_SOURCE_MIRRORING: + del_source = "MIRRORING"; + break; + case RBD_TRASH_IMAGE_SOURCE_MIGRATION: + del_source = "MIGRATION"; + break; + case RBD_TRASH_IMAGE_SOURCE_REMOVING: + del_source = "REMOVING"; + break; + } + + std::string time_str = ctime(&entry.deletion_time); + time_str = time_str.substr(0, time_str.length() - 1); + + bool has_parent = false; + std::string parent; + librbd::linked_image_spec_t parent_image; + librbd::snap_spec_t parent_snap; + r = im.get_parent(&parent_image, &parent_snap); + if (r == -ENOENT) { + r = 0; + } else if (r < 0) { + return r; + } else { + parent = parent_image.pool_name + "/"; + if (!parent_image.pool_namespace.empty()) { + parent += parent_image.pool_namespace + "/"; + } + parent += parent_image.image_name + "@" + parent_snap.name; + has_parent = true; + } + + if (f) { + f->open_object_section("image"); + f->dump_string("id", entry.id); + f->dump_string("name", entry.name); + f->dump_string("source", del_source); + f->dump_string("deleted_at", time_str); + f->dump_string("status", + delete_status(entry.deferment_end_time)); + if (has_parent) { + f->open_object_section("parent"); + f->dump_string("pool", parent_image.pool_name); + f->dump_string("pool_namespace", parent_image.pool_namespace); + f->dump_string("image", parent_image.image_name); + f->dump_string("snapshot", parent_snap.name); + f->close_section(); + } + f->close_section(); + } else { + tbl << entry.id + << entry.name + << del_source + << time_str + << delete_status(entry.deferment_end_time); + if (has_parent) + tbl << parent; + tbl << TextTable::endrow; + } + } + + if (f) { + f->close_section(); + f->flush(std::cout); + } else if (!trash_entries.empty()) { + std::cout << tbl; + } + + return r < 0 ? r : 0; +} + +void get_list_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + options->add_options() + ("all,a", po::bool_switch(), "list images from all sources"); + options->add_options() + ("long,l", po::bool_switch(), "long listing format"); + at::add_format_options(options); +} + +int execute_list(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + librbd::RBD rbd; + r = do_list(rbd, io_ctx, vm["long"].as<bool>(), vm["all"].as<bool>(), + formatter.get()); + if (r < 0) { + std::cerr << "rbd: trash list: " << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +void get_purge_arguments(po::options_description *positional, + po::options_description *options) { + at::add_pool_options(positional, options, true); + at::add_no_progress_option(options); + + options->add_options() + (EXPIRED_BEFORE.c_str(), po::value<std::string>()->value_name("date"), + "purges images that expired before the given date"); + options->add_options() + (THRESHOLD.c_str(), po::value<float>(), + "purges images until the current pool data usage is reduced to X%, " + "value range: 0.0-1.0"); +} + +int execute_purge (const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + std::string pool_name; + std::string namespace_name; + size_t arg_index = 0; + int r = utils::get_pool_and_namespace_names(vm, true, false, &pool_name, + &namespace_name, &arg_index); + if (r < 0) { + return r; + } + + utils::disable_cache(); + + librbd::RBD rbd; + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + io_ctx.set_osdmap_full_try(); + + float threshold = -1; + time_t expire_ts = 0; + + if (vm.find(THRESHOLD) != vm.end()) { + threshold = vm[THRESHOLD].as<float>(); + } else { + if (vm.find(EXPIRED_BEFORE) != vm.end()) { + utime_t new_time; + r = utime_t::invoke_date(vm[EXPIRED_BEFORE].as<std::string>(), &new_time); + if (r < 0) { + std::cerr << "rbd: error calling /bin/date: " << cpp_strerror(r) + << std::endl; + return r; + } + expire_ts = new_time.sec(); + } + } + + utils::ProgressContext pc("Removing images", vm[at::NO_PROGRESS].as<bool>()); + r = rbd.trash_purge_with_progress(io_ctx, expire_ts, threshold, pc); + if (r < 0) { + pc.fail(); + } else { + pc.finish(); + } + + return 0; +} + +void get_restore_arguments(po::options_description *positional, + po::options_description *options) { + positional->add_options() + (at::IMAGE_ID.c_str(), "image id\n(example: [<pool-name>/]<image-id>)"); + at::add_pool_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); + at::add_image_id_option(options); + at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE, ""); +} + +int execute_restore(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_id; + int r = utils::get_pool_image_id(vm, &arg_index, &pool_name, &namespace_name, + &image_id); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + std::string name; + if (vm.find(at::IMAGE_NAME) != vm.end()) { + name = vm[at::IMAGE_NAME].as<std::string>(); + } + + librbd::RBD rbd; + r = rbd.trash_restore(io_ctx, image_id.c_str(), name.c_str()); + if (r < 0) { + if (r == -ENOENT) { + std::cerr << "rbd: error: image does not exist in trash" + << std::endl; + } else if (r == -EEXIST) { + std::cerr << "rbd: error: an image with the same name already exists, " + << "try again with with a different name" + << std::endl; + } else { + std::cerr << "rbd: restore error: " << cpp_strerror(r) << std::endl; + } + return r; + } + + return r; +} + + +Shell::Action action_move( + {"trash", "move"}, {"trash", "mv"}, "Move an image to the trash.", "", + &get_move_arguments, &execute_move); + +Shell::Action action_remove( + {"trash", "remove"}, {"trash", "rm"}, "Remove an image from trash.", "", + &get_remove_arguments, &execute_remove); + +Shell::Action action_purge( + {"trash", "purge"}, {}, "Remove all expired images from trash.", "", + &get_purge_arguments, &execute_purge); + +Shell::SwitchArguments switched_arguments({"long", "l"}); +Shell::Action action_list( + {"trash", "list"}, {"trash", "ls"}, "List trash images.", "", + &get_list_arguments, &execute_list); + +Shell::Action action_restore( + {"trash", "restore"}, {}, "Restore an image from trash.", "", + &get_restore_arguments, &execute_restore); + +} // namespace trash +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/action/Watch.cc b/src/tools/rbd/action/Watch.cc new file mode 100644 index 00000000..65f0f93d --- /dev/null +++ b/src/tools/rbd/action/Watch.cc @@ -0,0 +1,149 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/ArgumentTypes.h" +#include "tools/rbd/Shell.h" +#include "tools/rbd/Utils.h" +#include "include/rbd_types.h" +#include "librbd/WatchNotifyTypes.h" +#include "common/errno.h" +#include <iostream> +#include <boost/program_options.hpp> + +namespace rbd { +namespace action { +namespace watch { + +namespace at = argument_types; +namespace po = boost::program_options; + +class RbdWatchCtx : public librados::WatchCtx2 { +public: + RbdWatchCtx(librados::IoCtx& io_ctx, const char *image_name, + const std::string &header_oid) + : m_io_ctx(io_ctx), m_image_name(image_name), m_header_oid(header_oid) + { + } + + ~RbdWatchCtx() override {} + + void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) override { + using namespace librbd::watch_notify; + NotifyMessage notify_message; + if (bl.length() == 0) { + notify_message = NotifyMessage(HeaderUpdatePayload()); + } else { + try { + auto iter = bl.cbegin(); + notify_message.decode(iter); + } catch (const buffer::error &err) { + std::cerr << "rbd: failed to decode image notification" << std::endl; + } + } + + std::cout << m_image_name << " received notification: notify_id=" + << notify_id << ", cookie=" << cookie << ", notifier_id=" + << notifier_id << ", bl.length=" << bl.length() << ", notify_op=" + << notify_message.get_notify_op() << std::endl; + bufferlist reply; + m_io_ctx.notify_ack(m_header_oid, notify_id, cookie, reply); + } + + void handle_error(uint64_t cookie, int err) override { + std::cerr << m_image_name << " received error: cookie=" << cookie << ", " + << "err=" << cpp_strerror(err) << std::endl; + } +private: + librados::IoCtx m_io_ctx; + const char *m_image_name; + std::string m_header_oid; +}; + +static int do_watch(librados::IoCtx& pp, librbd::Image &image, + const char *imgname) +{ + uint8_t old_format; + int r = image.old_format(&old_format); + if (r < 0) { + std::cerr << "failed to query format" << std::endl; + return r; + } + + std::string header_oid; + if (old_format != 0) { + header_oid = std::string(imgname) + RBD_SUFFIX; + } else { + std::string id; + r = image.get_id(&id); + if (r < 0) { + return r; + } + + header_oid = RBD_HEADER_PREFIX + id; + } + + uint64_t cookie; + RbdWatchCtx ctx(pp, imgname, header_oid); + r = pp.watch2(header_oid, &cookie, &ctx); + if (r < 0) { + std::cerr << "rbd: watch failed" << std::endl; + return r; + } + + std::cout << "press enter to exit..." << std::endl; + getchar(); + + r = pp.unwatch2(cookie); + if (r < 0) { + std::cerr << "rbd: unwatch failed" << std::endl; + return r; + } + return 0; +} + +void get_arguments(po::options_description *positional, + po::options_description *options) { + at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); +} + +int execute(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + std::string pool_name; + std::string namespace_name; + std::string image_name; + std::string snap_name; + int r = utils::get_pool_image_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, &pool_name, &namespace_name, + &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_NONE, + utils::SPEC_VALIDATION_NONE); + if (r < 0) { + return r; + } + + librados::Rados rados; + librados::IoCtx io_ctx; + librbd::Image image; + r = utils::init_and_open_image(pool_name, namespace_name, image_name, "", "", + true, &rados, &io_ctx, &image); + if (r < 0) { + return r; + } + + r = do_watch(io_ctx, image, image_name.c_str()); + if (r < 0) { + std::cerr << "rbd: watch failed: " << cpp_strerror(r) << std::endl; + return r; + } + return 0; +} + +Shell::Action action( + {"watch"}, {}, "Watch events on image.", "", &get_arguments, &execute); + +} // namespace watch +} // namespace action +} // namespace rbd diff --git a/src/tools/rbd/rbd.cc b/src/tools/rbd/rbd.cc new file mode 100644 index 00000000..a8c59d57 --- /dev/null +++ b/src/tools/rbd/rbd.cc @@ -0,0 +1,10 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd/Shell.h" + +int main(int argc, const char **argv) +{ + rbd::Shell shell; + return shell.execute(argc, argv); +} diff --git a/src/tools/rbd_ggate/CMakeLists.txt b/src/tools/rbd_ggate/CMakeLists.txt new file mode 100644 index 00000000..5c5572c4 --- /dev/null +++ b/src/tools/rbd_ggate/CMakeLists.txt @@ -0,0 +1,9 @@ +add_executable(rbd-ggate + Driver.cc + Server.cc + Watcher.cc + debug.cc + ggate_drv.c + main.cc) +target_link_libraries(rbd-ggate geom librbd librados global) +install(TARGETS rbd-ggate DESTINATION bin) diff --git a/src/tools/rbd_ggate/Driver.cc b/src/tools/rbd_ggate/Driver.cc new file mode 100644 index 00000000..752ef56f --- /dev/null +++ b/src/tools/rbd_ggate/Driver.cc @@ -0,0 +1,165 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <stdlib.h> + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Request.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Driver: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +int Driver::load() { + + return ggate_drv_load(); +} + +int Driver::kill(const std::string &devname) { + + int r = ggate_drv_kill(devname.c_str()); + + return r; +} + +int Driver::list(std::map<std::string, DevInfo> *devices) { + size_t size = 1024; + ggate_drv_info *devs = nullptr; + int r; + + while (size <= 1024 * 1024) { + devs = static_cast<ggate_drv_info *>( + realloc(static_cast<void *>(devs), size * sizeof(*devs))); + r = ggate_drv_list(devs, &size); + if (r != -ERANGE) { + break; + } + } + if (r < 0) { + goto free; + } + + devices->clear(); + for (size_t i = 0; i < size; i++) { + auto &dev = devs[i]; + (*devices)[dev.id] = {dev.name, dev.info}; + } + +free: + free(devs); + + return r; +} + +Driver::Driver(const std::string &devname, size_t sectorsize, size_t mediasize, + bool readonly, const std::string &info) + : m_devname(devname), m_sectorsize(sectorsize), m_mediasize(mediasize), + m_readonly(readonly), m_info(info) { +} + +int Driver::init() { + dout(20) << dendl; + + char name[PATH_MAX]; + size_t namelen; + + if (m_devname.empty()) { + name[0] = '\0'; + namelen = PATH_MAX; + } else { + namelen = m_devname.size(); + if (namelen >= PATH_MAX) { + return -ENAMETOOLONG; + } + strncpy(name, m_devname.c_str(), namelen + 1); + } + + int r = ggate_drv_create(name, namelen, m_sectorsize, m_mediasize, m_readonly, + m_info.c_str(), &m_drv); + if (r < 0) { + return r; + } + + if (m_devname.empty()) { + m_devname = name; + } + + return 0; +} + +std::string Driver::get_devname() const { + dout(30) << m_devname << dendl; + + return m_devname; +} + +void Driver::shut_down() { + dout(20) << dendl; + + ggate_drv_destroy(m_drv); +} + +int Driver::resize(size_t newsize) { + dout(20) << "newsize=" << newsize << dendl; + + int r = ggate_drv_resize(m_drv, newsize); + if (r < 0) { + return r; + } + + m_mediasize = newsize; + return 0; +} + +int Driver::recv(Request **req) { + dout(20) << dendl; + + ggate_drv_req_t req_; + + int r = ggate_drv_recv(m_drv, &req_); + if (r < 0) { + return r; + } + + *req = new Request(req_); + + dout(20) << "req=" << *req << dendl; + + if (ggate_drv_req_cmd(req_) == GGATE_DRV_CMD_WRITE) { + bufferptr ptr(buffer::claim_malloc( + ggate_drv_req_length(req_), + static_cast<char *>(ggate_drv_req_release_buf(req_)))); + (*req)->bl.push_back(ptr); + } + + return 0; +} + +int Driver::send(Request *req) { + dout(20) << "req=" << req << dendl; + + if (ggate_drv_req_cmd(req->req) == GGATE_DRV_CMD_READ && + ggate_drv_req_error(req->req) == 0) { + ceph_assert(req->bl.length() == ggate_drv_req_length(req->req)); + // TODO: avoid copying? + req->bl.copy(0, ggate_drv_req_length(req->req), + static_cast<char *>(ggate_drv_req_buf(req->req))); + dout(20) << "copied resulting " << req->bl.length() << " bytes to " + << ggate_drv_req_buf(req->req) << dendl; + } + + int r = ggate_drv_send(m_drv, req->req); + + delete req; + return r; +} + +} // namespace ggate +} // namespace rbd diff --git a/src/tools/rbd_ggate/Driver.h b/src/tools/rbd_ggate/Driver.h new file mode 100644 index 00000000..50be72b9 --- /dev/null +++ b/src/tools/rbd_ggate/Driver.h @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_DRIVER_H +#define CEPH_RBD_GGATE_DRIVER_H + +#include <map> +#include <string> + +#include "ggate_drv.h" + +namespace rbd { +namespace ggate { + +struct Request; + +class Driver { +public: + typedef std::pair<std::string, std::string> DevInfo; + static int load(); + static int kill(const std::string &devname); + static int list(std::map<std::string, DevInfo> *devices); + + Driver(const std::string &devname, size_t sectorsize, size_t mediasize, + bool readonly, const std::string &info); + + int init(); + void shut_down(); + + std::string get_devname() const; + + int recv(Request **req); + int send(Request *req); + + int resize(size_t newsize); + +private: + std::string m_devname; + size_t m_sectorsize; + size_t m_mediasize; + bool m_readonly; + std::string m_info; + ggate_drv_t m_drv = 0; +}; + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_DRIVER_H + diff --git a/src/tools/rbd_ggate/Request.h b/src/tools/rbd_ggate/Request.h new file mode 100644 index 00000000..66f21985 --- /dev/null +++ b/src/tools/rbd_ggate/Request.h @@ -0,0 +1,55 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_REQUEST_H +#define CEPH_RBD_GGATE_REQUEST_H + +#include "ggate_drv.h" + +namespace rbd { +namespace ggate { + +struct Request { + enum Command { + Unknown = 0, + Write = 1, + Read = 2, + Flush = 3, + Discard = 4, + }; + + ggate_drv_req_t req; + bufferlist bl; + + Request(ggate_drv_req_t req) : req(req) { + } + + uint64_t get_id() { + return ggate_drv_req_id(req); + } + + Command get_cmd() { + return static_cast<Command>(ggate_drv_req_cmd(req)); + } + + size_t get_length() { + return ggate_drv_req_length(req); + } + + uint64_t get_offset() { + return ggate_drv_req_offset(req); + } + + uint64_t get_error() { + return ggate_drv_req_error(req); + } + + void set_error(int error) { + ggate_drv_req_set_error(req, error); + } +}; + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_REQUEST_H diff --git a/src/tools/rbd_ggate/Server.cc b/src/tools/rbd_ggate/Server.cc new file mode 100644 index 00000000..3beeec3f --- /dev/null +++ b/src/tools/rbd_ggate/Server.cc @@ -0,0 +1,270 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Server.h" +#include "Request.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Server: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +Server::Server(Driver *drv, librbd::Image& image) + : m_drv(drv), m_image(image), m_lock("rbd::ggate::Server::m_lock"), + m_reader_thread(this, &Server::reader_entry), + m_writer_thread(this, &Server::writer_entry) { +} + +void Server::run() { + dout(10) << dendl; + + int r = start(); + ceph_assert(r == 0); + + dout(20) << "entering run loop" << dendl; + + { + Mutex::Locker locker(m_lock); + while (!m_stopping) { + m_cond.WaitInterval(m_lock, utime_t(1, 0)); + } + } + + dout(20) << "exiting run loop" << dendl; + + stop(); +} + +int Server::start() { + dout(10) << dendl; + + m_reader_thread.create("rbd_reader"); + m_writer_thread.create("rbd_writer"); + return 0; +} + +void Server::stop() { + dout(10) << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_stopping); + } + + m_reader_thread.join(); + m_writer_thread.join(); + + wait_clean(); +} + +void Server::io_start(IOContext *ctx) { + dout(20) << ctx << dendl; + + Mutex::Locker locker(m_lock); + m_io_pending.push_back(&ctx->item); +} + +void Server::io_finish(IOContext *ctx) { + dout(20) << ctx << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(ctx->item.is_on_list()); + + ctx->item.remove_myself(); + m_io_finished.push_back(&ctx->item); + m_cond.Signal(); +} + +Server::IOContext *Server::wait_io_finish() { + dout(20) << dendl; + + Mutex::Locker locker(m_lock); + + while (m_io_finished.empty() && !m_stopping) { + m_cond.Wait(m_lock); + } + + if (m_io_finished.empty()) { + return nullptr; + } + + IOContext *ret = m_io_finished.front(); + m_io_finished.pop_front(); + + return ret; +} + +void Server::wait_clean() { + dout(20) << dendl; + + ceph_assert(!m_reader_thread.is_started()); + + Mutex::Locker locker(m_lock); + + while (!m_io_pending.empty()) { + m_cond.Wait(m_lock); + } + + while (!m_io_finished.empty()) { + std::unique_ptr<IOContext> free_ctx(m_io_finished.front()); + m_io_finished.pop_front(); + } +} + +void Server::aio_callback(librbd::completion_t cb, void *arg) { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(cb); + + IOContext *ctx = reinterpret_cast<IOContext *>(arg); + int r = aio_completion->get_return_value(); + + ctx->server->handle_aio(ctx, r); + aio_completion->release(); +} + +void Server::handle_aio(IOContext *ctx, int r) { + dout(20) << ctx << ": r=" << r << dendl; + + if (r == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of the range of the new image extents + dout(5) << "masking IO out-of-bounds error" << dendl; + ctx->req->bl.clear(); + r = 0; + } + + if (r < 0) { + ctx->req->set_error(-r); + } else if ((ctx->req->get_cmd() == Request::Read) && + r != static_cast<int>(ctx->req->get_length())) { + int pad_byte_count = static_cast<int> (ctx->req->get_length()) - r; + ctx->req->bl.append_zero(pad_byte_count); + dout(20) << ctx << ": pad byte count: " << pad_byte_count << dendl; + ctx->req->set_error(0); + } else { + ctx->req->set_error(0); + } + io_finish(ctx); +} + +void Server::reader_entry() { + dout(20) << dendl; + + while (!m_stopping) { + std::unique_ptr<IOContext> ctx(new IOContext(this)); + + dout(20) << "waiting for ggate request" << dendl; + + int r = m_drv->recv(&ctx->req); + if (r < 0) { + if (r != -ECANCELED) { + derr << "recv: " << cpp_strerror(r) << dendl; + } + Mutex::Locker locker(m_lock); + m_stopping = true; + m_cond.Signal(); + return; + } + + IOContext *pctx = ctx.release(); + + dout(20) << pctx << ": start: " << *pctx << dendl; + + io_start(pctx); + librbd::RBD::AioCompletion *c = + new librbd::RBD::AioCompletion(pctx, aio_callback); + switch (pctx->req->get_cmd()) + { + case rbd::ggate::Request::Write: + m_image.aio_write(pctx->req->get_offset(), pctx->req->get_length(), + pctx->req->bl, c); + break; + case rbd::ggate::Request::Read: + m_image.aio_read(pctx->req->get_offset(), pctx->req->get_length(), + pctx->req->bl, c); + break; + case rbd::ggate::Request::Flush: + m_image.aio_flush(c); + break; + case rbd::ggate::Request::Discard: + m_image.aio_discard(pctx->req->get_offset(), pctx->req->get_length(), c); + break; + default: + derr << pctx << ": invalid request command: " << pctx->req->get_cmd() + << dendl; + c->release(); + Mutex::Locker locker(m_lock); + m_stopping = true; + m_cond.Signal(); + return; + } + } + dout(20) << "terminated" << dendl; +} + +void Server::writer_entry() { + dout(20) << dendl; + + while (!m_stopping) { + dout(20) << "waiting for io request" << dendl; + + std::unique_ptr<IOContext> ctx(wait_io_finish()); + if (!ctx) { + dout(20) << "no io requests, terminating" << dendl; + return; + } + + dout(20) << ctx.get() << ": got: " << *ctx << dendl; + + int r = m_drv->send(ctx->req); + if (r < 0) { + derr << ctx.get() << ": send: " << cpp_strerror(r) << dendl; + Mutex::Locker locker(m_lock); + m_stopping = true; + m_cond.Signal(); + return; + } + dout(20) << ctx.get() << " finish" << dendl; + } + dout(20) << "terminated" << dendl; +} + +std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx) { + + os << "[" << ctx.req->get_id(); + + switch (ctx.req->get_cmd()) + { + case rbd::ggate::Request::Write: + os << " Write "; + break; + case rbd::ggate::Request::Read: + os << " Read "; + break; + case rbd::ggate::Request::Flush: + os << " Flush "; + break; + case rbd::ggate::Request::Discard: + os << " Discard "; + break; + default: + os << " Unknow(" << ctx.req->get_cmd() << ") "; + break; + } + + os << ctx.req->get_offset() << "~" << ctx.req->get_length() << " " + << ctx.req->get_error() << "]"; + + return os; +} + +} // namespace ggate +} // namespace rbd + diff --git a/src/tools/rbd_ggate/Server.h b/src/tools/rbd_ggate/Server.h new file mode 100644 index 00000000..8ed4f512 --- /dev/null +++ b/src/tools/rbd_ggate/Server.h @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_SERVER_H +#define CEPH_RBD_GGATE_SERVER_H + +#include "include/rbd/librbd.hpp" +#include "include/xlist.h" +#include "common/Cond.h" +#include "common/Mutex.h" +#include "common/Thread.h" + +namespace rbd { +namespace ggate { + +class Driver; +struct Request; + +class Server { +public: + Server(Driver *drv, librbd::Image& image); + + void run(); + +private: + struct IOContext { + xlist<IOContext*>::item item; + Server *server; + Request *req = nullptr; + + IOContext(Server *server) : item(this), server(server) { + } + }; + + class ThreadHelper : public Thread { + public: + typedef void (Server::*entry_func)(); + + ThreadHelper(Server *server, entry_func func) + : server(server), func(func) { + } + + protected: + virtual void* entry() { + (server->*func)(); + return nullptr; + } + + private: + Server *server; + entry_func func; + }; + + friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); + + Driver *m_drv; + librbd::Image &m_image; + + mutable Mutex m_lock; + Cond m_cond; + bool m_stopping = false; + ThreadHelper m_reader_thread, m_writer_thread; + xlist<IOContext*> m_io_pending; + xlist<IOContext*> m_io_finished; + + static void aio_callback(librbd::completion_t cb, void *arg); + + int start(); + void stop(); + + void reader_entry(); + void writer_entry(); + + void io_start(IOContext *ctx); + void io_finish(IOContext *ctx); + + IOContext *wait_io_finish(); + void wait_clean(); + + void handle_aio(IOContext *ctx, int r); +}; + +std::ostream &operator<<(std::ostream &os, const Server::IOContext &ctx); + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_SERVER_H diff --git a/src/tools/rbd_ggate/Watcher.cc b/src/tools/rbd_ggate/Watcher.cc new file mode 100644 index 00000000..57b3f960 --- /dev/null +++ b/src/tools/rbd_ggate/Watcher.cc @@ -0,0 +1,48 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "Driver.h" +#include "Watcher.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate::Watcher: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace ggate { + +Watcher::Watcher(Driver *drv, librados::IoCtx &ioctx, librbd::Image &image, + size_t size) + : m_drv(drv), m_ioctx(ioctx), m_image(image), m_size(size) { +} + +void Watcher::handle_notify() { + dout(20) << dendl; + + librbd::image_info_t info; + + if (m_image.stat(info, sizeof(info)) == 0) { + size_t new_size = info.size; + + if (new_size != m_size) { + int r = m_drv->resize(new_size); + if (r < 0) { + derr << "resize failed: " << cpp_strerror(r) << dendl; + m_drv->shut_down(); + } + r = m_image.invalidate_cache(); + if (r < 0) { + derr << "invalidate rbd cache failed: " << cpp_strerror(r) << dendl; + m_drv->shut_down(); + } + m_size = new_size; + } + } +} + +} // namespace ggate +} // namespace rbd diff --git a/src/tools/rbd_ggate/Watcher.h b/src/tools/rbd_ggate/Watcher.h new file mode 100644 index 00000000..8f524b43 --- /dev/null +++ b/src/tools/rbd_ggate/Watcher.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_WATCHER_H +#define CEPH_RBD_GGATE_WATCHER_H + +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace ggate { + +class Driver; + +class Watcher : public librbd::UpdateWatchCtx +{ +public: + Watcher(Driver *m_drv, librados::IoCtx &ioctx, librbd::Image &image, + size_t size); + + void handle_notify() override; + +private: + Driver *m_drv; + librados::IoCtx &m_ioctx; + librbd::Image &m_image; + size_t m_size; +}; + + +} // namespace ggate +} // namespace rbd + +#endif // CEPH_RBD_GGATE_WATCHER_H + diff --git a/src/tools/rbd_ggate/debug.cc b/src/tools/rbd_ggate/debug.cc new file mode 100644 index 00000000..b675ba5b --- /dev/null +++ b/src/tools/rbd_ggate/debug.cc @@ -0,0 +1,55 @@ +#include "common/debug.h" +#include "common/errno.h" +#include "debug.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd::ggate: " + +extern "C" void debugv(int level, const char *fmt, va_list ap) { + char *msg; + int saved_errno = errno; + + if (g_ceph_context == nullptr) { + return; + } + + vasprintf(&msg, fmt, ap); + + dout(ceph::dout::need_dynamic(level)) << msg << dendl; + + free(msg); + errno = saved_errno; +} + +extern "C" void debug(int level, const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + debugv(level, fmt, ap); + va_end(ap); +} + +extern "C" void errx(const char *fmt, ...) { + va_list ap; + + va_start(ap, fmt); + debugv(-1, fmt, ap); + va_end(ap); +} + +extern "C" void err(const char *fmt, ...) { + va_list ap; + char *msg; + int saved_errno = errno; + + va_start(ap, fmt); + vasprintf(&msg, fmt, ap); + va_end(ap); + errno = saved_errno; + + errx("%s: %s", msg, cpp_strerror(errno).c_str()); + + free(msg); +} diff --git a/src/tools/rbd_ggate/debug.h b/src/tools/rbd_ggate/debug.h new file mode 100644 index 00000000..da9b46a3 --- /dev/null +++ b/src/tools/rbd_ggate/debug.h @@ -0,0 +1,17 @@ +#ifndef CEPH_RBD_GGATE_DEBUG_H +#define CEPH_RBD_GGATE_DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +void debug(int level, const char *fmt, ...) __printflike(2, 3); +void debugv(int level, const char *fmt, va_list ap) __printflike(2, 0); +void err(const char *fmt, ...) __printflike(1, 2); +void errx(const char *fmt, ...) __printflike(1, 2); + +#ifdef __cplusplus +} +#endif + +#endif // CEPH_RBD_GGATE_DEBUG_H diff --git a/src/tools/rbd_ggate/ggate_drv.c b/src/tools/rbd_ggate/ggate_drv.c new file mode 100644 index 00000000..b1faccd2 --- /dev/null +++ b/src/tools/rbd_ggate/ggate_drv.c @@ -0,0 +1,379 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <sys/param.h> +#include <sys/bio.h> +#include <sys/disk.h> +#include <sys/linker.h> +#include <sys/queue.h> +#include <sys/stat.h> + +#include <geom/gate/g_gate.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgeom.h> + +#include "debug.h" +#include "ggate_drv.h" + +uint64_t ggate_drv_req_id(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_seq; +} + +int ggate_drv_req_cmd(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + switch (ggio->gctl_cmd) { + case BIO_WRITE: + return GGATE_DRV_CMD_WRITE; + case BIO_READ: + return GGATE_DRV_CMD_READ; + case BIO_FLUSH: + return GGATE_DRV_CMD_FLUSH; + case BIO_DELETE: + return GGATE_DRV_CMD_DISCARD; + default: + return GGATE_DRV_CMD_UNKNOWN; + } +} + +uint64_t ggate_drv_req_offset(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_offset; +} + +size_t ggate_drv_req_length(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_length; +} + +void *ggate_drv_req_buf(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_data; +} + +int ggate_drv_req_error(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + return ggio->gctl_error; +} + +void ggate_drv_req_set_error(ggate_drv_req_t req, int error) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + ggio->gctl_error = error; +} + +void *ggate_drv_req_release_buf(ggate_drv_req_t req) { + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + + void *data = ggio->gctl_data; + ggio->gctl_data = NULL; + + return data; +} + +struct ggate_drv { + int fd; + int unit; +}; + +int ggate_drv_load() { + if (modfind("g_gate") != -1) { + /* Present in kernel. */ + return 0; + } + + if (kldload("geom_gate") == -1 || modfind("g_gate") == -1) { + if (errno != EEXIST) { + err("failed to load geom_gate module"); + return -errno; + } + } + return 0; +} + +int ggate_drv_create(char *name, size_t namelen, size_t sectorsize, + size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv_) { + struct ggate_drv *drv; + struct g_gate_ctl_create ggiocreate; + + debug(20, "%s: name=%s, sectorsize=%zd, mediasize=%zd, readonly=%d, info=%s", + __func__, name, sectorsize, mediasize, (int)readonly, info); + + if (*name != '\0') { + if (namelen > sizeof(ggiocreate.gctl_name) - 1) { + return -ENAMETOOLONG; + } + } + + /* + * We communicate with ggate via /dev/ggctl. Open it. + */ + int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR); + if (fd == -1) { + err("failed to open /dev/" G_GATE_CTL_NAME); + return -errno; + } + + drv = calloc(1, sizeof(*drv)); + if (drv == NULL) { + errno = -ENOMEM; + goto fail_close; + } + + /* + * Create provider. + */ + memset(&ggiocreate, 0, sizeof(ggiocreate)); + ggiocreate.gctl_version = G_GATE_VERSION; + ggiocreate.gctl_mediasize = mediasize; + ggiocreate.gctl_sectorsize = sectorsize; + ggiocreate.gctl_flags = readonly ? G_GATE_FLAG_READONLY : 0; + ggiocreate.gctl_maxcount = 0; + ggiocreate.gctl_timeout = 0; + if (*name != '\0') { + ggiocreate.gctl_unit = G_GATE_NAME_GIVEN; + strlcpy(ggiocreate.gctl_name, name, sizeof(ggiocreate.gctl_name)); + } else { + ggiocreate.gctl_unit = G_GATE_UNIT_AUTO; + } + strlcpy(ggiocreate.gctl_info, info, sizeof(ggiocreate.gctl_info)); + if (ioctl(fd, G_GATE_CMD_CREATE, &ggiocreate) == -1) { + err("failed to create " G_GATE_PROVIDER_NAME " device"); + goto fail; + } + + debug(20, "%s: created, unit: %d, name: %s", __func__, ggiocreate.gctl_unit, + ggiocreate.gctl_name); + + drv->fd = fd; + drv->unit = ggiocreate.gctl_unit; + *drv_ = drv; + + if (*name == '\0') { + snprintf(name, namelen, "%s%d", G_GATE_PROVIDER_NAME, drv->unit); + } + + return 0; + +fail: + free(drv); +fail_close: + close(fd); + return -errno; +} + +void ggate_drv_destroy(ggate_drv_t drv_) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_destroy ggiodestroy; + + debug(20, "%s %p", __func__, drv); + + memset(&ggiodestroy, 0, sizeof(ggiodestroy)); + ggiodestroy.gctl_version = G_GATE_VERSION; + ggiodestroy.gctl_unit = drv->unit; + ggiodestroy.gctl_force = 1; + + // Remember errno. + int rerrno = errno; + + int r = ioctl(drv->fd, G_GATE_CMD_DESTROY, &ggiodestroy); + if (r == -1) { + err("failed to destroy /dev/%s%d device", G_GATE_PROVIDER_NAME, + drv->unit); + } + // Restore errno. + errno = rerrno; + + free(drv); +} + +int ggate_drv_resize(ggate_drv_t drv_, size_t newsize) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + + debug(20, "%s %p: newsize=%zd", __func__, drv, newsize); + + struct g_gate_ctl_modify ggiomodify; + + memset(&ggiomodify, 0, sizeof(ggiomodify)); + ggiomodify.gctl_version = G_GATE_VERSION; + ggiomodify.gctl_unit = drv->unit; + ggiomodify.gctl_modify = GG_MODIFY_MEDIASIZE; + ggiomodify.gctl_mediasize = newsize; + + int r = ioctl(drv->fd, G_GATE_CMD_MODIFY, &ggiomodify); + if (r == -1) { + r = -errno; + err("failed to resize /dev/%s%d device", G_GATE_PROVIDER_NAME, drv->unit); + } + return r; +} + +int ggate_drv_kill(const char *devname) { + debug(20, "%s %s", __func__, devname); + + int fd = open("/dev/" G_GATE_CTL_NAME, O_RDWR); + if (fd == -1) { + err("failed to open /dev/" G_GATE_CTL_NAME); + return -errno; + } + + struct g_gate_ctl_destroy ggiodestroy; + memset(&ggiodestroy, 0, sizeof(ggiodestroy)); + ggiodestroy.gctl_version = G_GATE_VERSION; + ggiodestroy.gctl_unit = G_GATE_NAME_GIVEN; + ggiodestroy.gctl_force = 1; + + strlcpy(ggiodestroy.gctl_name, devname, sizeof(ggiodestroy.gctl_name)); + + int r = ioctl(fd, G_GATE_CMD_DESTROY, &ggiodestroy); + if (r == -1) { + r = -errno; + err("failed to destroy %s device", devname); + } + + close(fd); + return r; +} + +int ggate_drv_recv(ggate_drv_t drv_, ggate_drv_req_t *req) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_io *ggio; + int error, r; + + debug(20, "%s", __func__); + + ggio = calloc(1, sizeof(*ggio)); + if (ggio == NULL) { + return -ENOMEM; + } + + ggio->gctl_version = G_GATE_VERSION; + ggio->gctl_unit = drv->unit; + ggio->gctl_data = malloc(MAXPHYS); + ggio->gctl_length = MAXPHYS; + + debug(20, "%s: waiting for request from kernel", __func__); + if (ioctl(drv->fd, G_GATE_CMD_START, ggio) == -1) { + err("%s: G_GATE_CMD_START failed", __func__); + return -errno; + } + + debug(20, "%s: got request from kernel: " + "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p", + __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd, + (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length, + ggio->gctl_error, ggio->gctl_data); + + error = ggio->gctl_error; + switch (error) { + case 0: + break; + case ECANCELED: + debug(10, "%s: canceled: exit gracefully", __func__); + r = -error; + goto fail; + case ENOMEM: + /* + * Buffer too small? Impossible, we allocate MAXPHYS + * bytes - request can't be bigger than that. + */ + /* FALLTHROUGH */ + case ENXIO: + default: + errno = error; + err("%s: G_GATE_CMD_START failed", __func__); + r = -error; + goto fail; + } + + *req = ggio; + return 0; + +fail: + free(ggio->gctl_data); + free(ggio); + return r; +} + +int ggate_drv_send(ggate_drv_t drv_, ggate_drv_req_t req) { + struct ggate_drv *drv = (struct ggate_drv *)drv_; + struct g_gate_ctl_io *ggio = (struct g_gate_ctl_io *)req; + int r = 0; + + debug(20, "%s: send request to kernel: " + "unit=%d, seq=%ju, cmd=%u, offset=%ju, length=%ju, error=%d, data=%p", + __func__, ggio->gctl_unit, (uintmax_t)ggio->gctl_seq, ggio->gctl_cmd, + (uintmax_t)ggio->gctl_offset, (uintmax_t)ggio->gctl_length, + ggio->gctl_error, ggio->gctl_data); + + if (ioctl(drv->fd, G_GATE_CMD_DONE, ggio) == -1) { + err("%s: G_GATE_CMD_DONE failed", __func__); + r = -errno; + } + + free(ggio->gctl_data); + free(ggio); + return r; +} + +static const char * get_conf(struct ggeom *gp, const char *name) { + struct gconfig *conf; + + LIST_FOREACH(conf, &gp->lg_config, lg_config) { + if (strcmp(conf->lg_name, name) == 0) + return (conf->lg_val); + } + return ""; +} + +int ggate_drv_list(struct ggate_drv_info *info, size_t *size) { + struct gmesh mesh; + struct gclass *class; + struct ggeom *gp; + int r; + size_t max_size; + + r = geom_gettree(&mesh); + if (r != 0) { + return -errno; + } + + max_size = *size; + *size = 0; + + LIST_FOREACH(class, &mesh.lg_class, lg_class) { + if (strcmp(class->lg_name, G_GATE_CLASS_NAME) == 0) { + LIST_FOREACH(gp, &class->lg_geom, lg_geom) { + (*size)++; + } + if (*size > max_size) { + r = -ERANGE; + goto done; + } + LIST_FOREACH(gp, &class->lg_geom, lg_geom) { + strlcpy(info->id, get_conf(gp, "unit"), sizeof(info->id)); + strlcpy(info->name, gp->lg_name, sizeof(info->name)); + strlcpy(info->info, get_conf(gp, "info"), sizeof(info->info)); + info++; + } + } + } + +done: + geom_deletetree(&mesh); + return r; +} diff --git a/src/tools/rbd_ggate/ggate_drv.h b/src/tools/rbd_ggate/ggate_drv.h new file mode 100644 index 00000000..a32f5113 --- /dev/null +++ b/src/tools/rbd_ggate/ggate_drv.h @@ -0,0 +1,64 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_GGATE_GGATE_DRV_H +#define CEPH_RBD_GGATE_GGATE_DRV_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/param.h> + +#include <stdbool.h> +#include <stdint.h> + +typedef void *ggate_drv_t; +typedef void *ggate_drv_req_t; + +/* + * GGATE driver commands. They are mapped to GgateReq::Command. + */ +enum { + GGATE_DRV_CMD_UNKNOWN = 0, + GGATE_DRV_CMD_WRITE = 1, + GGATE_DRV_CMD_READ = 2, + GGATE_DRV_CMD_FLUSH = 3, + GGATE_DRV_CMD_DISCARD = 4, +}; + +struct ggate_drv_info { + char id[16]; + char name[NAME_MAX]; + char info[2048]; /* G_GATE_INFOSIZE */ +}; + +uint64_t ggate_drv_req_id(ggate_drv_req_t req); +int ggate_drv_req_cmd(ggate_drv_req_t req); +void *ggate_drv_req_buf(ggate_drv_req_t req); +size_t ggate_drv_req_length(ggate_drv_req_t req); +uint64_t ggate_drv_req_offset(ggate_drv_req_t req); +int ggate_drv_req_error(ggate_drv_req_t req); + +void ggate_drv_req_set_error(ggate_drv_req_t req, int error); +void *ggate_drv_req_release_buf(ggate_drv_req_t req); + +int ggate_drv_load(); + +int ggate_drv_create(char *name, size_t namelen, size_t sectorsize, + size_t mediasize, bool readonly, const char *info, ggate_drv_t *drv); +void ggate_drv_destroy(ggate_drv_t drv); + +int ggate_drv_recv(ggate_drv_t drv, ggate_drv_req_t *req); +int ggate_drv_send(ggate_drv_t drv, ggate_drv_req_t req); + +int ggate_drv_resize(ggate_drv_t drv, size_t newsize); + +int ggate_drv_kill(const char *devname); +int ggate_drv_list(struct ggate_drv_info *info, size_t *size); + +#ifdef __cplusplus +} +#endif + +#endif // CEPH_RBD_GGATE_GGATE_DRV_H diff --git a/src/tools/rbd_ggate/main.cc b/src/tools/rbd_ggate/main.cc new file mode 100644 index 00000000..5ed582fb --- /dev/null +++ b/src/tools/rbd_ggate/main.cc @@ -0,0 +1,521 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/int_types.h" + +#include <sys/types.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <string.h> +#include <assert.h> + +#include <iostream> +#include <memory> +#include <boost/algorithm/string/predicate.hpp> +#include <regex> + +#include "common/Formatter.h" +#include "common/Preforker.h" +#include "common/TextTable.h" +#include "common/ceph_argparse.h" +#include "common/config_proxy.h" +#include "common/debug.h" +#include "common/errno.h" +#include "global/global_init.h" +#include "global/signal_handler.h" + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" +#include "include/stringify.h" + +#include "Driver.h" +#include "Server.h" +#include "Watcher.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd-ggate: " << __func__ << ": " + +static void usage() { + std::cout << "Usage: rbd-ggate [options] map <image-or-snap-spec> Map an image to ggate device\n" + << " unmap <device path> Unmap ggate device\n" + << " list List mapped ggate devices\n" + << "\n" + << "Map options:\n" + << " --device <device path> Specify ggate device path\n" + << " --read-only Map readonly\n" + << " --exclusive Forbid writes by other clients\n" + << "\n" + << "List options:\n" + << " --format plain|json|xml Output format (default: plain)\n" + << " --pretty-format Pretty formatting (json and xml)\n" + << std::endl; + generic_server_usage(); +} + +static std::string devpath, poolname, nsname, imgname, snapname; +static bool readonly = false; +static bool exclusive = false; + +static std::unique_ptr<rbd::ggate::Driver> drv; + +static void handle_signal(int signum) +{ + derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; + + ceph_assert(signum == SIGINT || signum == SIGTERM); + ceph_assert(drv); + + drv->shut_down(); +} + +static int do_map(int argc, const char *argv[]) +{ + int r; + + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx io_ctx; + librbd::Image image; + + librbd::image_info_t info; + std::string desc; + + Preforker forker; + + vector<const char*> args; + argv_to_vec(argc, argv, args); + + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + g_ceph_context->_conf.set_val_or_die("pid_file", ""); + + if (global_init_prefork(g_ceph_context) >= 0) { + std::string err; + r = forker.prefork(err); + if (r < 0) { + std::cerr << err << std::endl; + return r; + } + if (forker.is_parent()) { + if (forker.parent_wait(err) != 0) { + return -ENXIO; + } + return 0; + } + global_init_postfork_start(g_ceph_context); + } + + common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + + if (poolname.empty()) { + poolname = g_ceph_context->_conf.get_val<std::string>("rbd_default_pool"); + } + + std::string devname = boost::starts_with(devpath, "/dev/") ? + devpath.substr(5) : devpath; + std::unique_ptr<rbd::ggate::Watcher> watcher; + uint64_t handle; + + r = rados.init_with_context(g_ceph_context); + if (r < 0) { + goto done; + } + + r = rados.connect(); + if (r < 0) { + std::cerr << "rbd-ggate: failed to connect to cluster: " << cpp_strerror(r) + << std::endl; + goto done; + } + + r = rados.ioctx_create(poolname.c_str(), io_ctx); + if (r < 0) { + std::cerr << "rbd-ggate: failed to acces pool " << poolname << ": " + << cpp_strerror(r) << std::endl; + goto done; + } + + io_ctx.set_namespace(nsname); + + r = rbd.open(io_ctx, image, imgname.c_str()); + if (r < 0) { + std::cerr << "rbd-ggate: failed to open image " << imgname << ": " + << cpp_strerror(r) << std::endl; + goto done; + } + + if (exclusive) { + r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); + if (r < 0) { + std::cerr << "rbd-ggate: failed to acquire exclusive lock: " + << cpp_strerror(r) << std::endl; + goto done; + } + } + + desc = "RBD " + poolname + "/" + (nsname.empty() ? "" : nsname + "/") + + imgname; + + if (!snapname.empty()) { + r = image.snap_set(snapname.c_str()); + if (r < 0) { + std::cerr << "rbd-ggate: failed to set snapshot " << snapname << ": " + << cpp_strerror(r) << std::endl; + goto done; + } + readonly = true; + desc += "@" + snapname; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) { + std::cerr << "rbd-ggate: image stat failed: " << cpp_strerror(r) + << std::endl; + goto done; + } + + rbd::ggate::Driver::load(); + drv.reset(new rbd::ggate::Driver(devname, 512, info.size, readonly, desc)); + r = drv->init(); + if (r < 0) { + r = -errno; + std::cerr << "rbd-ggate: failed to create ggate device: " << cpp_strerror(r) + << std::endl; + goto done; + } + + watcher.reset(new rbd::ggate::Watcher(drv.get(), io_ctx, image, info.size)); + r = image.update_watch(watcher.get(), &handle); + if (r < 0) { + std::cerr << "rbd-ggate: failed to set watcher: " << cpp_strerror(r) + << std::endl; + drv->shut_down(); + goto done; + } + + std::cout << "/dev/" << drv->get_devname() << std::endl; + + if (g_conf()->daemonize) { + global_init_postfork_finish(g_ceph_context); + forker.daemonize(); + } + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + rbd::ggate::Server(drv.get(), image).run(); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + r = image.update_unwatch(handle); + ceph_assert(r == 0); + +done: + image.close(); + io_ctx.close(); + rados.shutdown(); + + if (r < 0) { + std::cerr << "rbd-ggate: failed to map: " << cpp_strerror(r) << std::endl; + } + + forker.exit(r < 0 ? EXIT_FAILURE : 0); + // Unreachable; + return r; +} + +static int do_unmap() +{ + std::string devname = boost::starts_with(devpath, "/dev/") ? + devpath.substr(5) : devpath; + + int r = rbd::ggate::Driver::kill(devname); + if (r < 0) { + cerr << "rbd-ggate: failed to destroy " << devname << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + +static int parse_imgpath(const std::string &imgpath, std::string *poolname, + std::string *nsname, std::string *imgname, + std::string *snapname) { + std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"); + std::smatch match; + if (!std::regex_match(imgpath, match, pattern)) { + std::cerr << "rbd-ggate: invalid spec '" << imgpath << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + *poolname = match[1]; + } + + if (match[2].matched) { + *nsname = match[2]; + } + + *imgname = match[3]; + + if (match[4].matched) { + *snapname = match[4]; + } + + return 0; +} + +static bool find_mapped_dev_by_spec(const std::string &spec, + std::string *devname) { + std::string poolname, nsname, imgname, snapname; + int r = parse_imgpath(spec, &poolname, &nsname, &imgname, &snapname); + if (r < 0) { + return false; + } + if (poolname.empty()) { + // We could use rbd_default_pool config to set pool name but then + // we would need to initialize the global context. So right now it + // is mandatory for the user to specify a pool. Fortunately the + // preferred way for users to call rbd-ggate is via rbd, which + // cares to set the pool name. + return false; + } + + std::map<std::string, rbd::ggate::Driver::DevInfo> devs; + r = rbd::ggate::Driver::list(&devs); + if (r < 0) { + return false; + } + + for (auto &it : devs) { + auto &name = it.second.first; + auto &info = it.second.second; + if (!boost::starts_with(info, "RBD ")) { + continue; + } + + std::string p, n, i, s; + parse_imgpath(info.substr(4), &p, &n, &i, &s); + if (p == poolname && n == nsname && i == imgname && s == snapname) { + *devname = name; + return true; + } + } + + return false; +} + +static int do_list(const std::string &format, bool pretty_format) +{ + rbd::ggate::Driver::load(); + + std::map<std::string, rbd::ggate::Driver::DevInfo> devs; + int r = rbd::ggate::Driver::list(&devs); + if (r < 0) { + return -r; + } + + std::unique_ptr<ceph::Formatter> f; + TextTable tbl; + + if (format == "json") { + f.reset(new JSONFormatter(pretty_format)); + } else if (format == "xml") { + f.reset(new XMLFormatter(pretty_format)); + } else if (!format.empty() && format != "plain") { + std::cerr << "rbd-ggate: invalid output format: " << format << std::endl; + return -EINVAL; + } + + if (f) { + f->open_array_section("devices"); + } else { + tbl.define_column("id", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); + } + + int count = 0; + + for (auto &it : devs) { + auto &id = it.first; + auto &name = it.second.first; + auto &info = it.second.second; + if (!boost::starts_with(info, "RBD ")) { + continue; + } + + std::string poolname; + std::string nsname; + std::string imgname; + std::string snapname(f ? "" : "-"); + parse_imgpath(info.substr(4), &poolname, &nsname, &imgname, &snapname); + + if (f) { + f->open_object_section("device"); + f->dump_string("id", id); + f->dump_string("pool", poolname); + f->dump_string("namespace", nsname); + f->dump_string("image", imgname); + f->dump_string("snap", snapname); + f->dump_string("device", "/dev/" + name); + f->close_section(); + } else { + tbl << id << poolname << nsname << imgname << snapname << "/dev/" + name + << TextTable::endrow; + } + count++; + } + + if (f) { + f->close_section(); // devices + f->flush(std::cout); + } else if (count > 0) { + std::cout << tbl; + } + + return 0; +} + +int main(int argc, const char *argv[]) { + int r; + enum { + None, + Connect, + Disconnect, + List + } cmd = None; + + vector<const char*> args; + + argv_to_vec(argc, argv, args); + if (args.empty()) { + cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + // filter out ceph config options + ConfigProxy{false}.parse_argv(args); + + std::string format; + bool pretty_format = false; + std::vector<const char*>::iterator i; + + for (i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { + usage(); + return 0; + } else if (ceph_argparse_witharg(args, i, &devpath, "--device", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + readonly = true; + } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { + exclusive = true; + } else if (ceph_argparse_witharg(args, i, &format, "--format", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) { + pretty_format = true; + } else { + ++i; + } + } + + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), "map") == 0) { + cmd = Connect; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Disconnect; + } else if (strcmp(*args.begin(), "list") == 0) { + cmd = List; + } else { + cerr << "rbd-ggate: unknown command: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + args.erase(args.begin()); + } + + if (cmd == None) { + cerr << "rbd-ggate: must specify command" << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (args.begin() == args.end()) { + cerr << "rbd-ggate: must specify image-or-snap-spec" << std::endl; + return EXIT_FAILURE; + } + if (parse_imgpath(*args.begin(), &poolname, &nsname, &imgname, + &snapname) < 0) { + return EXIT_FAILURE; + } + args.erase(args.begin()); + break; + case Disconnect: + if (args.begin() == args.end()) { + std::cerr << "rbd-ggate: must specify ggate device or image-or-snap-spec" + << std::endl; + return EXIT_FAILURE; + } + if (boost::starts_with(*args.begin(), "/dev/") || + !find_mapped_dev_by_spec(*args.begin(), &devpath)) { + devpath = *args.begin(); + } + args.erase(args.begin()); + break; + default: + break; + } + + if (args.begin() != args.end()) { + cerr << "rbd-ggate: unknown args: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (imgname.empty()) { + cerr << "rbd-ggate: image name was not specified" << std::endl; + return EXIT_FAILURE; + } + + r = do_map(argc, argv); + if (r < 0) + return EXIT_FAILURE; + break; + case Disconnect: + r = do_unmap(); + if (r < 0) + return EXIT_FAILURE; + break; + case List: + r = do_list(format, pretty_format); + if (r < 0) + return EXIT_FAILURE; + break; + default: + usage(); + return EXIT_FAILURE; + } + + return 0; +} diff --git a/src/tools/rbd_mirror/BaseRequest.h b/src/tools/rbd_mirror/BaseRequest.h new file mode 100644 index 00000000..5053eb83 --- /dev/null +++ b/src/tools/rbd_mirror/BaseRequest.h @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_BASE_REQUEST_H +#define CEPH_RBD_MIRROR_BASE_REQUEST_H + +#include "common/RefCountedObj.h" +#include "include/Context.h" + +namespace rbd { +namespace mirror { + +class BaseRequest : public RefCountedObject { +public: + BaseRequest(const std::string& name, CephContext *cct, Context *on_finish) + : RefCountedObject(cct, 1), m_name(name), m_cct(cct), + m_on_finish(on_finish) { + } + + virtual void send() = 0; + virtual void cancel() {} + +protected: + virtual void finish(int r) { + if (m_cct) { + lsubdout(m_cct, rbd_mirror, 20) << m_name << "::finish: r=" << r << dendl; + } + if (m_on_finish) { + m_on_finish->complete(r); + } + put(); + } + +private: + const std::string m_name; + CephContext *m_cct; + Context *m_on_finish; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_BASE_REQUEST_H diff --git a/src/tools/rbd_mirror/CMakeLists.txt b/src/tools/rbd_mirror/CMakeLists.txt new file mode 100644 index 00000000..30106a86 --- /dev/null +++ b/src/tools/rbd_mirror/CMakeLists.txt @@ -0,0 +1,69 @@ +add_library(rbd_mirror_types STATIC + image_map/Types.cc + instance_watcher/Types.cc + leader_watcher/Types.cc) + +set(rbd_mirror_internal + ClusterWatcher.cc + ImageDeleter.cc + ImageMap.cc + ImageReplayer.cc + ImageSync.cc + ImageSyncThrottler.cc + InstanceReplayer.cc + InstanceWatcher.cc + Instances.cc + LeaderWatcher.cc + Mirror.cc + MirrorStatusWatcher.cc + PoolReplayer.cc + PoolWatcher.cc + ServiceDaemon.cc + Threads.cc + Types.cc + image_deleter/SnapshotPurgeRequest.cc + image_deleter/TrashMoveRequest.cc + image_deleter/TrashRemoveRequest.cc + image_deleter/TrashWatcher.cc + image_map/LoadRequest.cc + image_map/Policy.cc + image_map/SimplePolicy.cc + image_map/StateTransition.cc + image_map/UpdateRequest.cc + image_replayer/BootstrapRequest.cc + image_replayer/CloseImageRequest.cc + image_replayer/CreateImageRequest.cc + image_replayer/EventPreprocessor.cc + image_replayer/GetMirrorImageIdRequest.cc + image_replayer/IsPrimaryRequest.cc + image_replayer/OpenImageRequest.cc + image_replayer/OpenLocalImageRequest.cc + image_replayer/PrepareLocalImageRequest.cc + image_replayer/PrepareRemoteImageRequest.cc + image_replayer/ReplayStatusFormatter.cc + image_replayer/Utils.cc + image_sync/SyncPointCreateRequest.cc + image_sync/SyncPointPruneRequest.cc + pool_watcher/RefreshImagesRequest.cc + service_daemon/Types.cc) + +add_library(rbd_mirror_internal STATIC + ${rbd_mirror_internal}) + +add_executable(rbd-mirror + main.cc) +target_link_libraries(rbd-mirror + rbd_mirror_internal + rbd_mirror_types + rbd_api + rbd_internal + rbd_types + journal + librados + osdc + cls_rbd_client + cls_lock_client + cls_journal_client + global + ${ALLOC_LIBS}) +install(TARGETS rbd-mirror DESTINATION bin) diff --git a/src/tools/rbd_mirror/ClusterWatcher.cc b/src/tools/rbd_mirror/ClusterWatcher.cc new file mode 100644 index 00000000..54329de6 --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.cc @@ -0,0 +1,223 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ClusterWatcher.h" +#include "include/stringify.h" +#include "common/ceph_json.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/internal.h" +#include "librbd/api/Mirror.h" +#include "tools/rbd_mirror/ServiceDaemon.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ClusterWatcher:" << this << " " \ + << __func__ << ": " + +using std::list; +using std::map; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; + +namespace rbd { +namespace mirror { + +ClusterWatcher::ClusterWatcher(RadosRef cluster, Mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_cluster(cluster), m_lock(lock), m_service_daemon(service_daemon) +{ +} + +const ClusterWatcher::PoolPeers& ClusterWatcher::get_pool_peers() const +{ + ceph_assert(m_lock.is_locked()); + return m_pool_peers; +} + +void ClusterWatcher::refresh_pools() +{ + dout(20) << "enter" << dendl; + + PoolPeers pool_peers; + read_pool_peers(&pool_peers); + + Mutex::Locker l(m_lock); + m_pool_peers = pool_peers; + // TODO: perhaps use a workqueue instead, once we get notifications + // about config changes for existing pools +} + +void ClusterWatcher::read_pool_peers(PoolPeers *pool_peers) +{ + int r = m_cluster->wait_for_latest_osdmap(); + if (r < 0) { + derr << "error waiting for OSD map: " << cpp_strerror(r) << dendl; + return; + } + + list<pair<int64_t, string> > pools; + r = m_cluster->pool_list2(pools); + if (r < 0) { + derr << "error listing pools: " << cpp_strerror(r) << dendl; + return; + } + + std::set<int64_t> service_pool_ids; + for (auto& kv : pools) { + int64_t pool_id = kv.first; + auto& pool_name = kv.second; + int64_t base_tier; + r = m_cluster->pool_get_base_tier(pool_id, &base_tier); + if (r == -ENOENT) { + dout(10) << "pool " << pool_name << " no longer exists" << dendl; + continue; + } else if (r < 0) { + derr << "Error retrieving base tier for pool " << pool_name << dendl; + continue; + } + if (pool_id != base_tier) { + // pool is a cache; skip it + continue; + } + + IoCtx ioctx; + r = m_cluster->ioctx_create2(pool_id, ioctx); + if (r == -ENOENT) { + dout(10) << "pool " << pool_id << " no longer exists" << dendl; + continue; + } else if (r < 0) { + derr << "Error accessing pool " << pool_name << cpp_strerror(r) << dendl; + continue; + } + + cls::rbd::MirrorMode mirror_mode_internal; + r = librbd::cls_client::mirror_mode_get(&ioctx, &mirror_mode_internal); + if (r == 0 && mirror_mode_internal == cls::rbd::MIRROR_MODE_DISABLED) { + dout(10) << "mirroring is disabled for pool " << pool_name << dendl; + continue; + } + + service_pool_ids.insert(pool_id); + if (m_service_pools.find(pool_id) == m_service_pools.end()) { + m_service_pools[pool_id] = {}; + m_service_daemon->add_pool(pool_id, pool_name); + } + + if (r == -EPERM) { + dout(10) << "access denied querying pool " << pool_name << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, "access denied"); + continue; + } else if (r < 0) { + derr << "could not tell whether mirroring was enabled for " << pool_name + << " : " << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, "mirroring mode query failed"); + continue; + } + + vector<librbd::mirror_peer_t> configs; + r = librbd::api::Mirror<>::peer_list(ioctx, &configs); + if (r < 0) { + derr << "error reading mirroring config for pool " << pool_name + << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_ERROR, "mirroring peer list failed"); + continue; + } + + std::vector<PeerSpec> peers{configs.begin(), configs.end()}; + for (auto& peer : peers) { + r = resolve_peer_config_keys(pool_id, pool_name, &peer); + if (r < 0) { + break; + } + } + + if (m_service_pools[pool_id] != service_daemon::CALLOUT_ID_NONE) { + m_service_daemon->remove_callout(pool_id, m_service_pools[pool_id]); + m_service_pools[pool_id] = service_daemon::CALLOUT_ID_NONE; + } + + pool_peers->emplace(pool_id, Peers{peers.begin(), peers.end()}); + } + + for (auto it = m_service_pools.begin(); it != m_service_pools.end(); ) { + auto current_it(it++); + if (service_pool_ids.find(current_it->first) == service_pool_ids.end()) { + m_service_daemon->remove_pool(current_it->first); + m_service_pools.erase(current_it->first); + } + } +} + +int ClusterWatcher::resolve_peer_config_keys(int64_t pool_id, + const std::string& pool_name, + PeerSpec* peer) { + dout(10) << "retrieving config-key: pool_id=" << pool_id << ", " + << "pool_name=" << pool_name << ", " + << "peer_uuid=" << peer->uuid << dendl; + + std::string cmd = + "{" + "\"prefix\": \"config-key get\", " + "\"key\": \"" RBD_MIRROR_PEER_CONFIG_KEY_PREFIX + stringify(pool_id) + + "/" + peer->uuid + "\"" + "}"; + + bufferlist in_bl; + bufferlist out_bl; + int r = m_cluster->mon_command(cmd, in_bl, &out_bl, nullptr); + if (r == -ENOENT || out_bl.length() == 0) { + return 0; + } else if (r < 0) { + derr << "error reading mirroring peer config for pool " << pool_name << ": " + << cpp_strerror(r) << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key query failed"); + return r; + } + + bool json_valid = false; + json_spirit::mValue json_root; + if(json_spirit::read(out_bl.to_str(), json_root)) { + try { + auto& json_obj = json_root.get_obj(); + if (json_obj.count("mon_host")) { + peer->mon_host = json_obj["mon_host"].get_str(); + } + if (json_obj.count("key")) { + peer->key = json_obj["key"].get_str(); + } + json_valid = true; + } catch (std::runtime_error&) { + } + } + + if (!json_valid) { + derr << "error parsing mirroring peer config for pool " << pool_name << ", " + << "peer " << peer->uuid << dendl; + m_service_pools[pool_id] = m_service_daemon->add_or_update_callout( + pool_id, m_service_pools[pool_id], + service_daemon::CALLOUT_LEVEL_WARNING, + "mirroring peer config-key decode failed"); + } + + return 0; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/ClusterWatcher.h b/src/tools/rbd_mirror/ClusterWatcher.h new file mode 100644 index 00000000..e8430b47 --- /dev/null +++ b/src/tools/rbd_mirror/ClusterWatcher.h @@ -0,0 +1,69 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_CLUSTER_WATCHER_H +#define CEPH_RBD_MIRROR_CLUSTER_WATCHER_H + +#include <map> +#include <memory> +#include <set> + +#include "common/ceph_context.h" +#include "common/Mutex.h" +#include "common/Timer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <unordered_map> + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; + +/** + * Tracks mirroring configuration for pools in a single + * cluster. + */ +class ClusterWatcher { +public: + struct PeerSpecCompare { + bool operator()(const PeerSpec& lhs, const PeerSpec& rhs) const { + return (lhs.uuid < rhs.uuid); + } + }; + typedef std::set<PeerSpec, PeerSpecCompare> Peers; + typedef std::map<int64_t, Peers> PoolPeers; + + ClusterWatcher(RadosRef cluster, Mutex &lock, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + ~ClusterWatcher() = default; + ClusterWatcher(const ClusterWatcher&) = delete; + ClusterWatcher& operator=(const ClusterWatcher&) = delete; + + // Caller controls frequency of calls + void refresh_pools(); + const PoolPeers& get_pool_peers() const; + +private: + typedef std::unordered_map<int64_t, service_daemon::CalloutId> ServicePools; + + RadosRef m_cluster; + Mutex &m_lock; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + ServicePools m_service_pools; + PoolPeers m_pool_peers; + + void read_pool_peers(PoolPeers *pool_peers); + + int resolve_peer_config_keys(int64_t pool_id, const std::string& pool_name, + PeerSpec* peer); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_CLUSTER_WATCHER_H diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc new file mode 100644 index 00000000..f4d928ca --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.cc @@ -0,0 +1,549 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "include/rados/librados.hpp" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "global/global_context.h" +#include "librbd/internal.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "cls/rbd/cls_rbd_client.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/Utils.h" +#include "ImageDeleter.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include <map> +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +using std::string; +using std::stringstream; +using std::vector; +using std::pair; +using std::make_pair; + +using librados::IoCtx; +using namespace librbd; + +namespace rbd { +namespace mirror { + +namespace { + +class ImageDeleterAdminSocketCommand { +public: + virtual ~ImageDeleterAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; +}; + +template <typename I> +class StatusCommand : public ImageDeleterAdminSocketCommand { +public: + explicit StatusCommand(ImageDeleter<I> *image_del) : image_del(image_del) {} + + bool call(Formatter *f, stringstream *ss) override { + image_del->print_status(f, ss); + return true; + } + +private: + ImageDeleter<I> *image_del; +}; + +} // anonymous namespace + +template <typename I> +class ImageDeleterAdminSocketHook : public AdminSocketHook { +public: + ImageDeleterAdminSocketHook(CephContext *cct, const std::string& pool_name, + ImageDeleter<I> *image_del) : + admin_socket(cct->get_admin_socket()) { + + std::string command; + int r; + + command = "rbd mirror deletion status " + pool_name; + r = admin_socket->register_command(command, command, this, + "get status for image deleter"); + if (r == 0) { + commands[command] = new StatusCommand<I>(image_del); + } + + } + + ~ImageDeleterAdminSocketHook() override { + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + (void)admin_socket->unregister_command(i->first); + delete i->second; + } + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, ImageDeleterAdminSocketCommand*, + std::less<>> Commands; + AdminSocket *admin_socket; + Commands commands; +}; + +template <typename I> +ImageDeleter<I>::ImageDeleter(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + ServiceDaemon<librbd::ImageCtx>* service_daemon) + : m_local_io_ctx(local_io_ctx), m_threads(threads), + m_service_daemon(service_daemon), m_trash_listener(this), + m_lock(librbd::util::unique_lock_name("rbd::mirror::ImageDeleter::m_lock", + this)) { +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << " " \ + << __func__ << ": " + +template <typename I> +void ImageDeleter<I>::trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, + bool resync, + ContextWQ* work_queue, Context* on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "resync=" << resync << dendl; + + auto req = rbd::mirror::image_deleter::TrashMoveRequest<>::create( + local_io_ctx, global_image_id, resync, work_queue, on_finish); + req->send(); +} + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \ + << __func__ << ": " + +template <typename I> +void ImageDeleter<I>::init(Context* on_finish) { + dout(10) << dendl; + + m_asok_hook = new ImageDeleterAdminSocketHook<I>( + g_ceph_context, m_local_io_ctx.get_pool_name(), this); + + m_trash_watcher = image_deleter::TrashWatcher<I>::create(m_local_io_ctx, + m_threads, + m_trash_listener); + m_trash_watcher->init(on_finish); +} + +template <typename I> +void ImageDeleter<I>::shut_down(Context* on_finish) { + dout(10) << dendl; + + delete m_asok_hook; + m_asok_hook = nullptr; + + shut_down_trash_watcher(on_finish); +} + +template <typename I> +void ImageDeleter<I>::shut_down_trash_watcher(Context* on_finish) { + dout(10) << dendl; + ceph_assert(m_trash_watcher); + auto ctx = new FunctionContext([this, on_finish](int r) { + delete m_trash_watcher; + m_trash_watcher = nullptr; + + wait_for_ops(on_finish); + }); + m_trash_watcher->shut_down(ctx); +} + +template <typename I> +void ImageDeleter<I>::wait_for_ops(Context* on_finish) { + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + m_running = false; + cancel_retry_timer(); + } + + auto ctx = new FunctionContext([this, on_finish](int) { + cancel_all_deletions(on_finish); + }); + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_all_deletions(Context* on_finish) { + { + Mutex::Locker locker(m_lock); + // wake up any external state machines waiting on deletions + ceph_assert(m_in_flight_delete_queue.empty()); + for (auto& queue : {&m_delete_queue, &m_retry_delete_queue}) { + for (auto& info : *queue) { + notify_on_delete(info->image_id, -ECANCELED); + } + queue->clear(); + } + } + on_finish->complete(0); +} + +template <typename I> +void ImageDeleter<I>::wait_for_deletion(const std::string& image_id, + bool scheduled_only, + Context* on_finish) { + dout(5) << "image_id=" << image_id << dendl; + + on_finish = new FunctionContext([this, on_finish](int r) { + m_threads->work_queue->queue(on_finish, r); + }); + + Mutex::Locker locker(m_lock); + auto del_info = find_delete_info(image_id); + if (!del_info && scheduled_only) { + // image not scheduled for deletion + on_finish->complete(0); + return; + } + + notify_on_delete(image_id, -ESTALE); + m_on_delete_contexts[image_id] = on_finish; +} + +template <typename I> +void ImageDeleter<I>::complete_active_delete(DeleteInfoRef* delete_info, + int r) { + dout(20) << "info=" << *delete_info << ", r=" << r << dendl; + Mutex::Locker locker(m_lock); + notify_on_delete((*delete_info)->image_id, r); + delete_info->reset(); +} + +template <typename I> +void ImageDeleter<I>::enqueue_failed_delete(DeleteInfoRef* delete_info, + int error_code, + double retry_delay) { + dout(20) << "info=" << *delete_info << ", r=" << error_code << dendl; + if (error_code == -EBLACKLISTED) { + Mutex::Locker locker(m_lock); + derr << "blacklisted while deleting local image" << dendl; + complete_active_delete(delete_info, error_code); + return; + } + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + auto& delete_info_ref = *delete_info; + notify_on_delete(delete_info_ref->image_id, error_code); + delete_info_ref->error_code = error_code; + ++delete_info_ref->retries; + delete_info_ref->retry_time = ceph_clock_now(); + delete_info_ref->retry_time += retry_delay; + m_retry_delete_queue.push_back(delete_info_ref); + + schedule_retry_timer(); +} + +template <typename I> +typename ImageDeleter<I>::DeleteInfoRef +ImageDeleter<I>::find_delete_info(const std::string &image_id) { + ceph_assert(m_lock.is_locked()); + DeleteQueue delete_queues[] = {m_in_flight_delete_queue, + m_retry_delete_queue, + m_delete_queue}; + + DeleteInfo delete_info{image_id}; + for (auto& queue : delete_queues) { + auto it = std::find_if(queue.begin(), queue.end(), + [&delete_info](const DeleteInfoRef& ref) { + return delete_info == *ref; + }); + if (it != queue.end()) { + return *it; + } + } + return {}; +} + +template <typename I> +void ImageDeleter<I>::print_status(Formatter *f, stringstream *ss) { + dout(20) << dendl; + + if (f) { + f->open_object_section("image_deleter_status"); + f->open_array_section("delete_images_queue"); + } + + Mutex::Locker l(m_lock); + for (const auto& image : m_delete_queue) { + image->print_status(f, ss); + } + + if (f) { + f->close_section(); + f->open_array_section("failed_deletes_queue"); + } + + for (const auto& image : m_retry_delete_queue) { + image->print_status(f, ss, true); + } + + if (f) { + f->close_section(); + f->close_section(); + f->flush(*ss); + } +} + +template <typename I> +vector<string> ImageDeleter<I>::get_delete_queue_items() { + vector<string> items; + + Mutex::Locker l(m_lock); + for (const auto& del_info : m_delete_queue) { + items.push_back(del_info->image_id); + } + + return items; +} + +template <typename I> +vector<pair<string, int> > ImageDeleter<I>::get_failed_queue_items() { + vector<pair<string, int> > items; + + Mutex::Locker l(m_lock); + for (const auto& del_info : m_retry_delete_queue) { + items.push_back(make_pair(del_info->image_id, + del_info->error_code)); + } + + return items; +} + +template <typename I> +void ImageDeleter<I>::remove_images() { + dout(10) << dendl; + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + uint64_t max_concurrent_deletions = cct->_conf.get_val<uint64_t>( + "rbd_mirror_concurrent_image_deletions"); + + Mutex::Locker locker(m_lock); + while (true) { + if (!m_running || m_delete_queue.empty() || + m_in_flight_delete_queue.size() >= max_concurrent_deletions) { + return; + } + + DeleteInfoRef delete_info = m_delete_queue.front(); + m_delete_queue.pop_front(); + + ceph_assert(delete_info); + remove_image(delete_info); + } +} + +template <typename I> +void ImageDeleter<I>::remove_image(DeleteInfoRef delete_info) { + dout(10) << "info=" << *delete_info << dendl; + ceph_assert(m_lock.is_locked()); + + m_in_flight_delete_queue.push_back(delete_info); + m_async_op_tracker.start_op(); + + auto ctx = new FunctionContext([this, delete_info](int r) { + handle_remove_image(delete_info, r); + m_async_op_tracker.finish_op(); + }); + + auto req = image_deleter::TrashRemoveRequest<I>::create( + m_local_io_ctx, delete_info->image_id, &delete_info->error_result, + m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void ImageDeleter<I>::handle_remove_image(DeleteInfoRef delete_info, + int r) { + dout(10) << "info=" << *delete_info << ", r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_lock.is_locked()); + auto it = std::find(m_in_flight_delete_queue.begin(), + m_in_flight_delete_queue.end(), delete_info); + ceph_assert(it != m_in_flight_delete_queue.end()); + m_in_flight_delete_queue.erase(it); + } + + if (r < 0) { + if (delete_info->error_result == image_deleter::ERROR_RESULT_COMPLETE) { + complete_active_delete(&delete_info, r); + } else if (delete_info->error_result == + image_deleter::ERROR_RESULT_RETRY_IMMEDIATELY) { + enqueue_failed_delete(&delete_info, r, m_busy_interval); + } else { + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + double failed_interval = cct->_conf.get_val<double>( + "rbd_mirror_delete_retry_interval"); + enqueue_failed_delete(&delete_info, r, failed_interval); + } + } else { + complete_active_delete(&delete_info, 0); + } + + // process the next queued image to delete + remove_images(); +} + +template <typename I> +void ImageDeleter<I>::schedule_retry_timer() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + if (!m_running || m_timer_ctx != nullptr || m_retry_delete_queue.empty()) { + return; + } + + dout(10) << dendl; + auto &delete_info = m_retry_delete_queue.front(); + m_timer_ctx = new FunctionContext([this](int r) { + handle_retry_timer(); + }); + m_threads->timer->add_event_at(delete_info->retry_time, m_timer_ctx); +} + +template <typename I> +void ImageDeleter<I>::cancel_retry_timer() { + dout(10) << dendl; + ceph_assert(m_threads->timer_lock.is_locked()); + if (m_timer_ctx != nullptr) { + bool canceled = m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + ceph_assert(canceled); + } +} + +template <typename I> +void ImageDeleter<I>::handle_retry_timer() { + dout(10) << dendl; + ceph_assert(m_threads->timer_lock.is_locked()); + Mutex::Locker locker(m_lock); + + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + ceph_assert(m_running); + ceph_assert(!m_retry_delete_queue.empty()); + + // move all ready-to-ready items back to main queue + utime_t now = ceph_clock_now(); + while (!m_retry_delete_queue.empty()) { + auto &delete_info = m_retry_delete_queue.front(); + if (delete_info->retry_time > now) { + break; + } + + m_delete_queue.push_back(delete_info); + m_retry_delete_queue.pop_front(); + } + + // schedule wake up for any future retries + schedule_retry_timer(); + + // start (concurrent) removal of images + m_async_op_tracker.start_op(); + auto ctx = new FunctionContext([this](int r) { + remove_images(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageDeleter<I>::handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + auto del_info = find_delete_info(image_id); + if (del_info != nullptr) { + dout(20) << "image " << image_id << " " + << "was already scheduled for deletion" << dendl; + return; + } + + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << deferment_end_time << dendl; + + del_info.reset(new DeleteInfo(image_id)); + del_info->retry_time = deferment_end_time; + m_retry_delete_queue.push_back(del_info); + + schedule_retry_timer(); +} + +template <typename I> +void ImageDeleter<I>::notify_on_delete(const std::string& image_id, + int r) { + dout(10) << "image_id=" << image_id << ", r=" << r << dendl; + auto it = m_on_delete_contexts.find(image_id); + if (it == m_on_delete_contexts.end()) { + return; + } + + it->second->complete(r); + m_on_delete_contexts.erase(it); +} + +template <typename I> +void ImageDeleter<I>::DeleteInfo::print_status(Formatter *f, stringstream *ss, + bool print_failure_info) { + if (f) { + f->open_object_section("delete_info"); + f->dump_string("image_id", image_id); + if (print_failure_info) { + f->dump_string("error_code", cpp_strerror(error_code)); + f->dump_int("retries", retries); + } + f->close_section(); + f->flush(*ss); + } else { + *ss << *this; + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h new file mode 100644 index 00000000..8a17eb38 --- /dev/null +++ b/src/tools/rbd_mirror/ImageDeleter.h @@ -0,0 +1,180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_H + +#include "include/utime.h" +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_deleter/Types.h" +#include <atomic> +#include <deque> +#include <iosfwd> +#include <map> +#include <memory> +#include <vector> + +class AdminSocketHook; +class Context; +class ContextWQ; +class SafeTimer; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ServiceDaemon; +template <typename> class Threads; + +namespace image_deleter { template <typename> struct TrashWatcher; } + +/** + * Manage deletion of non-primary images. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageDeleter { +public: + static ImageDeleter* create(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + ServiceDaemon<librbd::ImageCtx>* service_daemon) { + return new ImageDeleter(local_io_ctx, threads, service_daemon); + } + + ImageDeleter(librados::IoCtx& local_io_ctx, + Threads<librbd::ImageCtx>* threads, + ServiceDaemon<librbd::ImageCtx>* service_daemon); + + ImageDeleter(const ImageDeleter&) = delete; + ImageDeleter& operator=(const ImageDeleter&) = delete; + + static void trash_move(librados::IoCtx& local_io_ctx, + const std::string& global_image_id, bool resync, + ContextWQ* work_queue, Context* on_finish); + + void init(Context* on_finish); + void shut_down(Context* on_finish); + + void print_status(Formatter *f, std::stringstream *ss); + + // for testing purposes + void wait_for_deletion(const std::string &image_id, + bool scheduled_only, Context* on_finish); + + std::vector<std::string> get_delete_queue_items(); + std::vector<std::pair<std::string, int> > get_failed_queue_items(); + + inline void set_busy_timer_interval(double interval) { + m_busy_interval = interval; + } + +private: + struct TrashListener : public image_deleter::TrashListener { + ImageDeleter *image_deleter; + + TrashListener(ImageDeleter *image_deleter) : image_deleter(image_deleter) { + } + + void handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time) override { + image_deleter->handle_trash_image(image_id, deferment_end_time); + } + }; + + struct DeleteInfo { + std::string image_id; + + image_deleter::ErrorResult error_result = {}; + int error_code = 0; + utime_t retry_time = {}; + int retries = 0; + + DeleteInfo(const std::string& image_id) + : image_id(image_id) { + } + + inline bool operator==(const DeleteInfo& delete_info) const { + return (image_id == delete_info.image_id); + } + + friend std::ostream& operator<<(std::ostream& os, DeleteInfo& delete_info) { + os << "[image_id=" << delete_info.image_id << "]"; + return os; + } + + void print_status(Formatter *f, std::stringstream *ss, + bool print_failure_info=false); + }; + typedef std::shared_ptr<DeleteInfo> DeleteInfoRef; + typedef std::deque<DeleteInfoRef> DeleteQueue; + typedef std::map<std::string, Context*> OnDeleteContexts; + + librados::IoCtx& m_local_io_ctx; + Threads<librbd::ImageCtx>* m_threads; + ServiceDaemon<librbd::ImageCtx>* m_service_daemon; + + image_deleter::TrashWatcher<ImageCtxT>* m_trash_watcher = nullptr; + TrashListener m_trash_listener; + + std::atomic<unsigned> m_running { 1 }; + + double m_busy_interval = 1; + + AsyncOpTracker m_async_op_tracker; + + Mutex m_lock; + DeleteQueue m_delete_queue; + DeleteQueue m_retry_delete_queue; + DeleteQueue m_in_flight_delete_queue; + + OnDeleteContexts m_on_delete_contexts; + + AdminSocketHook *m_asok_hook = nullptr; + + Context *m_timer_ctx = nullptr; + + bool process_image_delete(); + + void complete_active_delete(DeleteInfoRef* delete_info, int r); + void enqueue_failed_delete(DeleteInfoRef* delete_info, int error_code, + double retry_delay); + + DeleteInfoRef find_delete_info(const std::string &image_id); + + void remove_images(); + void remove_image(DeleteInfoRef delete_info); + void handle_remove_image(DeleteInfoRef delete_info, int r); + + void schedule_retry_timer(); + void cancel_retry_timer(); + void handle_retry_timer(); + + void handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time); + + void shut_down_trash_watcher(Context* on_finish); + void wait_for_ops(Context* on_finish); + void cancel_all_deletions(Context* on_finish); + + void notify_on_delete(const std::string& image_id, int r); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageDeleter<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_H diff --git a/src/tools/rbd_mirror/ImageMap.cc b/src/tools/rbd_mirror/ImageMap.cc new file mode 100644 index 00000000..58fa5e03 --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.cc @@ -0,0 +1,601 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" + +#include "librbd/Utils.h" +#include "tools/rbd_mirror/Threads.h" + +#include "ImageMap.h" +#include "image_map/LoadRequest.h" +#include "image_map/SimplePolicy.h" +#include "image_map/UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageMap: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +using ::operator<<; +using image_map::Policy; + +using librbd::util::unique_lock_name; +using librbd::util::create_async_context_callback; + +template <typename I> +struct ImageMap<I>::C_NotifyInstance : public Context { + ImageMap* image_map; + std::string global_image_id; + bool acquire_release; + + C_NotifyInstance(ImageMap* image_map, const std::string& global_image_id, + bool acquire_release) + : image_map(image_map), global_image_id(global_image_id), + acquire_release(acquire_release) { + image_map->start_async_op(); + } + + void finish(int r) override { + if (acquire_release) { + image_map->handle_peer_ack(global_image_id, r); + } else { + image_map->handle_peer_ack_remove(global_image_id, r); + } + image_map->finish_async_op(); + } +}; + +template <typename I> +ImageMap<I>::ImageMap(librados::IoCtx &ioctx, Threads<I> *threads, + const std::string& instance_id, + image_map::Listener &listener) + : m_ioctx(ioctx), m_threads(threads), m_instance_id(instance_id), + m_listener(listener), + m_lock(unique_lock_name("rbd::mirror::ImageMap::m_lock", this)) { +} + +template <typename I> +ImageMap<I>::~ImageMap() { + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_timer_task == nullptr); + ceph_assert(m_rebalance_task == nullptr); +} + +template <typename I> +void ImageMap<I>::continue_action(const std::set<std::string> &global_image_ids, + int r) { + dout(20) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + for (auto const &global_image_id : global_image_ids) { + bool schedule = m_policy->finish_action(global_image_id, r); + if (schedule) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_update_request( + const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r) { + dout(20) << "r=" << r << dendl; + + std::set<std::string> global_image_ids; + + global_image_ids.insert(remove_global_image_ids.begin(), + remove_global_image_ids.end()); + for (auto const &update : updates) { + global_image_ids.insert(update.global_image_id); + } + + continue_action(global_image_ids, r); +} + +template <typename I> +void ImageMap<I>::update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals) { + if (map_updates.empty() && map_removals.empty()) { + return; + } + + dout(5) << "updates=[" << map_updates << "], " + << "removes=[" << map_removals << "]" << dendl; + + Context *on_finish = new FunctionContext( + [this, map_updates, map_removals](int r) { + handle_update_request(map_updates, map_removals, r); + finish_async_op(); + }); + on_finish = create_async_context_callback(m_threads->work_queue, on_finish); + + // empty meta policy for now.. + image_map::PolicyMetaNone policy_meta; + + bufferlist bl; + encode(image_map::PolicyData(policy_meta), bl); + + // prepare update map + std::map<std::string, cls::rbd::MirrorImageMap> update_mapping; + for (auto const &update : map_updates) { + update_mapping.emplace( + update.global_image_id, cls::rbd::MirrorImageMap(update.instance_id, + update.mapped_time, bl)); + } + + start_async_op(); + image_map::UpdateRequest<I> *req = image_map::UpdateRequest<I>::create( + m_ioctx, std::move(update_mapping), std::move(map_removals), on_finish); + req->send(); +} + +template <typename I> +void ImageMap<I>::process_updates() { + dout(20) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_timer_task == nullptr); + + Updates map_updates; + std::set<std::string> map_removals; + Updates acquire_updates; + Updates release_updates; + + // gather updates by advancing the state machine + m_lock.Lock(); + for (auto const &global_image_id : m_global_image_ids) { + image_map::ActionType action_type = + m_policy->start_action(global_image_id); + image_map::LookupInfo info = m_policy->lookup(global_image_id); + + dout(15) << "global_image_id=" << global_image_id << ", " + << "action=" << action_type << ", " + << "instance=" << info.instance_id << dendl; + switch (action_type) { + case image_map::ACTION_TYPE_NONE: + continue; + case image_map::ACTION_TYPE_MAP_UPDATE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + map_updates.emplace_back(global_image_id, info.instance_id, + info.mapped_time); + break; + case image_map::ACTION_TYPE_MAP_REMOVE: + map_removals.emplace(global_image_id); + break; + case image_map::ACTION_TYPE_ACQUIRE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + acquire_updates.emplace_back(global_image_id, info.instance_id); + break; + case image_map::ACTION_TYPE_RELEASE: + ceph_assert(info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + release_updates.emplace_back(global_image_id, info.instance_id); + break; + } + } + m_global_image_ids.clear(); + m_lock.Unlock(); + + // notify listener (acquire, release) and update on-disk map. note + // that its safe to process this outside m_lock as we still hold + // timer lock. + notify_listener_acquire_release_images(acquire_updates, release_updates); + update_image_mapping(std::move(map_updates), std::move(map_removals)); +} + +template <typename I> +void ImageMap<I>::schedule_update_task() { + Mutex::Locker timer_lock(m_threads->timer_lock); + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_update_task(const Mutex &timer_lock) { + ceph_assert(m_threads->timer_lock.is_locked()); + + schedule_rebalance_task(); + + if (m_timer_task != nullptr) { + return; + } + + { + Mutex::Locker locker(m_lock); + if (m_global_image_ids.empty()) { + return; + } + } + + m_timer_task = new FunctionContext([this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_timer_task = nullptr; + + process_updates(); + }); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + double after = cct->_conf.get_val<double>("rbd_mirror_image_policy_update_throttle_interval"); + + dout(20) << "scheduling image check update (" << m_timer_task << ")" + << " after " << after << " second(s)" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void ImageMap<I>::rebalance() { + ceph_assert(m_rebalance_task == nullptr); + + { + Mutex::Locker locker(m_lock); + if (m_async_op_tracker.empty() && m_global_image_ids.empty()){ + dout(20) << "starting rebalance" << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances({}, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + } + + schedule_update_task(m_threads->timer_lock); +} + +template <typename I> +void ImageMap<I>::schedule_rebalance_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + + // fetch the updated value of idle timeout for (re)scheduling + double resched_after = cct->_conf.get_val<double>( + "rbd_mirror_image_policy_rebalance_timeout"); + if (!resched_after) { + return; + } + + // cancel existing rebalance task if any before scheduling + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + } + + m_rebalance_task = new FunctionContext([this](int _) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_rebalance_task = nullptr; + + rebalance(); + }); + + dout(20) << "scheduling rebalance (" << m_rebalance_task << ")" + << " after " << resched_after << " second(s)" << dendl; + m_threads->timer->add_event_after(resched_after, m_rebalance_task); +} + +template <typename I> +void ImageMap<I>::schedule_action(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + ceph_assert(m_lock.is_locked()); + + m_global_image_ids.emplace(global_image_id); +} + +template <typename I> +void ImageMap<I>::notify_listener_acquire_release_images( + const Updates &acquire, const Updates &release) { + if (acquire.empty() && release.empty()) { + return; + } + + dout(5) << "acquire=[" << acquire << "], " + << "release=[" << release << "]" << dendl; + + for (auto const &update : acquire) { + m_listener.acquire_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } + + for (auto const &update : release) { + m_listener.release_image( + update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, true))); + } +} + +template <typename I> +void ImageMap<I>::notify_listener_remove_images(const std::string &peer_uuid, + const Updates &remove) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "remove=[" << remove << "]" << dendl; + + for (auto const &update : remove) { + m_listener.remove_image( + peer_uuid, update.global_image_id, update.instance_id, + create_async_context_callback( + m_threads->work_queue, + new C_NotifyInstance(this, update.global_image_id, false))); + } +} + +template <typename I> +void ImageMap<I>::handle_load(const std::map<std::string, + cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + { + Mutex::Locker locker(m_lock); + m_policy->init(image_mapping); + + for (auto& pair : image_mapping) { + schedule_action(pair.first); + } + } + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack_remove(const std::string &global_image_id, + int r) { + Mutex::Locker locker(m_lock); + dout(5) << "global_image_id=" << global_image_id << dendl; + + if (r < 0) { + derr << "failed to remove global_image_id=" << global_image_id << dendl; + } + + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it == m_peer_map.end()) { + return; + } + + m_peer_map.erase(peer_it); +} + +template <typename I> +void ImageMap<I>::update_images_added( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(m_lock.is_locked()); + + for (auto const &global_image_id : global_image_ids) { + auto result = m_peer_map[global_image_id].insert(peer_uuid); + if (result.second && m_peer_map[global_image_id].size() == 1) { + if (m_policy->add_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } +} + +template <typename I> +void ImageMap<I>::update_images_removed( + const std::string &peer_uuid, + const std::set<std::string> &global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " + << "global_image_ids=[" << global_image_ids << "]" << dendl; + ceph_assert(m_lock.is_locked()); + + Updates to_remove; + for (auto const &global_image_id : global_image_ids) { + image_map::LookupInfo info = m_policy->lookup(global_image_id); + bool image_mapped = (info.instance_id != image_map::UNMAPPED_INSTANCE_ID); + + bool image_removed = image_mapped; + bool peer_removed = false; + auto peer_it = m_peer_map.find(global_image_id); + if (peer_it != m_peer_map.end()) { + auto& peer_set = peer_it->second; + peer_removed = peer_set.erase(peer_uuid); + image_removed = peer_removed && peer_set.empty(); + } + + if (image_mapped && peer_removed && !peer_uuid.empty()) { + // peer image has been deleted + to_remove.emplace_back(global_image_id, info.instance_id); + } + + if (image_mapped && image_removed) { + // local and peer images have been deleted + if (m_policy->remove_image(global_image_id)) { + schedule_action(global_image_id); + } + } + } + + if (!to_remove.empty()) { + // removal notification will be notified instantly. this is safe + // even after scheduling action for images as we still hold m_lock + notify_listener_remove_images(peer_uuid, to_remove); + } +} + +template <typename I> +void ImageMap<I>::update_instances_added( + const std::vector<std::string> &instance_ids) { + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, false); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->add_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_instances_removed( + const std::vector<std::string> &instance_ids) { + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + std::vector<std::string> filtered_instance_ids; + filter_instance_ids(instance_ids, &filtered_instance_ids, true); + if (filtered_instance_ids.empty()) { + return; + } + + dout(20) << "instance_ids=" << filtered_instance_ids << dendl; + + std::set<std::string> remap_global_image_ids; + m_policy->remove_instances(filtered_instance_ids, &remap_global_image_ids); + + for (auto const &global_image_id : remap_global_image_ids) { + schedule_action(global_image_id); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::update_images(const std::string &peer_uuid, + std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids) { + dout(5) << "peer_uuid=" << peer_uuid << ", " << "added_count=" + << added_global_image_ids.size() << ", " << "removed_count=" + << removed_global_image_ids.size() << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_shutting_down) { + return; + } + + if (!removed_global_image_ids.empty()) { + update_images_removed(peer_uuid, removed_global_image_ids); + } + if (!added_global_image_ids.empty()) { + update_images_added(peer_uuid, added_global_image_ids); + } + } + + schedule_update_task(); +} + +template <typename I> +void ImageMap<I>::handle_peer_ack(const std::string &global_image_id, int r) { + dout (20) << "global_image_id=" << global_image_id << ", r=" << r + << dendl; + + continue_action({global_image_id}, r); +} + +template <typename I> +void ImageMap<I>::init(Context *on_finish) { + dout(20) << dendl; + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type == "none" || policy_type == "simple") { + m_policy.reset(image_map::SimplePolicy::create(m_ioctx)); + } else { + ceph_abort(); // not really needed as such, but catch it. + } + + dout(20) << "mapping policy=" << policy_type << dendl; + + start_async_op(); + C_LoadMap *ctx = new C_LoadMap(this, on_finish); + image_map::LoadRequest<I> *req = image_map::LoadRequest<I>::create( + m_ioctx, &ctx->image_mapping, ctx); + req->send(); +} + +template <typename I> +void ImageMap<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + { + Mutex::Locker timer_lock(m_threads->timer_lock); + + { + Mutex::Locker locker(m_lock); + ceph_assert(!m_shutting_down); + + m_shutting_down = true; + m_policy.reset(); + } + + if (m_timer_task != nullptr) { + m_threads->timer->cancel_event(m_timer_task); + m_timer_task = nullptr; + } + if (m_rebalance_task != nullptr) { + m_threads->timer->cancel_event(m_rebalance_task); + m_rebalance_task = nullptr; + } + } + + wait_for_async_ops(on_finish); +} + +template <typename I> +void ImageMap<I>::filter_instance_ids( + const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, bool removal) const { + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + std::string policy_type = cct->_conf.get_val<string>("rbd_mirror_image_policy_type"); + + if (policy_type != "none") { + *filtered_instance_ids = instance_ids; + return; + } + + if (removal) { + // propagate removals for external instances + for (auto& instance_id : instance_ids) { + if (instance_id != m_instance_id) { + filtered_instance_ids->push_back(instance_id); + } + } + } else if (std::find(instance_ids.begin(), instance_ids.end(), + m_instance_id) != instance_ids.end()) { + // propagate addition only for local instance + filtered_instance_ids->push_back(m_instance_id); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageMap<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageMap.h b/src/tools/rbd_mirror/ImageMap.h new file mode 100644 index 00000000..283f55db --- /dev/null +++ b/src/tools/rbd_mirror/ImageMap.h @@ -0,0 +1,175 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_H + +#include <vector> + +#include "common/Mutex.h" +#include "include/Context.h" +#include "common/AsyncOpTracker.h" +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +#include "image_map/Policy.h" +#include "image_map/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageMap { +public: + static ImageMap *create(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, + image_map::Listener &listener) { + return new ImageMap(ioctx, threads, instance_id, listener); + } + + ~ImageMap(); + + // init (load) the instance map from disk + void init(Context *on_finish); + + // shut down map operations + void shut_down(Context *on_finish); + + // update (add/remove) images + void update_images(const std::string &peer_uuid, + std::set<std::string> &&added_global_image_ids, + std::set<std::string> &&removed_global_image_ids); + + // add/remove instances + void update_instances_added(const std::vector<std::string> &instances); + void update_instances_removed(const std::vector<std::string> &instances); + +private: + struct C_NotifyInstance; + + ImageMap(librados::IoCtx &ioctx, Threads<ImageCtxT> *threads, + const std::string& instance_id, image_map::Listener &listener); + + struct Update { + std::string global_image_id; + std::string instance_id; + utime_t mapped_time; + + Update(const std::string &global_image_id, const std::string &instance_id, + utime_t mapped_time) + : global_image_id(global_image_id), + instance_id(instance_id), + mapped_time(mapped_time) { + } + Update(const std::string &global_image_id, const std::string &instance_id) + : Update(global_image_id, instance_id, ceph_clock_now()) { + } + + friend std::ostream& operator<<(std::ostream& os, + const Update& update) { + os << "{global_image_id=" << update.global_image_id << ", " + << "instance_id=" << update.instance_id << "}"; + return os; + } + + }; + typedef std::list<Update> Updates; + + // Lock ordering: m_threads->timer_lock, m_lock + + librados::IoCtx &m_ioctx; + Threads<ImageCtxT> *m_threads; + std::string m_instance_id; + image_map::Listener &m_listener; + + std::unique_ptr<image_map::Policy> m_policy; // our mapping policy + + Context *m_timer_task = nullptr; + Mutex m_lock; + bool m_shutting_down = false; + AsyncOpTracker m_async_op_tracker; + + // global_image_id -> registered peers ("" == local, remote otherwise) + std::map<std::string, std::set<std::string> > m_peer_map; + + std::set<std::string> m_global_image_ids; + + Context *m_rebalance_task = nullptr; + + struct C_LoadMap : Context { + ImageMap *image_map; + Context *on_finish; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + + C_LoadMap(ImageMap *image_map, Context *on_finish) + : image_map(image_map), + on_finish(on_finish) { + } + + void finish(int r) override { + if (r == 0) { + image_map->handle_load(image_mapping); + } + + image_map->finish_async_op(); + on_finish->complete(r); + } + }; + + // async op-tracker helper routines + void start_async_op() { + m_async_op_tracker.start_op(); + } + void finish_async_op() { + m_async_op_tracker.finish_op(); + } + void wait_for_async_ops(Context *on_finish) { + m_async_op_tracker.wait_for_ops(on_finish); + } + + void handle_peer_ack(const std::string &global_image_id, int r); + void handle_peer_ack_remove(const std::string &global_image_id, int r); + + void handle_load(const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + void handle_update_request(const Updates &updates, + const std::set<std::string> &remove_global_image_ids, int r); + + // continue (retry or resume depending on state machine) processing + // current action. + void continue_action(const std::set<std::string> &global_image_ids, int r); + + // schedule an image for update + void schedule_action(const std::string &global_image_id); + + void schedule_update_task(); + void schedule_update_task(const Mutex &timer_lock); + void process_updates(); + void update_image_mapping(Updates&& map_updates, + std::set<std::string>&& map_removals); + + void rebalance(); + void schedule_rebalance_task(); + + void notify_listener_acquire_release_images(const Updates &acquire, const Updates &release); + void notify_listener_remove_images(const std::string &peer_uuid, const Updates &remove); + + void update_images_added(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + void update_images_removed(const std::string &peer_uuid, + const std::set<std::string> &global_image_ids); + + void filter_instance_ids(const std::vector<std::string> &instance_ids, + std::vector<std::string> *filtered_instance_ids, + bool removal) const; + +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_H diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc new file mode 100644 index 00000000..6c6ee2d5 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -0,0 +1,1896 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "global/global_context.h" +#include "journal/Journaler.h" +#include "journal/ReplayHandler.h" +#include "journal/Settings.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/journal/Replay.h" +#include "ImageDeleter.h" +#include "ImageReplayer.h" +#include "Threads.h" +#include "tools/rbd_mirror/image_replayer/BootstrapRequest.h" +#include "tools/rbd_mirror/image_replayer/CloseImageRequest.h" +#include "tools/rbd_mirror/image_replayer/EventPreprocessor.h" +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::" << *this << " " \ + << __func__ << ": " + +using std::map; +using std::string; +using std::unique_ptr; +using std::shared_ptr; +using std::vector; + +extern PerfCounters *g_perf_counters; + +namespace rbd { +namespace mirror { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using namespace rbd::mirror::image_replayer; + +template <typename I> +std::ostream &operator<<(std::ostream &os, + const typename ImageReplayer<I>::State &state); + +namespace { + +template <typename I> +struct ReplayHandler : public ::journal::ReplayHandler { + ImageReplayer<I> *replayer; + ReplayHandler(ImageReplayer<I> *replayer) : replayer(replayer) {} + void get() override {} + void put() override {} + + void handle_entries_available() override { + replayer->handle_replay_ready(); + } + void handle_complete(int r) override { + std::stringstream ss; + if (r < 0) { + ss << "replay completed with error: " << cpp_strerror(r); + } + replayer->handle_replay_complete(r, ss.str()); + } +}; + +template <typename I> +class ImageReplayerAdminSocketCommand { +public: + ImageReplayerAdminSocketCommand(const std::string &desc, + ImageReplayer<I> *replayer) + : desc(desc), replayer(replayer) { + } + virtual ~ImageReplayerAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; + + std::string desc; + ImageReplayer<I> *replayer; + bool registered = false; +}; + +template <typename I> +class StatusCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->print_status(f, ss); + return true; + } +}; + +template <typename I> +class StartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->start(nullptr, true); + return true; + } +}; + +template <typename I> +class StopCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->stop(nullptr, true); + return true; + } +}; + +template <typename I> +class RestartCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->restart(); + return true; + } +}; + +template <typename I> +class FlushCommand : public ImageReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(const std::string &desc, ImageReplayer<I> *replayer) + : ImageReplayerAdminSocketCommand<I>(desc, replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->replayer->flush(); + return true; + } +}; + +template <typename I> +class ImageReplayerAdminSocketHook : public AdminSocketHook { +public: + ImageReplayerAdminSocketHook(CephContext *cct, const std::string &name, + ImageReplayer<I> *replayer) + : admin_socket(cct->get_admin_socket()), + commands{{"rbd mirror flush " + name, + new FlushCommand<I>("flush rbd mirror " + name, replayer)}, + {"rbd mirror restart " + name, + new RestartCommand<I>("restart rbd mirror " + name, replayer)}, + {"rbd mirror start " + name, + new StartCommand<I>("start rbd mirror " + name, replayer)}, + {"rbd mirror status " + name, + new StatusCommand<I>("get status for rbd mirror " + name, replayer)}, + {"rbd mirror stop " + name, + new StopCommand<I>("stop rbd mirror " + name, replayer)}} { + } + + int register_commands() { + for (auto &it : commands) { + int r = admin_socket->register_command(it.first, it.first, this, + it.second->desc); + if (r < 0) { + return r; + } + it.second->registered = true; + } + return 0; + } + + ~ImageReplayerAdminSocketHook() override { + for (auto &it : commands) { + if (it.second->registered) { + admin_socket->unregister_command(it.first); + } + delete it.second; + } + commands.clear(); + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, ImageReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +uint32_t calculate_replay_delay(const utime_t &event_time, + int mirroring_replay_delay) { + if (mirroring_replay_delay <= 0) { + return 0; + } + + utime_t now = ceph_clock_now(); + if (event_time + mirroring_replay_delay <= now) { + return 0; + } + + // ensure it is rounded up when converting to integer + return (event_time + mirroring_replay_delay - now) + 1; +} + +} // anonymous namespace + +template <typename I> +void ImageReplayer<I>::BootstrapProgressContext::update_progress( + const std::string &description, bool flush) +{ + const std::string desc = "bootstrapping, " + description; + replayer->set_state_description(0, desc); + if (flush) { + replayer->update_mirror_image_status(false, boost::none); + } +} + +template <typename I> +void ImageReplayer<I>::RemoteJournalerListener::handle_update( + ::journal::JournalMetadata *) { + FunctionContext *ctx = new FunctionContext([this](int r) { + replayer->handle_remote_journal_metadata_updated(); + }); + replayer->m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +ImageReplayer<I>::ImageReplayer(Threads<I> *threads, + InstanceWatcher<I> *instance_watcher, + RadosRef local, + const std::string &local_mirror_uuid, + int64_t local_pool_id, + const std::string &global_image_id) : + m_threads(threads), + m_instance_watcher(instance_watcher), + m_local(local), + m_local_mirror_uuid(local_mirror_uuid), + m_local_pool_id(local_pool_id), + m_global_image_id(global_image_id), m_local_image_name(global_image_id), + m_lock("rbd::mirror::ImageReplayer " + stringify(local_pool_id) + " " + + global_image_id), + m_progress_cxt(this), + m_journal_listener(new JournalListener(this)), + m_remote_listener(this) +{ + // Register asok commands using a temporary "remote_pool_name/global_image_id" + // name. When the image name becomes known on start the asok commands will be + // re-registered using "remote_pool_name/remote_image_name" name. + + std::string pool_name; + int r = m_local->pool_reverse_lookup(m_local_pool_id, &pool_name); + if (r < 0) { + derr << "error resolving local pool " << m_local_pool_id + << ": " << cpp_strerror(r) << dendl; + pool_name = stringify(m_local_pool_id); + } + + m_name = pool_name + "/" + m_global_image_id; + register_admin_socket_hook(); +} + +template <typename I> +ImageReplayer<I>::~ImageReplayer() +{ + unregister_admin_socket_hook(); + ceph_assert(m_event_preprocessor == nullptr); + ceph_assert(m_replay_status_formatter == nullptr); + ceph_assert(m_local_image_ctx == nullptr); + ceph_assert(m_local_replay == nullptr); + ceph_assert(m_remote_journaler == nullptr); + ceph_assert(m_replay_handler == nullptr); + ceph_assert(m_on_start_finish == nullptr); + ceph_assert(m_on_stop_finish == nullptr); + ceph_assert(m_bootstrap_request == nullptr); + ceph_assert(m_in_flight_status_updates == 0); + + delete m_journal_listener; +} + +template <typename I> +image_replayer::HealthState ImageReplayer<I>::get_health_state() const { + Mutex::Locker locker(m_lock); + + if (!m_mirror_image_status_state) { + return image_replayer::HEALTH_STATE_OK; + } else if (*m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING || + *m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN) { + return image_replayer::HEALTH_STATE_WARNING; + } + return image_replayer::HEALTH_STATE_ERROR; +} + +template <typename I> +void ImageReplayer<I>::add_peer(const std::string &peer_uuid, + librados::IoCtx &io_ctx) { + Mutex::Locker locker(m_lock); + auto it = m_peers.find({peer_uuid}); + if (it == m_peers.end()) { + m_peers.insert({peer_uuid, io_ctx}); + } +} + +template <typename I> +void ImageReplayer<I>::set_state_description(int r, const std::string &desc) { + dout(10) << r << " " << desc << dendl; + + Mutex::Locker l(m_lock); + m_last_r = r; + m_state_desc = desc; +} + +template <typename I> +void ImageReplayer<I>::start(Context *on_finish, bool manual) +{ + dout(10) << "on_finish=" << on_finish << dendl; + + int r = 0; + { + Mutex::Locker locker(m_lock); + if (!is_stopped_()) { + derr << "already running" << dendl; + r = -EINVAL; + } else if (m_manual_stop && !manual) { + dout(5) << "stopped manually, ignoring start without manual flag" + << dendl; + r = -EPERM; + } else { + m_state = STATE_STARTING; + m_last_r = 0; + m_state_desc.clear(); + m_manual_stop = false; + m_delete_requested = false; + + if (on_finish != nullptr) { + ceph_assert(m_on_start_finish == nullptr); + m_on_start_finish = on_finish; + } + ceph_assert(m_on_stop_finish == nullptr); + } + } + + if (r < 0) { + if (on_finish) { + on_finish->complete(r); + } + return; + } + + m_local_ioctx.reset(new librados::IoCtx{}); + r = m_local->ioctx_create2(m_local_pool_id, *m_local_ioctx); + if (r < 0) { + m_local_ioctx.reset(); + + derr << "error opening ioctx for local pool " << m_local_pool_id + << ": " << cpp_strerror(r) << dendl; + on_start_fail(r, "error opening local pool"); + return; + } + + prepare_local_image(); +} + +template <typename I> +void ImageReplayer<I>::prepare_local_image() { + dout(10) << dendl; + + m_local_image_id = ""; + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_prepare_local_image>(this); + auto req = PrepareLocalImageRequest<I>::create( + *m_local_ioctx, m_global_image_id, &m_local_image_id, &m_local_image_name, + &m_local_image_tag_owner, m_threads->work_queue, ctx); + req->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_prepare_local_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image does not exist" << dendl; + } else if (r < 0) { + on_start_fail(r, "error preparing local image for replay"); + return; + } else { + reregister_admin_socket_hook(); + } + + // local image doesn't exist or is non-primary + prepare_remote_image(); +} + +template <typename I> +void ImageReplayer<I>::prepare_remote_image() { + dout(10) << dendl; + if (m_peers.empty()) { + // technically nothing to bootstrap, but it handles the status update + bootstrap(); + return; + } + + // TODO need to support multiple remote images + ceph_assert(!m_peers.empty()); + m_remote_image = {*m_peers.begin()}; + + auto cct = static_cast<CephContext *>(m_local->cct()); + journal::Settings journal_settings; + journal_settings.commit_interval = cct->_conf.get_val<double>( + "rbd_mirror_journal_commit_age"); + journal_settings.max_fetch_bytes = cct->_conf.get_val<Option::size_t>( + "rbd_mirror_journal_max_fetch_bytes"); + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_prepare_remote_image>(this); + auto req = PrepareRemoteImageRequest<I>::create( + m_threads, m_remote_image.io_ctx, m_global_image_id, m_local_mirror_uuid, + m_local_image_id, journal_settings, &m_remote_image.mirror_uuid, + &m_remote_image.image_id, &m_remote_journaler, &m_client_state, + &m_client_meta, ctx); + req->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_prepare_remote_image(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r < 0 ? m_remote_journaler == nullptr : m_remote_journaler != nullptr); + if (r < 0 && !m_local_image_id.empty() && + m_local_image_tag_owner == librbd::Journal<>::LOCAL_MIRROR_UUID) { + // local image is primary -- fall-through + } else if (r == -ENOENT) { + dout(10) << "remote image does not exist" << dendl; + + // TODO need to support multiple remote images + if (m_remote_image.image_id.empty() && !m_local_image_id.empty() && + m_local_image_tag_owner == m_remote_image.mirror_uuid) { + // local image exists and is non-primary and linked to the missing + // remote image + + m_delete_requested = true; + on_start_fail(0, "remote image no longer exists"); + } else { + on_start_fail(-ENOENT, "remote image does not exist"); + } + return; + } else if (r < 0) { + on_start_fail(r, "error retrieving remote image id"); + return; + } + + bootstrap(); +} + +template <typename I> +void ImageReplayer<I>::bootstrap() { + dout(10) << dendl; + + if (!m_local_image_id.empty() && + m_local_image_tag_owner == librbd::Journal<>::LOCAL_MIRROR_UUID) { + dout(5) << "local image is primary" << dendl; + on_start_fail(0, "local image is primary"); + return; + } else if (m_peers.empty()) { + dout(5) << "no peer clusters" << dendl; + on_start_fail(-ENOENT, "no peer clusters"); + return; + } + + BootstrapRequest<I> *request = nullptr; + { + Mutex::Locker locker(m_lock); + if (on_start_interrupted(m_lock)) { + return; + } + + auto ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this); + request = BootstrapRequest<I>::create( + m_threads, *m_local_ioctx, m_remote_image.io_ctx, m_instance_watcher, + &m_local_image_ctx, m_local_image_id, m_remote_image.image_id, + m_global_image_id, m_local_mirror_uuid, m_remote_image.mirror_uuid, + m_remote_journaler, &m_client_state, &m_client_meta, ctx, + &m_resync_requested, &m_progress_cxt); + request->get(); + m_bootstrap_request = request; + } + + update_mirror_image_status(false, boost::none); + reschedule_update_status_task(10); + + request->send(); +} + +template <typename I> +void ImageReplayer<I>::handle_bootstrap(int r) { + dout(10) << "r=" << r << dendl; + { + Mutex::Locker locker(m_lock); + m_bootstrap_request->put(); + m_bootstrap_request = nullptr; + if (m_local_image_ctx) { + m_local_image_id = m_local_image_ctx->id; + } + } + + if (on_start_interrupted()) { + return; + } else if (r == -EREMOTEIO) { + m_local_image_tag_owner = ""; + dout(5) << "remote image is non-primary" << dendl; + on_start_fail(-EREMOTEIO, "remote image is non-primary"); + return; + } else if (r == -EEXIST) { + m_local_image_tag_owner = ""; + on_start_fail(r, "split-brain detected"); + return; + } else if (r < 0) { + on_start_fail(r, "error bootstrapping replay"); + return; + } else if (m_resync_requested) { + on_start_fail(0, "resync requested"); + return; + } + + ceph_assert(m_local_journal == nullptr); + { + RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock); + if (m_local_image_ctx->journal != nullptr) { + m_local_journal = m_local_image_ctx->journal; + m_local_journal->add_listener(m_journal_listener); + } + } + + if (m_local_journal == nullptr) { + on_start_fail(-EINVAL, "error accessing local journal"); + return; + } + + update_mirror_image_status(false, boost::none); + init_remote_journaler(); +} + +template <typename I> +void ImageReplayer<I>::init_remote_journaler() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_init_remote_journaler>(this); + m_remote_journaler->init(ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_init_remote_journaler(int r) { + dout(10) << "r=" << r << dendl; + + if (on_start_interrupted()) { + return; + } else if (r < 0) { + derr << "failed to initialize remote journal: " << cpp_strerror(r) << dendl; + on_start_fail(r, "error initializing remote journal"); + return; + } + + m_remote_journaler->add_listener(&m_remote_listener); + + cls::journal::Client client; + r = m_remote_journaler->get_cached_client(m_local_mirror_uuid, &client); + if (r < 0) { + derr << "error retrieving remote journal client: " << cpp_strerror(r) + << dendl; + on_start_fail(r, "error retrieving remote journal client"); + return; + } + + dout(5) << "image_id=" << m_local_image_id << ", " + << "client_meta.image_id=" << m_client_meta.image_id << ", " + << "client.state=" << client.state << dendl; + if (m_client_meta.image_id == m_local_image_id && + client.state != cls::journal::CLIENT_STATE_CONNECTED) { + dout(5) << "client flagged disconnected, stopping image replay" << dendl; + if (m_local_image_ctx->config.template get_val<bool>("rbd_mirroring_resync_after_disconnect")) { + m_resync_requested = true; + on_start_fail(-ENOTCONN, "disconnected: automatic resync"); + } else { + on_start_fail(-ENOTCONN, "disconnected"); + } + return; + } + + start_replay(); +} + +template <typename I> +void ImageReplayer<I>::start_replay() { + dout(10) << dendl; + + Context *start_ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_start_replay>(this); + m_local_journal->start_external_replay(&m_local_replay, start_ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_start_replay(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + ceph_assert(m_local_replay == nullptr); + derr << "error starting external replay on local image " + << m_local_image_id << ": " << cpp_strerror(r) << dendl; + on_start_fail(r, "error starting replay on local image"); + return; + } + + m_replay_status_formatter = + ReplayStatusFormatter<I>::create(m_remote_journaler, m_local_mirror_uuid); + + Context *on_finish(nullptr); + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_STARTING); + m_state = STATE_REPLAYING; + std::swap(m_on_start_finish, on_finish); + } + + m_event_preprocessor = EventPreprocessor<I>::create( + *m_local_image_ctx, *m_remote_journaler, m_local_mirror_uuid, + &m_client_meta, m_threads->work_queue); + + update_mirror_image_status(true, boost::none); + reschedule_update_status_task(30); + + if (on_replay_interrupted()) { + return; + } + + { + CephContext *cct = static_cast<CephContext *>(m_local->cct()); + double poll_seconds = cct->_conf.get_val<double>( + "rbd_mirror_journal_poll_age"); + + Mutex::Locker locker(m_lock); + m_replay_handler = new ReplayHandler<I>(this); + m_remote_journaler->start_live_replay(m_replay_handler, poll_seconds); + + dout(10) << "m_remote_journaler=" << *m_remote_journaler << dendl; + } + + dout(10) << "start succeeded" << dendl; + if (on_finish != nullptr) { + dout(10) << "on finish complete, r=" << r << dendl; + on_finish->complete(r); + } +} + +template <typename I> +void ImageReplayer<I>::on_start_fail(int r, const std::string &desc) +{ + dout(10) << "r=" << r << dendl; + Context *ctx = new FunctionContext([this, r, desc](int _r) { + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_STARTING); + m_state = STATE_STOPPING; + if (r < 0 && r != -ECANCELED && r != -EREMOTEIO && r != -ENOENT) { + derr << "start failed: " << cpp_strerror(r) << dendl; + } else { + dout(10) << "start canceled" << dendl; + } + } + + set_state_description(r, desc); + if (m_local_ioctx) { + update_mirror_image_status(false, boost::none); + } + reschedule_update_status_task(-1); + shut_down(r); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool ImageReplayer<I>::on_start_interrupted() { + Mutex::Locker locker(m_lock); + return on_start_interrupted(m_lock); +} + +template <typename I> +bool ImageReplayer<I>::on_start_interrupted(Mutex& lock) { + ceph_assert(m_lock.is_locked()); + ceph_assert(m_state == STATE_STARTING); + if (!m_stop_requested) { + return false; + } + + on_start_fail(-ECANCELED, ""); + return true; +} + +template <typename I> +void ImageReplayer<I>::stop(Context *on_finish, bool manual, int r, + const std::string& desc) +{ + dout(10) << "on_finish=" << on_finish << ", manual=" << manual + << ", desc=" << desc << dendl; + + image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr; + bool shut_down_replay = false; + bool running = true; + { + Mutex::Locker locker(m_lock); + + if (!is_running_()) { + running = false; + } else { + if (!is_stopped_()) { + if (m_state == STATE_STARTING) { + dout(10) << "canceling start" << dendl; + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } else { + dout(10) << "interrupting replay" << dendl; + shut_down_replay = true; + } + + ceph_assert(m_on_stop_finish == nullptr); + std::swap(m_on_stop_finish, on_finish); + m_stop_requested = true; + m_manual_stop = manual; + } + } + } + + // avoid holding lock since bootstrap request will update status + if (bootstrap_request != nullptr) { + dout(10) << "canceling bootstrap" << dendl; + bootstrap_request->cancel(); + bootstrap_request->put(); + } + + if (!running) { + dout(20) << "not running" << dendl; + if (on_finish) { + on_finish->complete(-EINVAL); + } + return; + } + + if (shut_down_replay) { + on_stop_journal_replay(r, desc); + } else if (on_finish != nullptr) { + on_finish->complete(0); + } +} + +template <typename I> +void ImageReplayer<I>::on_stop_journal_replay(int r, const std::string &desc) +{ + dout(10) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_state != STATE_REPLAYING) { + // might be invoked multiple times while stopping + return; + } + m_stop_requested = true; + m_state = STATE_STOPPING; + } + + set_state_description(r, desc); + update_mirror_image_status(true, boost::none); + reschedule_update_status_task(-1); + shut_down(0); +} + +template <typename I> +void ImageReplayer<I>::handle_replay_ready() +{ + dout(20) << dendl; + if (on_replay_interrupted()) { + return; + } + + if (!m_remote_journaler->try_pop_front(&m_replay_entry, &m_replay_tag_tid)) { + return; + } + + m_event_replay_tracker.start_op(); + + m_lock.Lock(); + bool stopping = (m_state == STATE_STOPPING); + m_lock.Unlock(); + + if (stopping) { + dout(10) << "stopping event replay" << dendl; + m_event_replay_tracker.finish_op(); + return; + } + + if (m_replay_tag_valid && m_replay_tag.tid == m_replay_tag_tid) { + preprocess_entry(); + return; + } + + replay_flush(); +} + +template <typename I> +void ImageReplayer<I>::restart(Context *on_finish) +{ + FunctionContext *ctx = new FunctionContext( + [this, on_finish](int r) { + if (r < 0) { + // Try start anyway. + } + start(on_finish, true); + }); + stop(ctx); +} + +template <typename I> +void ImageReplayer<I>::flush() +{ + dout(10) << dendl; + C_SaferCond ctx; + flush_local_replay(&ctx); + ctx.wait(); + + update_mirror_image_status(false, boost::none); +} + +template <typename I> +void ImageReplayer<I>::flush_local_replay(Context* on_flush) +{ + m_lock.Lock(); + if (m_state != STATE_REPLAYING) { + m_lock.Unlock(); + on_flush->complete(0); + return; + } + + dout(15) << dendl; + auto ctx = new FunctionContext( + [this, on_flush](int r) { + handle_flush_local_replay(on_flush, r); + }); + m_local_replay->flush(ctx); + m_lock.Unlock(); +} + +template <typename I> +void ImageReplayer<I>::handle_flush_local_replay(Context* on_flush, int r) +{ + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing local replay: " << cpp_strerror(r) << dendl; + on_flush->complete(r); + return; + } + + flush_commit_position(on_flush); +} + +template <typename I> +void ImageReplayer<I>::flush_commit_position(Context* on_flush) +{ + m_lock.Lock(); + if (m_state != STATE_REPLAYING) { + m_lock.Unlock(); + on_flush->complete(0); + return; + } + + dout(15) << dendl; + auto ctx = new FunctionContext( + [this, on_flush](int r) { + handle_flush_commit_position(on_flush, r); + }); + m_remote_journaler->flush_commit_position(ctx); + m_lock.Unlock(); +} + +template <typename I> +void ImageReplayer<I>::handle_flush_commit_position(Context* on_flush, int r) +{ + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "error flushing remote journal commit position: " + << cpp_strerror(r) << dendl; + } + + on_flush->complete(r); +} + +template <typename I> +bool ImageReplayer<I>::on_replay_interrupted() +{ + bool shut_down; + { + Mutex::Locker locker(m_lock); + shut_down = m_stop_requested; + } + + if (shut_down) { + on_stop_journal_replay(); + } + return shut_down; +} + +template <typename I> +void ImageReplayer<I>::print_status(Formatter *f, stringstream *ss) +{ + dout(10) << dendl; + + Mutex::Locker l(m_lock); + + if (f) { + f->open_object_section("image_replayer"); + f->dump_string("name", m_name); + f->dump_string("state", to_string(m_state)); + f->close_section(); + f->flush(*ss); + } else { + *ss << m_name << ": state: " << to_string(m_state); + } +} + +template <typename I> +void ImageReplayer<I>::handle_replay_complete(int r, const std::string &error_desc) +{ + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "replay encountered an error: " << cpp_strerror(r) << dendl; + } + + { + Mutex::Locker locker(m_lock); + m_stop_requested = true; + } + on_stop_journal_replay(r, error_desc); +} + +template <typename I> +void ImageReplayer<I>::replay_flush() { + dout(10) << dendl; + + bool interrupted = false; + { + Mutex::Locker locker(m_lock); + if (m_state != STATE_REPLAYING) { + dout(10) << "replay interrupted" << dendl; + interrupted = true; + } else { + m_state = STATE_REPLAY_FLUSHING; + } + } + + if (interrupted) { + m_event_replay_tracker.finish_op(); + return; + } + + // shut down the replay to flush all IO and ops and create a new + // replayer to handle the new tag epoch + Context *ctx = create_context_callback< + ImageReplayer<I>, &ImageReplayer<I>::handle_replay_flush>(this); + ctx = new FunctionContext([this, ctx](int r) { + m_local_image_ctx->journal->stop_external_replay(); + m_local_replay = nullptr; + + if (r < 0) { + ctx->complete(r); + return; + } + + m_local_journal->start_external_replay(&m_local_replay, ctx); + }); + m_local_replay->shut_down(false, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_replay_flush(int r) { + dout(10) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_REPLAY_FLUSHING); + m_state = STATE_REPLAYING; + } + + if (r < 0) { + derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "replay flush encountered an error"); + return; + } else if (on_replay_interrupted()) { + m_event_replay_tracker.finish_op(); + return; + } + + get_remote_tag(); +} + +template <typename I> +void ImageReplayer<I>::get_remote_tag() { + dout(15) << "tag_tid: " << m_replay_tag_tid << dendl; + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_get_remote_tag>(this); + m_remote_journaler->get_tag(m_replay_tag_tid, &m_replay_tag, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_get_remote_tag(int r) { + dout(15) << "r=" << r << dendl; + + if (r == 0) { + try { + auto it = m_replay_tag.data.cbegin(); + decode(m_replay_tag_data, it); + } catch (const buffer::error &err) { + r = -EBADMSG; + } + } + + if (r < 0) { + derr << "failed to retrieve remote tag " << m_replay_tag_tid << ": " + << cpp_strerror(r) << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "failed to retrieve remote tag"); + return; + } + + m_replay_tag_valid = true; + dout(15) << "decoded remote tag " << m_replay_tag_tid << ": " + << m_replay_tag_data << dendl; + + allocate_local_tag(); +} + +template <typename I> +void ImageReplayer<I>::allocate_local_tag() { + dout(15) << dendl; + + std::string mirror_uuid = m_replay_tag_data.mirror_uuid; + if (mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + mirror_uuid = m_remote_image.mirror_uuid; + } else if (mirror_uuid == m_local_mirror_uuid) { + mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; + } else if (mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { + // handle possible edge condition where daemon can failover and + // the local image has already been promoted/demoted + auto local_tag_data = m_local_journal->get_tag_data(); + if (local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + (local_tag_data.predecessor.commit_valid && + local_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID)) { + dout(15) << "skipping stale demotion event" << dendl; + handle_process_entry_safe(m_replay_entry, m_replay_start_time, 0); + handle_replay_ready(); + return; + } else { + dout(5) << "encountered image demotion: stopping" << dendl; + Mutex::Locker locker(m_lock); + m_stop_requested = true; + } + } + + librbd::journal::TagPredecessor predecessor(m_replay_tag_data.predecessor); + if (predecessor.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + predecessor.mirror_uuid = m_remote_image.mirror_uuid; + } else if (predecessor.mirror_uuid == m_local_mirror_uuid) { + predecessor.mirror_uuid = librbd::Journal<>::LOCAL_MIRROR_UUID; + } + + dout(15) << "mirror_uuid=" << mirror_uuid << ", " + << "predecessor=" << predecessor << ", " + << "replay_tag_tid=" << m_replay_tag_tid << dendl; + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_allocate_local_tag>(this); + m_local_journal->allocate_tag(mirror_uuid, predecessor, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_allocate_local_tag(int r) { + dout(15) << "r=" << r << ", " + << "tag_tid=" << m_local_journal->get_tag_tid() << dendl; + + if (r < 0) { + derr << "failed to allocate journal tag: " << cpp_strerror(r) << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "failed to allocate journal tag"); + return; + } + + preprocess_entry(); +} + +template <typename I> +void ImageReplayer<I>::preprocess_entry() { + dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + bufferlist data = m_replay_entry.get_data(); + auto it = data.cbegin(); + int r = m_local_replay->decode(&it, &m_event_entry); + if (r < 0) { + derr << "failed to decode journal event" << dendl; + m_event_replay_tracker.finish_op(); + handle_replay_complete(r, "failed to decode journal event"); + return; + } + + uint32_t delay = calculate_replay_delay( + m_event_entry.timestamp, m_local_image_ctx->mirroring_replay_delay); + if (delay == 0) { + handle_preprocess_entry_ready(0); + return; + } + + dout(20) << "delaying replay by " << delay << " sec" << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + ceph_assert(m_delayed_preprocess_task == nullptr); + m_delayed_preprocess_task = new FunctionContext( + [this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_delayed_preprocess_task = nullptr; + m_threads->work_queue->queue( + create_context_callback<ImageReplayer, + &ImageReplayer<I>::handle_preprocess_entry_ready>(this), 0); + }); + m_threads->timer->add_event_after(delay, m_delayed_preprocess_task); +} + +template <typename I> +void ImageReplayer<I>::handle_preprocess_entry_ready(int r) { + dout(20) << "r=" << r << dendl; + ceph_assert(r == 0); + + m_replay_start_time = ceph_clock_now(); + if (!m_event_preprocessor->is_required(m_event_entry)) { + process_entry(); + return; + } + + Context *ctx = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_preprocess_entry_safe>(this); + m_event_preprocessor->preprocess(&m_event_entry, ctx); +} + +template <typename I> +void ImageReplayer<I>::handle_preprocess_entry_safe(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + m_event_replay_tracker.finish_op(); + + if (r == -ECANCELED) { + handle_replay_complete(0, "lost exclusive lock"); + } else { + derr << "failed to preprocess journal event" << dendl; + handle_replay_complete(r, "failed to preprocess journal event"); + } + return; + } + + process_entry(); +} + +template <typename I> +void ImageReplayer<I>::process_entry() { + dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid() + << dendl; + + // stop replaying events if stop has been requested + if (on_replay_interrupted()) { + m_event_replay_tracker.finish_op(); + return; + } + + Context *on_ready = create_context_callback< + ImageReplayer, &ImageReplayer<I>::handle_process_entry_ready>(this); + Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry), + m_replay_start_time); + + m_local_replay->process(m_event_entry, on_ready, on_commit); +} + +template <typename I> +void ImageReplayer<I>::handle_process_entry_ready(int r) { + dout(20) << dendl; + ceph_assert(r == 0); + + bool update_status = false; + { + RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock); + if (m_local_image_name != m_local_image_ctx->name) { + m_local_image_name = m_local_image_ctx->name; + update_status = true; + } + } + + if (update_status) { + reschedule_update_status_task(0); + } + + // attempt to process the next event + handle_replay_ready(); +} + +template <typename I> +void ImageReplayer<I>::handle_process_entry_safe(const ReplayEntry &replay_entry, + const utime_t &replay_start_time, + int r) { + dout(20) << "commit_tid=" << replay_entry.get_commit_tid() << ", r=" << r + << dendl; + + if (r < 0) { + derr << "failed to commit journal event: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to commit journal event"); + } else { + ceph_assert(m_remote_journaler != nullptr); + m_remote_journaler->committed(replay_entry); + } + + auto bytes = replay_entry.get_data().length(); + auto latency = ceph_clock_now() - replay_start_time; + + if (g_perf_counters) { + g_perf_counters->inc(l_rbd_mirror_replay); + g_perf_counters->inc(l_rbd_mirror_replay_bytes, bytes); + g_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + } + + auto ctx = new FunctionContext( + [this, bytes, latency](int r) { + Mutex::Locker locker(m_lock); + if (m_perf_counters) { + m_perf_counters->inc(l_rbd_mirror_replay); + m_perf_counters->inc(l_rbd_mirror_replay_bytes, bytes); + m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + } + m_event_replay_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +bool ImageReplayer<I>::update_mirror_image_status(bool force, + const OptionalState &state) { + dout(15) << dendl; + { + Mutex::Locker locker(m_lock); + if (!start_mirror_image_status_update(force, false)) { + return false; + } + } + + queue_mirror_image_status_update(state); + return true; +} + +template <typename I> +bool ImageReplayer<I>::start_mirror_image_status_update(bool force, + bool restarting) { + ceph_assert(m_lock.is_locked()); + + if (!force && !is_stopped_()) { + if (!is_running_()) { + dout(15) << "shut down in-progress: ignoring update" << dendl; + return false; + } else if (m_in_flight_status_updates > (restarting ? 1 : 0)) { + dout(15) << "already sending update" << dendl; + m_update_status_requested = true; + return false; + } + } + + ++m_in_flight_status_updates; + dout(15) << "in-flight updates=" << m_in_flight_status_updates << dendl; + return true; +} + +template <typename I> +void ImageReplayer<I>::finish_mirror_image_status_update() { + reregister_admin_socket_hook(); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_in_flight_status_updates > 0); + if (--m_in_flight_status_updates > 0) { + dout(15) << "waiting on " << m_in_flight_status_updates << " in-flight " + << "updates" << dendl; + return; + } + + std::swap(on_finish, m_on_update_status_finish); + } + + dout(15) << dendl; + if (on_finish != nullptr) { + on_finish->complete(0); + } +} + +template <typename I> +void ImageReplayer<I>::queue_mirror_image_status_update(const OptionalState &state) { + dout(15) << dendl; + + auto ctx = new FunctionContext( + [this, state](int r) { + send_mirror_status_update(state); + }); + + // ensure pending IO is flushed and the commit position is updated + // prior to updating the mirror status + ctx = new FunctionContext( + [this, ctx](int r) { + flush_local_replay(ctx); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::send_mirror_status_update(const OptionalState &opt_state) { + State state; + std::string state_desc; + int last_r; + bool stopping_replay; + + OptionalMirrorImageStatusState mirror_image_status_state = + boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + image_replayer::BootstrapRequest<I>* bootstrap_request = nullptr; + { + Mutex::Locker locker(m_lock); + state = m_state; + state_desc = m_state_desc; + mirror_image_status_state = m_mirror_image_status_state; + last_r = m_last_r; + stopping_replay = (m_local_image_ctx != nullptr); + + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } + + bool syncing = false; + if (bootstrap_request != nullptr) { + syncing = bootstrap_request->is_syncing(); + bootstrap_request->put(); + bootstrap_request = nullptr; + } + + if (opt_state) { + state = *opt_state; + } + + cls::rbd::MirrorImageStatus status; + status.up = true; + switch (state) { + case STATE_STARTING: + if (syncing) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING; + status.description = state_desc.empty() ? "syncing" : state_desc; + mirror_image_status_state = status.state; + } else { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY; + status.description = "starting replay"; + } + break; + case STATE_REPLAYING: + case STATE_REPLAY_FLUSHING: + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING; + { + Context *on_req_finish = new FunctionContext( + [this](int r) { + dout(15) << "replay status ready: r=" << r << dendl; + if (r >= 0) { + send_mirror_status_update(boost::none); + } else if (r == -EAGAIN) { + // decrement in-flight status update counter + handle_mirror_status_update(r); + } + }); + + std::string desc; + ceph_assert(m_replay_status_formatter != nullptr); + if (!m_replay_status_formatter->get_or_send_update(&desc, + on_req_finish)) { + dout(15) << "waiting for replay status" << dendl; + return; + } + status.description = "replaying, " + desc; + mirror_image_status_state = boost::make_optional( + false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + } + break; + case STATE_STOPPING: + if (stopping_replay) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPING_REPLAY; + status.description = state_desc.empty() ? "stopping replay" : state_desc; + break; + } + // FALLTHROUGH + case STATE_STOPPED: + if (last_r == -EREMOTEIO) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN; + status.description = state_desc; + mirror_image_status_state = status.state; + } else if (last_r < 0) { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR; + status.description = state_desc; + mirror_image_status_state = status.state; + } else { + status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED; + status.description = state_desc.empty() ? "stopped" : state_desc; + mirror_image_status_state = boost::none; + } + break; + default: + ceph_assert(!"invalid state"); + } + + { + Mutex::Locker locker(m_lock); + m_mirror_image_status_state = mirror_image_status_state; + } + + // prevent the status from ping-ponging when failed replays are restarted + if (mirror_image_status_state && + *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) { + status.state = *mirror_image_status_state; + } + + dout(15) << "status=" << status << dendl; + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_status_set(&op, m_global_image_id, status); + + ceph_assert(m_local_ioctx); + librados::AioCompletion *aio_comp = create_rados_callback< + ImageReplayer<I>, &ImageReplayer<I>::handle_mirror_status_update>(this); + int r = m_local_ioctx->aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void ImageReplayer<I>::handle_mirror_status_update(int r) { + dout(15) << "r=" << r << dendl; + + bool running = false; + bool started = false; + { + Mutex::Locker locker(m_lock); + bool update_status_requested = false; + std::swap(update_status_requested, m_update_status_requested); + + running = is_running_(); + if (running && update_status_requested) { + started = start_mirror_image_status_update(false, true); + } + } + + // if a deferred update is available, send it -- otherwise reschedule + // the timer task + if (started) { + queue_mirror_image_status_update(boost::none); + } else if (running) { + reschedule_update_status_task(0); + } + + // mark committed status update as no longer in-flight + finish_mirror_image_status_update(); +} + +template <typename I> +void ImageReplayer<I>::reschedule_update_status_task(int new_interval) { + bool canceled_task = false; + { + Mutex::Locker locker(m_lock); + Mutex::Locker timer_locker(m_threads->timer_lock); + + if (m_update_status_task) { + dout(15) << "canceling existing status update task" << dendl; + + canceled_task = m_threads->timer->cancel_event(m_update_status_task); + m_update_status_task = nullptr; + } + + if (new_interval > 0) { + m_update_status_interval = new_interval; + } + + if (new_interval >= 0 && is_running_() && + start_mirror_image_status_update(true, false)) { + m_update_status_task = new FunctionContext( + [this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_update_status_task = nullptr; + + queue_mirror_image_status_update(boost::none); + }); + dout(15) << "scheduling status update task after " + << m_update_status_interval << " seconds" << dendl; + m_threads->timer->add_event_after(m_update_status_interval, + m_update_status_task); + } + } + + if (canceled_task) { + // decrement in-flight status update counter for canceled task + finish_mirror_image_status_update(); + } +} + +template <typename I> +void ImageReplayer<I>::shut_down(int r) { + dout(10) << "r=" << r << dendl; + + bool canceled_delayed_preprocess_task = false; + { + Mutex::Locker timer_locker(m_threads->timer_lock); + if (m_delayed_preprocess_task != nullptr) { + canceled_delayed_preprocess_task = m_threads->timer->cancel_event( + m_delayed_preprocess_task); + ceph_assert(canceled_delayed_preprocess_task); + m_delayed_preprocess_task = nullptr; + } + } + if (canceled_delayed_preprocess_task) { + // wake up sleeping replay + m_event_replay_tracker.finish_op(); + } + + reschedule_update_status_task(-1); + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_state == STATE_STOPPING); + + // if status updates are in-flight, wait for them to complete + // before proceeding + if (m_in_flight_status_updates > 0) { + if (m_on_update_status_finish == nullptr) { + dout(15) << "waiting for in-flight status update" << dendl; + m_on_update_status_finish = new FunctionContext( + [this, r](int _r) { + shut_down(r); + }); + } + return; + } + } + + // NOTE: it's important to ensure that the local image is fully + // closed before attempting to close the remote journal in + // case the remote cluster is unreachable + + // chain the shut down sequence (reverse order) + Context *ctx = new FunctionContext( + [this, r](int _r) { + if (m_local_ioctx) { + update_mirror_image_status(true, STATE_STOPPED); + } + handle_shut_down(r); + }); + + // close the remote journal + if (m_remote_journaler != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + delete m_remote_journaler; + m_remote_journaler = nullptr; + ctx->complete(0); + }); + ctx = new FunctionContext([this, ctx](int r) { + m_remote_journaler->remove_listener(&m_remote_listener); + m_remote_journaler->shut_down(ctx); + }); + } + + // stop the replay of remote journal events + if (m_replay_handler != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + delete m_replay_handler; + m_replay_handler = nullptr; + + m_event_replay_tracker.wait_for_ops(ctx); + }); + ctx = new FunctionContext([this, ctx](int r) { + m_remote_journaler->stop_replay(ctx); + }); + } + + // close the local image (release exclusive lock) + if (m_local_image_ctx) { + ctx = new FunctionContext([this, ctx](int r) { + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_local_image_ctx, ctx); + request->send(); + }); + } + + // shut down event replay into the local image + if (m_local_journal != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + m_local_journal = nullptr; + ctx->complete(0); + }); + if (m_local_replay != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + m_local_journal->stop_external_replay(); + m_local_replay = nullptr; + + EventPreprocessor<I>::destroy(m_event_preprocessor); + m_event_preprocessor = nullptr; + ctx->complete(0); + }); + } + ctx = new FunctionContext([this, ctx](int r) { + // blocks if listener notification is in-progress + m_local_journal->remove_listener(m_journal_listener); + ctx->complete(0); + }); + } + + // wait for all local in-flight replay events to complete + ctx = new FunctionContext([this, ctx](int r) { + if (r < 0) { + derr << "error shutting down journal replay: " << cpp_strerror(r) + << dendl; + } + + m_event_replay_tracker.wait_for_ops(ctx); + }); + + // flush any local in-flight replay events + if (m_local_replay != nullptr) { + ctx = new FunctionContext([this, ctx](int r) { + m_local_replay->shut_down(true, ctx); + }); + } + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void ImageReplayer<I>::handle_shut_down(int r) { + reschedule_update_status_task(-1); + + bool resync_requested = false; + bool delete_requested = false; + bool unregister_asok_hook = false; + { + Mutex::Locker locker(m_lock); + + // if status updates are in-flight, wait for them to complete + // before proceeding + if (m_in_flight_status_updates > 0) { + if (m_on_update_status_finish == nullptr) { + dout(15) << "waiting for in-flight status update" << dendl; + m_on_update_status_finish = new FunctionContext( + [this, r](int _r) { + handle_shut_down(r); + }); + } + return; + } + + if (m_delete_requested && !m_local_image_id.empty()) { + ceph_assert(m_remote_image.image_id.empty()); + dout(0) << "remote image no longer exists: scheduling deletion" << dendl; + unregister_asok_hook = true; + std::swap(delete_requested, m_delete_requested); + } + + std::swap(resync_requested, m_resync_requested); + if (delete_requested || resync_requested) { + m_local_image_id = ""; + } else if (m_last_r == -ENOENT && + m_local_image_id.empty() && m_remote_image.image_id.empty()) { + dout(0) << "mirror image no longer exists" << dendl; + unregister_asok_hook = true; + m_finished = true; + } + } + + if (unregister_asok_hook) { + unregister_admin_socket_hook(); + } + + if (delete_requested || resync_requested) { + dout(5) << "moving image to trash" << dendl; + auto ctx = new FunctionContext([this, r](int) { + handle_shut_down(r); + }); + ImageDeleter<I>::trash_move(*m_local_ioctx, m_global_image_id, + resync_requested, m_threads->work_queue, ctx); + return; + } + + dout(10) << "stop complete" << dendl; + ReplayStatusFormatter<I>::destroy(m_replay_status_formatter); + m_replay_status_formatter = nullptr; + + Context *on_start = nullptr; + Context *on_stop = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(on_start, m_on_start_finish); + std::swap(on_stop, m_on_stop_finish); + m_stop_requested = false; + ceph_assert(m_delayed_preprocess_task == nullptr); + ceph_assert(m_state == STATE_STOPPING); + m_state = STATE_STOPPED; + } + + if (on_start != nullptr) { + dout(10) << "on start finish complete, r=" << r << dendl; + on_start->complete(r); + r = 0; + } + if (on_stop != nullptr) { + dout(10) << "on stop finish complete, r=" << r << dendl; + on_stop->complete(r); + } +} + +template <typename I> +void ImageReplayer<I>::handle_remote_journal_metadata_updated() { + dout(20) << dendl; + + cls::journal::Client client; + { + Mutex::Locker locker(m_lock); + if (!is_running_()) { + return; + } + + int r = m_remote_journaler->get_cached_client(m_local_mirror_uuid, &client); + if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + return; + } + } + + if (client.state != cls::journal::CLIENT_STATE_CONNECTED) { + dout(0) << "client flagged disconnected, stopping image replay" << dendl; + stop(nullptr, false, -ENOTCONN, "disconnected"); + } +} + +template <typename I> +std::string ImageReplayer<I>::to_string(const State state) { + switch (state) { + case ImageReplayer<I>::STATE_STARTING: + return "Starting"; + case ImageReplayer<I>::STATE_REPLAYING: + return "Replaying"; + case ImageReplayer<I>::STATE_REPLAY_FLUSHING: + return "ReplayFlushing"; + case ImageReplayer<I>::STATE_STOPPING: + return "Stopping"; + case ImageReplayer<I>::STATE_STOPPED: + return "Stopped"; + default: + break; + } + return "Unknown(" + stringify(state) + ")"; +} + +template <typename I> +void ImageReplayer<I>::resync_image(Context *on_finish) { + dout(10) << dendl; + + m_resync_requested = true; + stop(on_finish); +} + +template <typename I> +void ImageReplayer<I>::register_admin_socket_hook() { + ImageReplayerAdminSocketHook<I> *asok_hook; + { + Mutex::Locker locker(m_lock); + if (m_asok_hook != nullptr) { + return; + } + + ceph_assert(m_perf_counters == nullptr); + + dout(15) << "registered asok hook: " << m_name << dendl; + asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name, + this); + int r = asok_hook->register_commands(); + if (r == 0) { + m_asok_hook = asok_hook; + + CephContext *cct = static_cast<CephContext *>(m_local->cct()); + auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio"); + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_" + m_name, + l_rbd_mirror_first, l_rbd_mirror_last); + plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio); + plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes", + "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency", + "Replay latency", "rl", prio); + m_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); + + return; + } + derr << "error registering admin socket commands" << dendl; + } + delete asok_hook; +} + +template <typename I> +void ImageReplayer<I>::unregister_admin_socket_hook() { + dout(15) << dendl; + + AdminSocketHook *asok_hook = nullptr; + PerfCounters *perf_counters = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(asok_hook, m_asok_hook); + std::swap(perf_counters, m_perf_counters); + } + delete asok_hook; + if (perf_counters != nullptr) { + g_ceph_context->get_perfcounters_collection()->remove(perf_counters); + delete perf_counters; + } +} + +template <typename I> +void ImageReplayer<I>::reregister_admin_socket_hook() { + { + Mutex::Locker locker(m_lock); + auto name = m_local_ioctx->get_pool_name() + "/" + m_local_image_name; + if (m_asok_hook != nullptr && m_name == name) { + return; + } + m_name = name; + } + unregister_admin_socket_hook(); + register_admin_socket_hook(); +} + +template <typename I> +std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer) +{ + os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id() + << "/" << replayer.get_global_image_id() << "]"; + return os; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h new file mode 100644 index 00000000..9af3e961 --- /dev/null +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -0,0 +1,438 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "common/WorkQueue.h" +#include "include/rados/librados.hpp" +#include "cls/journal/cls_journal_types.h" +#include "cls/rbd/cls_rbd_types.h" +#include "journal/JournalMetadataListener.h" +#include "journal/ReplayEntry.h" +#include "librbd/ImageCtx.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "ProgressContext.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/Types.h" + +#include <boost/noncopyable.hpp> +#include <boost/optional.hpp> + +#include <set> +#include <map> +#include <atomic> +#include <string> +#include <vector> + +class AdminSocketHook; +class PerfCounters; + +namespace journal { + +class Journaler; +class ReplayHandler; + +} + +namespace librbd { + +class ImageCtx; +namespace journal { template <typename> class Replay; } + +} + +namespace rbd { +namespace mirror { + +template <typename> struct InstanceWatcher; +template <typename> struct Threads; + +namespace image_replayer { template <typename> class BootstrapRequest; } +namespace image_replayer { template <typename> class EventPreprocessor; } +namespace image_replayer { template <typename> class ReplayStatusFormatter; } + +/** + * Replays changes from a remote cluster for a single image. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class ImageReplayer { +public: + static ImageReplayer *create( + Threads<ImageCtxT> *threads, InstanceWatcher<ImageCtxT> *instance_watcher, + RadosRef local, const std::string &local_mirror_uuid, int64_t local_pool_id, + const std::string &global_image_id) { + return new ImageReplayer(threads, instance_watcher, local, + local_mirror_uuid, local_pool_id, global_image_id); + } + void destroy() { + delete this; + } + + ImageReplayer(Threads<ImageCtxT> *threads, + InstanceWatcher<ImageCtxT> *instance_watcher, + RadosRef local, const std::string &local_mirror_uuid, + int64_t local_pool_id, const std::string &global_image_id); + virtual ~ImageReplayer(); + ImageReplayer(const ImageReplayer&) = delete; + ImageReplayer& operator=(const ImageReplayer&) = delete; + + bool is_stopped() { Mutex::Locker l(m_lock); return is_stopped_(); } + bool is_running() { Mutex::Locker l(m_lock); return is_running_(); } + bool is_replaying() { Mutex::Locker l(m_lock); return is_replaying_(); } + + std::string get_name() { Mutex::Locker l(m_lock); return m_name; }; + void set_state_description(int r, const std::string &desc); + + // TODO temporary until policy handles release of image replayers + inline bool is_finished() const { + Mutex::Locker locker(m_lock); + return m_finished; + } + inline void set_finished(bool finished) { + Mutex::Locker locker(m_lock); + m_finished = finished; + } + + inline bool is_blacklisted() const { + Mutex::Locker locker(m_lock); + return (m_last_r == -EBLACKLISTED); + } + + image_replayer::HealthState get_health_state() const; + + void add_peer(const std::string &peer_uuid, librados::IoCtx &remote_io_ctx); + + inline int64_t get_local_pool_id() const { + return m_local_pool_id; + } + inline const std::string& get_global_image_id() const { + return m_global_image_id; + } + + void start(Context *on_finish = nullptr, bool manual = false); + void stop(Context *on_finish = nullptr, bool manual = false, + int r = 0, const std::string& desc = ""); + void restart(Context *on_finish = nullptr); + void flush(); + + void resync_image(Context *on_finish=nullptr); + + void print_status(Formatter *f, stringstream *ss); + + virtual void handle_replay_ready(); + virtual void handle_replay_complete(int r, const std::string &error_desc); + +protected: + /** + * @verbatim + * (error) + * <uninitialized> <------------------------------------ FAIL + * | ^ + * v * + * <starting> * + * | * + * v (error) * + * PREPARE_LOCAL_IMAGE * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * PREPARE_REMOTE_IMAGE * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * BOOTSTRAP_IMAGE * * * * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * INIT_REMOTE_JOURNALER * * * * * * * * * * * * * * * * * + * | * + * v (error) * + * START_REPLAY * * * * * * * * * * * * * * * * * * * * * * + * | + * | /--------------------------------------------\ + * | | | + * v v (asok flush) | + * REPLAYING -------------> LOCAL_REPLAY_FLUSH | + * | \ | | + * | | v | + * | | FLUSH_COMMIT_POSITION | + * | | | | + * | | \--------------------/| + * | | | + * | | (entries available) | + * | \-----------> REPLAY_READY | + * | | | + * | | (skip if not | + * | v needed) (error) + * | REPLAY_FLUSH * * * * * * * * * + * | | | * + * | | (skip if not | * + * | v needed) (error) * + * | GET_REMOTE_TAG * * * * * * * * + * | | | * + * | | (skip if not | * + * | v needed) (error) * + * | ALLOCATE_LOCAL_TAG * * * * * * + * | | | * + * | v (error) * + * | PREPROCESS_ENTRY * * * * * * * + * | | | * + * | v (error) * + * | PROCESS_ENTRY * * * * * * * * * + * | | | * + * | \---------------------/ * + * v * + * REPLAY_COMPLETE < * * * * * * * * * * * * * * * * * * * + * | + * v + * JOURNAL_REPLAY_SHUT_DOWN + * | + * v + * LOCAL_IMAGE_CLOSE + * | + * v + * <stopped> + * + * @endverbatim + */ + + virtual void on_start_fail(int r, const std::string &desc); + virtual bool on_start_interrupted(); + virtual bool on_start_interrupted(Mutex& lock); + + virtual void on_stop_journal_replay(int r = 0, const std::string &desc = ""); + + bool on_replay_interrupted(); + +private: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::ReplayEntry ReplayEntry; + + enum State { + STATE_UNKNOWN, + STATE_STARTING, + STATE_REPLAYING, + STATE_REPLAY_FLUSHING, + STATE_STOPPING, + STATE_STOPPED, + }; + + struct RemoteImage { + std::string mirror_uuid; + std::string image_id; + librados::IoCtx io_ctx; + + RemoteImage() { + } + RemoteImage(const Peer& peer) : io_ctx(peer.io_ctx) { + } + }; + + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + typedef boost::optional<State> OptionalState; + typedef boost::optional<cls::rbd::MirrorImageStatusState> + OptionalMirrorImageStatusState; + + struct JournalListener : public librbd::journal::Listener { + ImageReplayer *img_replayer; + + JournalListener(ImageReplayer *img_replayer) + : img_replayer(img_replayer) { + } + + void handle_close() override { + img_replayer->on_stop_journal_replay(); + } + + void handle_promoted() override { + img_replayer->on_stop_journal_replay(0, "force promoted"); + } + + void handle_resync() override { + img_replayer->resync_image(); + } + }; + + class BootstrapProgressContext : public ProgressContext { + public: + BootstrapProgressContext(ImageReplayer<ImageCtxT> *replayer) : + replayer(replayer) { + } + + void update_progress(const std::string &description, + bool flush = true) override; + private: + ImageReplayer<ImageCtxT> *replayer; + }; + + Threads<ImageCtxT> *m_threads; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + + Peers m_peers; + RemoteImage m_remote_image; + + RadosRef m_local; + std::string m_local_mirror_uuid; + int64_t m_local_pool_id; + std::string m_local_image_id; + std::string m_global_image_id; + std::string m_local_image_name; + std::string m_name; + + mutable Mutex m_lock; + State m_state = STATE_STOPPED; + std::string m_state_desc; + + OptionalMirrorImageStatusState m_mirror_image_status_state = + boost::make_optional(false, cls::rbd::MIRROR_IMAGE_STATUS_STATE_UNKNOWN); + int m_last_r = 0; + + BootstrapProgressContext m_progress_cxt; + + bool m_finished = false; + bool m_delete_requested = false; + bool m_resync_requested = false; + + image_replayer::EventPreprocessor<ImageCtxT> *m_event_preprocessor = nullptr; + image_replayer::ReplayStatusFormatter<ImageCtxT> *m_replay_status_formatter = + nullptr; + IoCtxRef m_local_ioctx; + ImageCtxT *m_local_image_ctx = nullptr; + std::string m_local_image_tag_owner; + + decltype(ImageCtxT::journal) m_local_journal = nullptr; + librbd::journal::Replay<ImageCtxT> *m_local_replay = nullptr; + Journaler* m_remote_journaler = nullptr; + ::journal::ReplayHandler *m_replay_handler = nullptr; + librbd::journal::Listener *m_journal_listener; + + Context *m_on_start_finish = nullptr; + Context *m_on_stop_finish = nullptr; + Context *m_update_status_task = nullptr; + int m_update_status_interval = 0; + librados::AioCompletion *m_update_status_comp = nullptr; + bool m_stop_requested = false; + bool m_manual_stop = false; + + AdminSocketHook *m_asok_hook = nullptr; + PerfCounters *m_perf_counters = nullptr; + + image_replayer::BootstrapRequest<ImageCtxT> *m_bootstrap_request = nullptr; + + uint32_t m_in_flight_status_updates = 0; + bool m_update_status_requested = false; + Context *m_on_update_status_finish = nullptr; + + cls::journal::ClientState m_client_state = + cls::journal::CLIENT_STATE_DISCONNECTED; + librbd::journal::MirrorPeerClientMeta m_client_meta; + + ReplayEntry m_replay_entry; + utime_t m_replay_start_time; + bool m_replay_tag_valid = false; + uint64_t m_replay_tag_tid = 0; + cls::journal::Tag m_replay_tag; + librbd::journal::TagData m_replay_tag_data; + librbd::journal::EventEntry m_event_entry; + AsyncOpTracker m_event_replay_tracker; + Context *m_delayed_preprocess_task = nullptr; + + struct RemoteJournalerListener : public ::journal::JournalMetadataListener { + ImageReplayer *replayer; + + RemoteJournalerListener(ImageReplayer *replayer) : replayer(replayer) { } + + void handle_update(::journal::JournalMetadata *) override; + } m_remote_listener; + + struct C_ReplayCommitted : public Context { + ImageReplayer *replayer; + ReplayEntry replay_entry; + utime_t replay_start_time; + + C_ReplayCommitted(ImageReplayer *replayer, + ReplayEntry &&replay_entry, + const utime_t &replay_start_time) + : replayer(replayer), replay_entry(std::move(replay_entry)), + replay_start_time(replay_start_time) { + } + void finish(int r) override { + replayer->handle_process_entry_safe(replay_entry, replay_start_time, r); + } + }; + + static std::string to_string(const State state); + + bool is_stopped_() const { + return m_state == STATE_STOPPED; + } + bool is_running_() const { + return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested; + } + bool is_replaying_() const { + return (m_state == STATE_REPLAYING || + m_state == STATE_REPLAY_FLUSHING); + } + + void flush_local_replay(Context* on_flush); + void handle_flush_local_replay(Context* on_flush, int r); + + void flush_commit_position(Context* on_flush); + void handle_flush_commit_position(Context* on_flush, int r); + + bool update_mirror_image_status(bool force, const OptionalState &state); + bool start_mirror_image_status_update(bool force, bool restarting); + void finish_mirror_image_status_update(); + void queue_mirror_image_status_update(const OptionalState &state); + void send_mirror_status_update(const OptionalState &state); + void handle_mirror_status_update(int r); + void reschedule_update_status_task(int new_interval); + + void shut_down(int r); + void handle_shut_down(int r); + void handle_remote_journal_metadata_updated(); + + void prepare_local_image(); + void handle_prepare_local_image(int r); + + void prepare_remote_image(); + void handle_prepare_remote_image(int r); + + void bootstrap(); + void handle_bootstrap(int r); + + void init_remote_journaler(); + void handle_init_remote_journaler(int r); + + void start_replay(); + void handle_start_replay(int r); + + void replay_flush(); + void handle_replay_flush(int r); + + void get_remote_tag(); + void handle_get_remote_tag(int r); + + void allocate_local_tag(); + void handle_allocate_local_tag(int r); + + void preprocess_entry(); + void handle_preprocess_entry_ready(int r); + void handle_preprocess_entry_safe(int r); + + void process_entry(); + void handle_process_entry_ready(int r); + void handle_process_entry_safe(const ReplayEntry& replay_entry, + const utime_t &m_replay_start_time, int r); + + void register_admin_socket_hook(); + void unregister_admin_socket_hook(); + void reregister_admin_socket_hook(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_H diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc new file mode 100644 index 00000000..929d75c2 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.cc @@ -0,0 +1,481 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ImageSync.h" +#include "InstanceWatcher.h" +#include "ProgressContext.h" +#include "common/debug.h" +#include "common/Timer.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/DeepCopyRequest.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/internal.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h" +#include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageSync: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { + +using namespace image_sync; +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::unique_lock_name; + +template <typename I> +class ImageSync<I>::ImageCopyProgressContext : public librbd::ProgressContext { +public: + ImageCopyProgressContext(ImageSync *image_sync) : image_sync(image_sync) { + } + + int update_progress(uint64_t object_no, uint64_t object_count) override { + image_sync->handle_copy_image_update_progress(object_no, object_count); + return 0; + } + + ImageSync *image_sync; +}; + +template <typename I> +ImageSync<I>::ImageSync(I *local_image_ctx, I *remote_image_ctx, + SafeTimer *timer, Mutex *timer_lock, + const std::string &mirror_uuid, Journaler *journaler, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue, + InstanceWatcher<I> *instance_watcher, + Context *on_finish, ProgressContext *progress_ctx) + : BaseRequest("rbd::mirror::ImageSync", local_image_ctx->cct, on_finish), + m_local_image_ctx(local_image_ctx), m_remote_image_ctx(remote_image_ctx), + m_timer(timer), m_timer_lock(timer_lock), m_mirror_uuid(mirror_uuid), + m_journaler(journaler), m_client_meta(client_meta), + m_work_queue(work_queue), m_instance_watcher(instance_watcher), + m_progress_ctx(progress_ctx), + m_lock(unique_lock_name("ImageSync::m_lock", this)), + m_update_sync_point_interval(m_local_image_ctx->cct->_conf.template get_val<double>( + "rbd_mirror_sync_point_update_age")), m_client_meta_copy(*client_meta) { +} + +template <typename I> +ImageSync<I>::~ImageSync() { + ceph_assert(m_image_copy_request == nullptr); + ceph_assert(m_image_copy_prog_ctx == nullptr); + ceph_assert(m_update_sync_ctx == nullptr); +} + +template <typename I> +void ImageSync<I>::send() { + send_notify_sync_request(); +} + +template <typename I> +void ImageSync<I>::cancel() { + Mutex::Locker locker(m_lock); + + dout(10) << dendl; + + m_canceled = true; + + if (m_instance_watcher->cancel_sync_request(m_local_image_ctx->id)) { + return; + } + + if (m_image_copy_request != nullptr) { + m_image_copy_request->cancel(); + } +} + +template <typename I> +void ImageSync<I>::send_notify_sync_request() { + update_progress("NOTIFY_SYNC_REQUEST"); + + dout(10) << dendl; + + m_lock.Lock(); + if (m_canceled) { + m_lock.Unlock(); + BaseRequest::finish(-ECANCELED); + return; + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_notify_sync_request>(this)); + m_instance_watcher->notify_sync_request(m_local_image_ctx->id, ctx); + m_lock.Unlock(); +} + +template <typename I> +void ImageSync<I>::handle_notify_sync_request(int r) { + dout(10) << ": r=" << r << dendl; + + m_lock.Lock(); + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + m_lock.Unlock(); + + if (r < 0) { + BaseRequest::finish(r); + return; + } + + send_prune_catch_up_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_prune_catch_up_sync_point() { + update_progress("PRUNE_CATCH_UP_SYNC_POINT"); + + if (m_client_meta->sync_points.empty()) { + send_create_sync_point(); + return; + } + + dout(10) << dendl; + + // prune will remove sync points with missing snapshots and + // ensure we have a maximum of one sync point (in case we + // restarted) + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, false, m_journaler, m_client_meta, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_catch_up_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune catch-up sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_create_sync_point() { + update_progress("CREATE_SYNC_POINT"); + + // TODO: when support for disconnecting laggy clients is added, + // re-connect and create catch-up sync point + if (m_client_meta->sync_points.size() > 0) { + send_copy_image(); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_create_sync_point>(this); + SyncPointCreateRequest<I> *request = SyncPointCreateRequest<I>::create( + m_remote_image_ctx, m_mirror_uuid, m_journaler, m_client_meta, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_create_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to create sync point: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_copy_image(); +} + +template <typename I> +void ImageSync<I>::send_copy_image() { + librados::snap_t snap_id_start = 0; + librados::snap_t snap_id_end; + librbd::deep_copy::ObjectNumber object_number; + int r = 0; + { + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + ceph_assert(!m_client_meta->sync_points.empty()); + auto &sync_point = m_client_meta->sync_points.front(); + snap_id_end = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name); + if (snap_id_end == CEPH_NOSNAP) { + derr << ": failed to locate snapshot: " << sync_point.snap_name << dendl; + r = -ENOENT; + } else if (!sync_point.from_snap_name.empty()) { + snap_id_start = m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.from_snap_name); + if (snap_id_start == CEPH_NOSNAP) { + derr << ": failed to locate from snapshot: " + << sync_point.from_snap_name << dendl; + r = -ENOENT; + } + } + object_number = sync_point.object_number; + } + if (r < 0) { + finish(r); + return; + } + + m_lock.Lock(); + if (m_canceled) { + m_lock.Unlock(); + finish(-ECANCELED); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_copy_image>(this); + m_image_copy_prog_ctx = new ImageCopyProgressContext(this); + m_image_copy_request = librbd::DeepCopyRequest<I>::create( + m_remote_image_ctx, m_local_image_ctx, snap_id_start, snap_id_end, + 0, false, object_number, m_work_queue, &m_client_meta->snap_seqs, + m_image_copy_prog_ctx, ctx); + m_image_copy_request->get(); + m_lock.Unlock(); + + update_progress("COPY_IMAGE"); + + m_image_copy_request->send(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image(int r) { + dout(10) << ": r=" << r << dendl; + + { + Mutex::Locker timer_locker(*m_timer_lock); + Mutex::Locker locker(m_lock); + m_image_copy_request->put(); + m_image_copy_request = nullptr; + delete m_image_copy_prog_ctx; + m_image_copy_prog_ctx = nullptr; + if (r == 0 && m_canceled) { + r = -ECANCELED; + } + + if (m_update_sync_ctx != nullptr) { + m_timer->cancel_event(m_update_sync_ctx); + m_update_sync_ctx = nullptr; + } + + if (m_updating_sync_point) { + m_ret_val = r; + return; + } + } + + if (r == -ECANCELED) { + dout(10) << ": image copy canceled" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << ": failed to copy image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count) { + int percent = 100 * object_no / object_count; + update_progress("COPY_IMAGE " + stringify(percent) + "%"); + + Mutex::Locker locker(m_lock); + m_image_copy_object_no = object_no; + m_image_copy_object_count = object_count; + + if (m_update_sync_ctx == nullptr && !m_updating_sync_point) { + send_update_sync_point(); + } +} + +template <typename I> +void ImageSync<I>::send_update_sync_point() { + ceph_assert(m_lock.is_locked()); + + m_update_sync_ctx = nullptr; + + if (m_canceled) { + return; + } + + auto sync_point = &m_client_meta->sync_points.front(); + + if (m_client_meta->sync_object_count == m_image_copy_object_count && + sync_point->object_number && + (m_image_copy_object_no - 1) == sync_point->object_number.get()) { + // update sync point did not progress since last sync + return; + } + + m_updating_sync_point = true; + + m_client_meta_copy = *m_client_meta; + m_client_meta->sync_object_count = m_image_copy_object_count; + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } + + CephContext *cct = m_local_image_ctx->cct; + ldout(cct, 20) << ": sync_point=" << *sync_point << dendl; + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(*m_client_meta); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_update_sync_point>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void ImageSync<I>::handle_update_sync_point(int r) { + CephContext *cct = m_local_image_ctx->cct; + ldout(cct, 20) << ": r=" << r << dendl; + + if (r < 0) { + *m_client_meta = m_client_meta_copy; + lderr(cct) << ": failed to update client data: " << cpp_strerror(r) + << dendl; + } + + { + Mutex::Locker timer_locker(*m_timer_lock); + Mutex::Locker locker(m_lock); + m_updating_sync_point = false; + + if (m_image_copy_request != nullptr) { + m_update_sync_ctx = new FunctionContext( + [this](int r) { + Mutex::Locker locker(m_lock); + this->send_update_sync_point(); + }); + m_timer->add_event_after(m_update_sync_point_interval, + m_update_sync_ctx); + return; + } + } + + send_flush_sync_point(); +} + +template <typename I> +void ImageSync<I>::send_flush_sync_point() { + if (m_ret_val < 0) { + finish(m_ret_val); + return; + } + + update_progress("FLUSH_SYNC_POINT"); + + m_client_meta_copy = *m_client_meta; + m_client_meta->sync_object_count = m_image_copy_object_count; + auto sync_point = &m_client_meta->sync_points.front(); + if (m_image_copy_object_no > 0) { + sync_point->object_number = m_image_copy_object_no - 1; + } else { + sync_point->object_number = boost::none; + } + + dout(10) << ": sync_point=" << *sync_point << dendl; + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(*m_client_meta); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_flush_sync_point>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void ImageSync<I>::handle_flush_sync_point(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + *m_client_meta = m_client_meta_copy; + + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + send_prune_sync_points(); +} + +template <typename I> +void ImageSync<I>::send_prune_sync_points() { + dout(10) << dendl; + + update_progress("PRUNE_SYNC_POINTS"); + + Context *ctx = create_context_callback< + ImageSync<I>, &ImageSync<I>::handle_prune_sync_points>(this); + SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create( + m_remote_image_ctx, true, m_journaler, m_client_meta, ctx); + request->send(); +} + +template <typename I> +void ImageSync<I>::handle_prune_sync_points(int r) { + dout(10) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to prune sync point: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (!m_client_meta->sync_points.empty()) { + send_copy_image(); + return; + } + + finish(0); +} + +template <typename I> +void ImageSync<I>::update_progress(const std::string &description) { + dout(20) << ": " << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress("IMAGE_SYNC/" + description); + } +} + +template <typename I> +void ImageSync<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_instance_watcher->notify_sync_complete(m_local_image_ctx->id); + BaseRequest::finish(r); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageSync<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageSync.h b/src/tools/rbd_mirror/ImageSync.h new file mode 100644 index 00000000..9e00c129 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSync.h @@ -0,0 +1,160 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_H +#define RBD_MIRROR_IMAGE_SYNC_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include "librbd/journal/TypeTraits.h" +#include "librbd/journal/Types.h" +#include "common/Mutex.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include <map> +#include <vector> + +class Context; +class ContextWQ; +namespace journal { class Journaler; } +namespace librbd { class ProgressContext; } +namespace librbd { template <typename> class DeepCopyRequest; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { + +class ProgressContext; + +template <typename> class InstanceWatcher; + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageSync : public BaseRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + + static ImageSync* create(ImageCtxT *local_image_ctx, + ImageCtxT *remote_image_ctx, + SafeTimer *timer, Mutex *timer_lock, + const std::string &mirror_uuid, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue, + InstanceWatcher<ImageCtxT> *instance_watcher, + Context *on_finish, + ProgressContext *progress_ctx = nullptr) { + return new ImageSync(local_image_ctx, remote_image_ctx, timer, timer_lock, + mirror_uuid, journaler, client_meta, work_queue, + instance_watcher, on_finish, progress_ctx); + } + + ImageSync(ImageCtxT *local_image_ctx, ImageCtxT *remote_image_ctx, + SafeTimer *timer, Mutex *timer_lock, const std::string &mirror_uuid, + Journaler *journaler, MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue, InstanceWatcher<ImageCtxT> *instance_watcher, + Context *on_finish, ProgressContext *progress_ctx = nullptr); + ~ImageSync() override; + + void send() override; + void cancel() override; + +protected: + void finish(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * NOTIFY_SYNC_REQUEST + * | + * v + * PRUNE_CATCH_UP_SYNC_POINT + * | + * v + * CREATE_SYNC_POINT (skip if already exists and + * | not disconnected) + * v + * COPY_IMAGE . . . . . . . . . . . . . . + * | . + * v . + * FLUSH_SYNC_POINT . + * | . (image sync canceled) + * v . + * PRUNE_SYNC_POINTS . + * | . + * v . + * <finish> < . . . . . . . . . . . . . . + * + * @endverbatim + */ + + typedef std::vector<librados::snap_t> SnapIds; + typedef std::map<librados::snap_t, SnapIds> SnapMap; + class ImageCopyProgressContext; + + ImageCtxT *m_local_image_ctx; + ImageCtxT *m_remote_image_ctx; + SafeTimer *m_timer; + Mutex *m_timer_lock; + std::string m_mirror_uuid; + Journaler *m_journaler; + MirrorPeerClientMeta *m_client_meta; + ContextWQ *m_work_queue; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + ProgressContext *m_progress_ctx; + + SnapMap m_snap_map; + + Mutex m_lock; + bool m_canceled = false; + + librbd::DeepCopyRequest<ImageCtxT> *m_image_copy_request = nullptr; + librbd::ProgressContext *m_image_copy_prog_ctx = nullptr; + + bool m_updating_sync_point = false; + Context *m_update_sync_ctx = nullptr; + double m_update_sync_point_interval; + uint64_t m_image_copy_object_no = 0; + uint64_t m_image_copy_object_count = 0; + MirrorPeerClientMeta m_client_meta_copy; + + int m_ret_val = 0; + + void send_notify_sync_request(); + void handle_notify_sync_request(int r); + + void send_prune_catch_up_sync_point(); + void handle_prune_catch_up_sync_point(int r); + + void send_create_sync_point(); + void handle_create_sync_point(int r); + + void send_update_max_object_count(); + void handle_update_max_object_count(int r); + + void send_copy_image(); + void handle_copy_image(int r); + void handle_copy_image_update_progress(uint64_t object_no, + uint64_t object_count); + void send_update_sync_point(); + void handle_update_sync_point(int r); + + void send_flush_sync_point(); + void handle_flush_sync_point(int r); + + void send_prune_sync_points(); + void handle_prune_sync_points(int r); + + void update_progress(const std::string &description); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageSync<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_H diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.cc b/src/tools/rbd_mirror/ImageSyncThrottler.cc new file mode 100644 index 00000000..b395a012 --- /dev/null +++ b/src/tools/rbd_mirror/ImageSyncThrottler.cc @@ -0,0 +1,227 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "ImageSyncThrottler.h" +#include "common/Formatter.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ImageSyncThrottler:: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +template <typename I> +ImageSyncThrottler<I>::ImageSyncThrottler(CephContext *cct) + : m_cct(cct), + m_lock(librbd::util::unique_lock_name("rbd::mirror::ImageSyncThrottler", + this)), + m_max_concurrent_syncs(cct->_conf.get_val<uint64_t>( + "rbd_mirror_concurrent_image_syncs")) { + dout(20) << "max_concurrent_syncs=" << m_max_concurrent_syncs << dendl; + m_cct->_conf.add_observer(this); +} + +template <typename I> +ImageSyncThrottler<I>::~ImageSyncThrottler() { + m_cct->_conf.remove_observer(this); + + Mutex::Locker locker(m_lock); + ceph_assert(m_inflight_ops.empty()); + ceph_assert(m_queue.empty()); +} + +template <typename I> +void ImageSyncThrottler<I>::start_op(const std::string &id, Context *on_start) { + dout(20) << "id=" << id << dendl; + + int r = 0; + { + Mutex::Locker locker(m_lock); + + if (m_inflight_ops.count(id) > 0) { + dout(20) << "duplicate for already started op " << id << dendl; + } else if (m_queued_ops.count(id) > 0) { + dout(20) << "duplicate for already queued op " << id << dendl; + std::swap(m_queued_ops[id], on_start); + r = -ENOENT; + } else if (m_max_concurrent_syncs == 0 || + m_inflight_ops.size() < m_max_concurrent_syncs) { + ceph_assert(m_queue.empty()); + m_inflight_ops.insert(id); + dout(20) << "ready to start sync for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]" + << dendl; + } else { + m_queue.push_back(id); + std::swap(m_queued_ops[id], on_start); + dout(20) << "image sync for " << id << " has been queued" << dendl; + } + } + + if (on_start != nullptr) { + on_start->complete(r); + } +} + +template <typename I> +bool ImageSyncThrottler<I>::cancel_op(const std::string &id) { + dout(20) << "id=" << id << dendl; + + Context *on_start = nullptr; + { + Mutex::Locker locker(m_lock); + auto it = m_queued_ops.find(id); + if (it != m_queued_ops.end()) { + dout(20) << "canceled queued sync for " << id << dendl; + m_queue.remove(id); + on_start = it->second; + m_queued_ops.erase(it); + } + } + + if (on_start == nullptr) { + return false; + } + + on_start->complete(-ECANCELED); + return true; +} + +template <typename I> +void ImageSyncThrottler<I>::finish_op(const std::string &id) { + dout(20) << "id=" << id << dendl; + + if (cancel_op(id)) { + return; + } + + Context *on_start = nullptr; + { + Mutex::Locker locker(m_lock); + + m_inflight_ops.erase(id); + + if (m_inflight_ops.size() < m_max_concurrent_syncs && !m_queue.empty()) { + auto id = m_queue.front(); + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + m_inflight_ops.insert(id); + dout(20) << "ready to start sync for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]" + << dendl; + on_start = it->second; + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + if (on_start != nullptr) { + on_start->complete(0); + } +} + +template <typename I> +void ImageSyncThrottler<I>::drain(int r) { + dout(20) << dendl; + + std::map<std::string, Context *> queued_ops; + { + Mutex::Locker locker(m_lock); + std::swap(m_queued_ops, queued_ops); + m_queue.clear(); + m_inflight_ops.clear(); + } + + for (auto &it : queued_ops) { + it.second->complete(r); + } +} + +template <typename I> +void ImageSyncThrottler<I>::set_max_concurrent_syncs(uint32_t max) { + dout(20) << "max=" << max << dendl; + + std::list<Context *> ops; + { + Mutex::Locker locker(m_lock); + m_max_concurrent_syncs = max; + + // Start waiting ops in the case of available free slots + while ((m_max_concurrent_syncs == 0 || + m_inflight_ops.size() < m_max_concurrent_syncs) && + !m_queue.empty()) { + auto id = m_queue.front(); + m_inflight_ops.insert(id); + dout(20) << "ready to start sync for " << id << " [" + << m_inflight_ops.size() << "/" << m_max_concurrent_syncs << "]" + << dendl; + auto it = m_queued_ops.find(id); + ceph_assert(it != m_queued_ops.end()); + ops.push_back(it->second); + m_queued_ops.erase(it); + m_queue.pop_front(); + } + } + + for (const auto& ctx : ops) { + ctx->complete(0); + } +} + +template <typename I> +void ImageSyncThrottler<I>::print_status(Formatter *f, std::stringstream *ss) { + dout(20) << dendl; + + Mutex::Locker locker(m_lock); + + if (f) { + f->dump_int("max_parallel_syncs", m_max_concurrent_syncs); + f->dump_int("running_syncs", m_inflight_ops.size()); + f->dump_int("waiting_syncs", m_queue.size()); + f->flush(*ss); + } else { + *ss << "[ "; + *ss << "max_parallel_syncs=" << m_max_concurrent_syncs << ", "; + *ss << "running_syncs=" << m_inflight_ops.size() << ", "; + *ss << "waiting_syncs=" << m_queue.size() << " ]"; + } +} + +template <typename I> +const char** ImageSyncThrottler<I>::get_tracked_conf_keys() const { + static const char* KEYS[] = { + "rbd_mirror_concurrent_image_syncs", + NULL + }; + return KEYS; +} + +template <typename I> +void ImageSyncThrottler<I>::handle_conf_change(const ConfigProxy& conf, + const set<string> &changed) { + if (changed.count("rbd_mirror_concurrent_image_syncs")) { + set_max_concurrent_syncs(conf.get_val<uint64_t>("rbd_mirror_concurrent_image_syncs")); + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.h b/src/tools/rbd_mirror/ImageSyncThrottler.h new file mode 100644 index 00000000..c0cda61e --- /dev/null +++ b/src/tools/rbd_mirror/ImageSyncThrottler.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_THROTTLER_H +#define RBD_MIRROR_IMAGE_SYNC_THROTTLER_H + +#include <list> +#include <map> +#include <set> +#include <sstream> +#include <string> +#include <utility> + +#include "common/Mutex.h" +#include "common/config_obs.h" + +class CephContext; +class Context; + +namespace ceph { class Formatter; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class ImageSyncThrottler : public md_config_obs_t { +public: + static ImageSyncThrottler *create(CephContext *cct) { + return new ImageSyncThrottler(cct); + } + void destroy() { + delete this; + } + + ImageSyncThrottler(CephContext *cct); + ~ImageSyncThrottler() override; + + void set_max_concurrent_syncs(uint32_t max); + void start_op(const std::string &id, Context *on_start); + bool cancel_op(const std::string &id); + void finish_op(const std::string &id); + void drain(int r); + + void print_status(Formatter *f, std::stringstream *ss); + +private: + CephContext *m_cct; + Mutex m_lock; + uint32_t m_max_concurrent_syncs; + std::list<std::string> m_queue; + std::map<std::string, Context *> m_queued_ops; + std::set<std::string> m_inflight_ops; + + const char **get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set<std::string> &changed) override; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_THROTTLER_H diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc new file mode 100644 index 00000000..c0086a48 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -0,0 +1,510 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "ImageReplayer.h" +#include "InstanceReplayer.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceReplayer: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string SERVICE_DAEMON_ASSIGNED_COUNT_KEY("image_assigned_count"); +const std::string SERVICE_DAEMON_WARNING_COUNT_KEY("image_warning_count"); +const std::string SERVICE_DAEMON_ERROR_COUNT_KEY("image_error_count"); + +} // anonymous namespace + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; + +template <typename I> +InstanceReplayer<I>::InstanceReplayer( + Threads<I> *threads, ServiceDaemon<I>* service_daemon, + RadosRef local_rados, const std::string &local_mirror_uuid, + int64_t local_pool_id) + : m_threads(threads), m_service_daemon(service_daemon), + m_local_rados(local_rados), m_local_mirror_uuid(local_mirror_uuid), + m_local_pool_id(local_pool_id), + m_lock("rbd::mirror::InstanceReplayer " + stringify(local_pool_id)) { +} + +template <typename I> +InstanceReplayer<I>::~InstanceReplayer() { + ceph_assert(m_image_state_check_task == nullptr); + ceph_assert(m_async_op_tracker.empty()); + ceph_assert(m_image_replayers.empty()); +} + +template <typename I> +bool InstanceReplayer<I>::is_blacklisted() const { + std::lock_guard locker{m_lock}; + return m_blacklisted; +} + +template <typename I> +int InstanceReplayer<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceReplayer<I>::init(Context *on_finish) { + dout(10) << dendl; + + Context *ctx = new FunctionContext( + [this, on_finish] (int r) { + { + Mutex::Locker timer_locker(m_threads->timer_lock); + schedule_image_state_check_task(); + } + on_finish->complete(0); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceReplayer<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_shut_down == nullptr); + m_on_shut_down = on_finish; + + Context *ctx = new FunctionContext( + [this] (int r) { + cancel_image_state_check_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::add_peer(std::string peer_uuid, + librados::IoCtx io_ctx) { + dout(10) << peer_uuid << dendl; + + Mutex::Locker locker(m_lock); + auto result = m_peers.insert(Peer(peer_uuid, io_ctx)).second; + ceph_assert(result); +} + +template <typename I> +void InstanceReplayer<I>::release_all(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, on_finish); + for (auto it = m_image_replayers.begin(); it != m_image_replayers.end(); + it = m_image_replayers.erase(it)) { + auto image_replayer = it->second; + auto ctx = gather_ctx->new_sub(); + ctx = new FunctionContext( + [image_replayer, ctx] (int r) { + image_replayer->destroy(); + ctx->complete(0); + }); + stop_image_replayer(image_replayer, ctx); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::acquire_image(InstanceWatcher<I> *instance_watcher, + const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + auto image_replayer = ImageReplayer<I>::create( + m_threads, instance_watcher, m_local_rados, + m_local_mirror_uuid, m_local_pool_id, global_image_id); + + dout(10) << global_image_id << ": creating replayer " << image_replayer + << dendl; + + it = m_image_replayers.insert(std::make_pair(global_image_id, + image_replayer)).first; + + // TODO only a single peer is currently supported + ceph_assert(m_peers.size() == 1); + auto peer = *m_peers.begin(); + image_replayer->add_peer(peer.peer_uuid, peer.io_ctx); + start_image_replayer(image_replayer); + } else { + // A duplicate acquire notification implies (1) connection hiccup or + // (2) new leader election. For the second case, restart the replayer to + // detect if the image has been deleted while the leader was offline + auto& image_replayer = it->second; + image_replayer->set_finished(false); + image_replayer->restart(); + } + + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::release_image(const std::string &global_image_id, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it == m_image_replayers.end()) { + dout(5) << global_image_id << ": not found" << dendl; + m_threads->work_queue->queue(on_finish, 0); + return; + } + + auto image_replayer = it->second; + m_image_replayers.erase(it); + + on_finish = new FunctionContext( + [image_replayer, on_finish] (int r) { + image_replayer->destroy(); + on_finish->complete(0); + }); + stop_image_replayer(image_replayer, on_finish); +} + +template <typename I> +void InstanceReplayer<I>::remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_shut_down == nullptr); + + auto it = m_image_replayers.find(global_image_id); + if (it != m_image_replayers.end()) { + // TODO only a single peer is currently supported, therefore + // we can just interrupt the current image replayer and + // it will eventually detect that the peer image is missing and + // determine if a delete propagation is required. + auto image_replayer = it->second; + image_replayer->restart(); + } + m_threads->work_queue->queue(on_finish, 0); +} + +template <typename I> +void InstanceReplayer<I>::print_status(Formatter *f, stringstream *ss) { + dout(10) << dendl; + + if (!f) { + return; + } + + Mutex::Locker locker(m_lock); + + f->open_array_section("image_replayers"); + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->print_status(f, ss); + } + f->close_section(); +} + +template <typename I> +void InstanceReplayer<I>::start() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + m_manual_stop = false; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->start(nullptr, true); + } +} + +template <typename I> +void InstanceReplayer<I>::stop() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + m_manual_stop = true; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->stop(nullptr, true); + } +} + +template <typename I> +void InstanceReplayer<I>::restart() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + m_manual_stop = false; + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->restart(); + } +} + +template <typename I> +void InstanceReplayer<I>::flush() +{ + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + for (auto &kv : m_image_replayers) { + auto &image_replayer = kv.second; + image_replayer->flush(); + } +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayer( + ImageReplayer<I> *image_replayer) { + ceph_assert(m_lock.is_locked()); + + std::string global_image_id = image_replayer->get_global_image_id(); + if (!image_replayer->is_stopped()) { + return; + } else if (image_replayer->is_blacklisted()) { + derr << "global_image_id=" << global_image_id << ": blacklisted detected " + << "during image replay" << dendl; + m_blacklisted = true; + return; + } else if (image_replayer->is_finished()) { + // TODO temporary until policy integrated + dout(5) << "removing image replayer for global_image_id=" + << global_image_id << dendl; + m_image_replayers.erase(image_replayer->get_global_image_id()); + image_replayer->destroy(); + return; + } else if (m_manual_stop) { + return; + } + + dout(10) << "global_image_id=" << global_image_id << dendl; + image_replayer->start(nullptr, false); +} + +template <typename I> +void InstanceReplayer<I>::queue_start_image_replayers() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, &InstanceReplayer<I>::start_image_replayers>(this); + m_async_op_tracker.start_op(); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceReplayer<I>::start_image_replayers(int r) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + if (m_on_shut_down != nullptr) { + return; + } + + uint64_t image_count = 0; + uint64_t warning_count = 0; + uint64_t error_count = 0; + for (auto it = m_image_replayers.begin(); + it != m_image_replayers.end();) { + auto current_it(it); + ++it; + + ++image_count; + auto health_state = current_it->second->get_health_state(); + if (health_state == image_replayer::HEALTH_STATE_WARNING) { + ++warning_count; + } else if (health_state == image_replayer::HEALTH_STATE_ERROR) { + ++error_count; + } + + start_image_replayer(current_it->second); + } + + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_ASSIGNED_COUNT_KEY, image_count); + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_WARNING_COUNT_KEY, warning_count); + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_ERROR_COUNT_KEY, error_count); + + m_async_op_tracker.finish_op(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayer(ImageReplayer<I> *image_replayer, + Context *on_finish) { + dout(10) << image_replayer << " global_image_id=" + << image_replayer->get_global_image_id() << ", on_finish=" + << on_finish << dendl; + + if (image_replayer->is_stopped()) { + m_threads->work_queue->queue(on_finish, 0); + return; + } + + m_async_op_tracker.start_op(); + Context *ctx = create_async_context_callback( + m_threads->work_queue, new FunctionContext( + [this, image_replayer, on_finish] (int r) { + stop_image_replayer(image_replayer, on_finish); + m_async_op_tracker.finish_op(); + })); + + if (image_replayer->is_running()) { + image_replayer->stop(ctx, false); + } else { + int after = 1; + dout(10) << "scheduling image replayer " << image_replayer << " stop after " + << after << " sec (task " << ctx << ")" << dendl; + ctx = new FunctionContext( + [this, after, ctx] (int r) { + Mutex::Locker timer_locker(m_threads->timer_lock); + m_threads->timer->add_event_after(after, ctx); + }); + m_threads->work_queue->queue(ctx, 0); + } +} + +template <typename I> +void InstanceReplayer<I>::wait_for_ops() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + InstanceReplayer, &InstanceReplayer<I>::handle_wait_for_ops>(this); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceReplayer<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Mutex::Locker locker(m_lock); + stop_image_replayers(); +} + +template <typename I> +void InstanceReplayer<I>::stop_image_replayers() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback<InstanceReplayer<I>, + &InstanceReplayer<I>::handle_stop_image_replayers>(this)); + + C_Gather *gather_ctx = new C_Gather(g_ceph_context, ctx); + for (auto &it : m_image_replayers) { + stop_image_replayer(it.second, gather_ctx->new_sub()); + } + gather_ctx->activate(); +} + +template <typename I> +void InstanceReplayer<I>::handle_stop_image_replayers(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + for (auto &it : m_image_replayers) { + ceph_assert(it.second->is_stopped()); + it.second->destroy(); + } + m_image_replayers.clear(); + + ceph_assert(m_on_shut_down != nullptr); + std::swap(on_finish, m_on_shut_down); + } + on_finish->complete(r); +} + +template <typename I> +void InstanceReplayer<I>::cancel_image_state_check_task() { + Mutex::Locker timer_locker(m_threads->timer_lock); + + if (m_image_state_check_task == nullptr) { + return; + } + + dout(10) << m_image_state_check_task << dendl; + bool canceled = m_threads->timer->cancel_event(m_image_state_check_task); + ceph_assert(canceled); + m_image_state_check_task = nullptr; +} + +template <typename I> +void InstanceReplayer<I>::schedule_image_state_check_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_image_state_check_task == nullptr); + + m_image_state_check_task = new FunctionContext( + [this](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_image_state_check_task = nullptr; + schedule_image_state_check_task(); + queue_start_image_replayers(); + }); + + auto cct = static_cast<CephContext *>(m_local_rados->cct()); + int after = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_state_check_interval"); + + dout(10) << "scheduling image state check after " << after << " sec (task " + << m_image_state_check_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_image_state_check_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceReplayer.h b/src/tools/rbd_mirror/InstanceReplayer.h new file mode 100644 index 00000000..efbdde02 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceReplayer.h @@ -0,0 +1,123 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_REPLAYER_H +#define RBD_MIRROR_INSTANCE_REPLAYER_H + +#include <map> +#include <sstream> + +#include "common/AsyncOpTracker.h" +#include "common/Formatter.h" +#include "common/Mutex.h" +#include "tools/rbd_mirror/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ImageReplayer; +template <typename> class InstanceWatcher; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceReplayer { +public: + static InstanceReplayer* create( + Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT>* service_daemon, + RadosRef local_rados, const std::string &local_mirror_uuid, + int64_t local_pool_id) { + return new InstanceReplayer(threads, service_daemon, local_rados, + local_mirror_uuid, local_pool_id); + } + void destroy() { + delete this; + } + + InstanceReplayer(Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT>* service_daemon, + RadosRef local_rados, const std::string &local_mirror_uuid, + int64_t local_pool_id); + ~InstanceReplayer(); + + bool is_blacklisted() const; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void add_peer(std::string peer_uuid, librados::IoCtx io_ctx); + + void acquire_image(InstanceWatcher<ImageCtxT> *instance_watcher, + const std::string &global_image_id, Context *on_finish); + void release_image(const std::string &global_image_id, Context *on_finish); + void remove_peer_image(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void release_all(Context *on_finish); + + void print_status(Formatter *f, stringstream *ss); + void start(); + void stop(); + void restart(); + void flush(); + +private: + /** + * @verbatim + * + * <uninitialized> <-------------------\ + * | (init) | (repeat for each + * v STOP_IMAGE_REPLAYER ---\ image replayer) + * SCHEDULE_IMAGE_STATE_CHECK_TASK ^ ^ | + * | | | | + * v (shut_down) | \---------/ + * <initialized> -----------------> WAIT_FOR_OPS + * + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT>* m_service_daemon; + RadosRef m_local_rados; + std::string m_local_mirror_uuid; + int64_t m_local_pool_id; + + mutable Mutex m_lock; + AsyncOpTracker m_async_op_tracker; + std::map<std::string, ImageReplayer<ImageCtxT> *> m_image_replayers; + Peers m_peers; + Context *m_image_state_check_task = nullptr; + Context *m_on_shut_down = nullptr; + bool m_manual_stop = false; + bool m_blacklisted = false; + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void start_image_replayer(ImageReplayer<ImageCtxT> *image_replayer); + void queue_start_image_replayers(); + void start_image_replayers(int r); + + void stop_image_replayer(ImageReplayer<ImageCtxT> *image_replayer, + Context *on_finish); + + void stop_image_replayers(); + void handle_stop_image_replayers(int r); + + void schedule_image_state_check_task(); + void cancel_image_state_check_task(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::InstanceReplayer<librbd::ImageCtx>; + +#endif // RBD_MIRROR_INSTANCE_REPLAYER_H diff --git a/src/tools/rbd_mirror/InstanceWatcher.cc b/src/tools/rbd_mirror/InstanceWatcher.cc new file mode 100644 index 00000000..d9e1ba23 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.cc @@ -0,0 +1,1299 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "InstanceWatcher.h" +#include "include/stringify.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ManagedLock.h" +#include "librbd/Utils.h" +#include "InstanceReplayer.h" +#include "ImageSyncThrottler.h" +#include "common/Cond.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " + +namespace rbd { +namespace mirror { + +using namespace instance_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using librbd::util::unique_lock_name; + +namespace { + +struct C_GetInstances : public Context { + std::vector<std::string> *instance_ids; + Context *on_finish; + bufferlist out_bl; + + C_GetInstances(std::vector<std::string> *instance_ids, Context *on_finish) + : instance_ids(instance_ids), on_finish(on_finish) { + } + + void finish(int r) override { + dout(10) << "C_GetInstances: " << this << " " << __func__ << ": r=" << r + << dendl; + + if (r == 0) { + auto it = out_bl.cbegin(); + r = librbd::cls_client::mirror_instances_list_finish(&it, instance_ids); + } else if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + } +}; + +template <typename I> +struct C_RemoveInstanceRequest : public Context { + InstanceWatcher<I> instance_watcher; + Context *on_finish; + + C_RemoveInstanceRequest(librados::IoCtx &io_ctx, ContextWQ *work_queue, + const std::string &instance_id, Context *on_finish) + : instance_watcher(io_ctx, work_queue, nullptr, instance_id), + on_finish(on_finish) { + } + + void send() { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << dendl; + + instance_watcher.remove(this); + } + + void finish(int r) override { + dout(10) << "C_RemoveInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + ceph_assert(r == 0); + + on_finish->complete(r); + } +}; + +} // anonymous namespace + +template <typename I> +struct InstanceWatcher<I>::C_NotifyInstanceRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string instance_id; + uint64_t request_id; + bufferlist bl; + Context *on_finish; + bool send_to_leader; + std::unique_ptr<librbd::watcher::Notifier> notifier; + librbd::watcher::NotifyResponse response; + bool canceling = false; + + C_NotifyInstanceRequest(InstanceWatcher<I> *instance_watcher, + const std::string &instance_id, uint64_t request_id, + bufferlist &&bl, Context *on_finish) + : instance_watcher(instance_watcher), instance_id(instance_id), + request_id(request_id), bl(bl), on_finish(on_finish), + send_to_leader(instance_id.empty()) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": instance_watcher=" << instance_watcher << ", instance_id=" + << instance_id << ", request_id=" << request_id << dendl; + + ceph_assert(instance_watcher->m_lock.is_locked()); + + if (!send_to_leader) { + ceph_assert((!instance_id.empty())); + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + + instance_watcher->m_notify_op_tracker.start_op(); + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + } + + void send() { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(instance_watcher->m_lock.is_locked()); + + if (canceling) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": canceling" << dendl; + instance_watcher->m_work_queue->queue(this, -ECANCELED); + return; + } + + if (send_to_leader) { + if (instance_watcher->m_leader_instance_id.empty()) { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": suspending" << dendl; + instance_watcher->suspend_notify_request(this); + return; + } + + if (instance_watcher->m_leader_instance_id != instance_id) { + auto count = instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(count > 0); + + instance_id = instance_watcher->m_leader_instance_id; + + auto result = instance_watcher->m_notify_ops.insert( + std::make_pair(instance_id, this)).second; + ceph_assert(result); + + notifier.reset(new librbd::watcher::Notifier( + instance_watcher->m_work_queue, + instance_watcher->m_ioctx, + RBD_MIRROR_INSTANCE_PREFIX + instance_id)); + } + } + + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": sending to " << instance_id << dendl; + notifier->notify(bl, &response, this); + } + + void cancel() { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << dendl; + + ceph_assert(instance_watcher->m_lock.is_locked()); + + canceling = true; + instance_watcher->unsuspend_notify_request(this); + } + + void finish(int r) override { + dout(10) << "C_NotifyInstanceRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (r == 0 || r == -ETIMEDOUT) { + bool found = false; + for (auto &it : response.acks) { + auto &bl = it.second; + if (it.second.length() == 0) { + dout(5) << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": no payload in ack, ignoring" << dendl; + continue; + } + try { + auto iter = bl.cbegin(); + NotifyAckPayload ack; + decode(ack, iter); + if (ack.instance_id != instance_watcher->get_instance_id()) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack instance_id (" << ack.instance_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + if (ack.request_id != request_id) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": ack request_id (" << ack.request_id << ") " + << "does not match, ignoring" << dendl; + continue; + } + r = ack.ret_val; + found = true; + break; + } catch (const buffer::error &err) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": failed to decode ack: " << err.what() << dendl; + continue; + } + } + + if (!found) { + if (r == -ETIMEDOUT) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending after timeout" << dendl; + Mutex::Locker locker(instance_watcher->m_lock); + send(); + return; + } else { + r = -EINVAL; + } + } else { + if (r == -ESTALE && send_to_leader) { + derr << "C_NotifyInstanceRequest: " << this << " " << __func__ + << ": resending due to leader change" << dendl; + Mutex::Locker locker(instance_watcher->m_lock); + send(); + return; + } + } + } + + on_finish->complete(r); + + { + Mutex::Locker locker(instance_watcher->m_lock); + auto result = instance_watcher->m_notify_ops.erase( + std::make_pair(instance_id, this)); + ceph_assert(result > 0); + instance_watcher->m_notify_op_tracker.finish_op(); + } + + delete this; + } + + void complete(int r) override { + finish(r); + } +}; + +template <typename I> +struct InstanceWatcher<I>::C_SyncRequest : public Context { + InstanceWatcher<I> *instance_watcher; + std::string sync_id; + Context *on_start; + Context *on_complete = nullptr; + C_NotifyInstanceRequest *req = nullptr; + + C_SyncRequest(InstanceWatcher<I> *instance_watcher, + const std::string &sync_id, Context *on_start) + : instance_watcher(instance_watcher), sync_id(sync_id), + on_start(on_start) { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": sync_id=" + << sync_id << dendl; + } + + void finish(int r) override { + dout(10) << "C_SyncRequest: " << this << " " << __func__ << ": r=" + << r << dendl; + + if (on_start != nullptr) { + instance_watcher->handle_notify_sync_request(this, r); + } else { + instance_watcher->handle_notify_sync_complete(this, r); + delete this; + } + } + + // called twice + void complete(int r) override { + finish(r); + } +}; + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::InstanceWatcher: " \ + << this << " " << __func__ << ": " +template <typename I> +void InstanceWatcher<I>::get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish) { + librados::ObjectReadOperation op; + librbd::cls_client::mirror_instances_list_start(&op); + C_GetInstances *ctx = new C_GetInstances(instance_ids, on_finish); + librados::AioCompletion *aio_comp = create_rados_callback(ctx); + + int r = io_ctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &ctx->out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance(librados::IoCtx &io_ctx, + ContextWQ *work_queue, + const std::string &instance_id, + Context *on_finish) { + auto req = new C_RemoveInstanceRequest<I>(io_ctx, work_queue, instance_id, + on_finish); + req->send(); +} + +template <typename I> +InstanceWatcher<I> *InstanceWatcher<I>::create( + librados::IoCtx &io_ctx, ContextWQ *work_queue, + InstanceReplayer<I> *instance_replayer) { + return new InstanceWatcher<I>(io_ctx, work_queue, instance_replayer, + stringify(io_ctx.get_instance_id())); +} + +template <typename I> +InstanceWatcher<I>::InstanceWatcher(librados::IoCtx &io_ctx, + ContextWQ *work_queue, + InstanceReplayer<I> *instance_replayer, + const std::string &instance_id) + : Watcher(io_ctx, work_queue, RBD_MIRROR_INSTANCE_PREFIX + instance_id), + m_instance_replayer(instance_replayer), m_instance_id(instance_id), + m_lock(unique_lock_name("rbd::mirror::InstanceWatcher::m_lock", this)), + m_instance_lock(librbd::ManagedLock<I>::create( + m_ioctx, m_work_queue, m_oid, this, librbd::managed_lock::EXCLUSIVE, true, + m_cct->_conf.get_val<uint64_t>("rbd_blacklist_expire_seconds"))) { +} + +template <typename I> +InstanceWatcher<I>::~InstanceWatcher() { + ceph_assert(m_requests.empty()); + ceph_assert(m_notify_ops.empty()); + ceph_assert(m_notify_op_tracker.empty()); + ceph_assert(m_suspended_ops.empty()); + ceph_assert(m_inflight_sync_reqs.empty()); + ceph_assert(m_image_sync_throttler == nullptr); + m_instance_lock->destroy(); +} + +template <typename I> +int InstanceWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void InstanceWatcher<I>::init(Context *on_finish) { + dout(10) << "instance_id=" << m_instance_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + register_instance(); +} + +template <typename I> +void InstanceWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void InstanceWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + release_lock(); +} + +template <typename I> +void InstanceWatcher<I>::remove(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + get_instance_locker(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_acquire( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageAcquirePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_image_release( + const std::string &instance_id, const std::string &global_image_id, + Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", global_image_id=" + << global_image_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{ImageReleasePayload{request_id, global_image_id}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_peer_image_removed( + const std::string &instance_id, const std::string &global_image_id, + const std::string &peer_mirror_uuid, Context *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", " + << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + + uint64_t request_id = ++m_request_seq; + bufferlist bl; + encode(NotifyMessage{PeerImageRemovedPayload{request_id, global_image_id, + peer_mirror_uuid}}, bl); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), on_notify_ack); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_request(const std::string &sync_id, + Context *on_sync_start) { + dout(10) << "sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_inflight_sync_reqs.count(sync_id) == 0); + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncRequestPayload{request_id, sync_id}}, bl); + + auto sync_ctx = new C_SyncRequest(this, sync_id, on_sync_start); + sync_ctx->req = new C_NotifyInstanceRequest(this, "", request_id, + std::move(bl), sync_ctx); + + m_inflight_sync_reqs[sync_id] = sync_ctx; + sync_ctx->req->send(); +} + +template <typename I> +bool InstanceWatcher<I>::cancel_sync_request(const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + return false; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_start == nullptr) { + return false; + } + + ceph_assert(sync_ctx->req != nullptr); + sync_ctx->req->cancel(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_start(const std::string &instance_id, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + uint64_t request_id = ++m_request_seq; + + bufferlist bl; + encode(NotifyMessage{SyncStartPayload{request_id, sync_id}}, bl); + + auto ctx = new FunctionContext( + [this, sync_id] (int r) { + dout(10) << "finish: sync_id=" << sync_id << ", r=" << r << dendl; + Mutex::Locker locker(m_lock); + if (r != -ESTALE && m_image_sync_throttler != nullptr) { + m_image_sync_throttler->finish_op(sync_id); + } + }); + auto req = new C_NotifyInstanceRequest(this, instance_id, request_id, + std::move(bl), ctx); + req->send(); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const std::string &sync_id) { + Mutex::Locker locker(m_lock); + notify_sync_complete(m_lock, sync_id); +} + +template <typename I> +void InstanceWatcher<I>::notify_sync_complete(const Mutex&, + const std::string &sync_id) { + dout(10) << "sync_id=" << sync_id << dendl; + ceph_assert(m_lock.is_locked()); + + auto it = m_inflight_sync_reqs.find(sync_id); + ceph_assert(it != m_inflight_sync_reqs.end()); + + auto sync_ctx = it->second; + ceph_assert(sync_ctx->req == nullptr); + + m_inflight_sync_reqs.erase(it); + m_work_queue->queue(sync_ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_request(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + Context *on_start = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(sync_ctx->req != nullptr); + ceph_assert(sync_ctx->on_start != nullptr); + + if (sync_ctx->req->canceling) { + r = -ECANCELED; + } + + std::swap(sync_ctx->on_start, on_start); + sync_ctx->req = nullptr; + + if (r == -ECANCELED) { + notify_sync_complete(m_lock, sync_ctx->sync_id); + } + } + + on_start->complete(r == -ECANCELED ? r : 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify_sync_complete(C_SyncRequest *sync_ctx, + int r) { + dout(10) << "sync_id=" << sync_ctx->sync_id << ", r=" << r << dendl; + + if (sync_ctx->on_complete != nullptr) { + sync_ctx->on_complete->complete(r); + } +} + +template <typename I> +void InstanceWatcher<I>::print_sync_status(Formatter *f, stringstream *ss) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + if (m_image_sync_throttler != nullptr) { + m_image_sync_throttler->print_status(f, ss); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_acquire_leader() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_image_sync_throttler == nullptr); + m_image_sync_throttler = ImageSyncThrottler<I>::create(m_cct); + + m_leader_instance_id = m_instance_id; + unsuspend_notify_requests(); +} + +template <typename I> +void InstanceWatcher<I>::handle_release_leader() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_image_sync_throttler != nullptr); + + m_leader_instance_id.clear(); + + m_image_sync_throttler->drain(-ESTALE); + m_image_sync_throttler->destroy(); + m_image_sync_throttler = nullptr; +} + +template <typename I> +void InstanceWatcher<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + Mutex::Locker locker(m_lock); + + m_leader_instance_id = leader_instance_id; + + if (!m_leader_instance_id.empty()) { + unsuspend_notify_requests(); + } +} + +template <typename I> +void InstanceWatcher<I>::cancel_notify_requests( + const std::string &instance_id) { + dout(10) << "instance_id=" << instance_id << dendl; + + Mutex::Locker locker(m_lock); + + for (auto op : m_notify_ops) { + if (op.first == instance_id && !op.second->send_to_leader) { + op.second->cancel(); + } + } +} + +template <typename I> +void InstanceWatcher<I>::register_instance() { + ceph_assert(m_lock.is_locked()); + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_add(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_instance(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + if (r == 0) { + create_instance_object(); + return; + } + + derr << "error registering instance: " << cpp_strerror(r) << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + + +template <typename I> +void InstanceWatcher<I>::create_instance_object() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + op.create(true); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_create_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_create_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + m_ret_val = r; + unregister_instance(); + return; + } + + register_watch(); +} + +template <typename I> +void InstanceWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error registering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + + m_ret_val = r; + remove_instance_object(); + return; + } + + acquire_lock(); +} + +template <typename I> +void InstanceWatcher<I>::acquire_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_acquire_lock>(this)); + + m_instance_lock->acquire_lock(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + if (r < 0) { + + derr << "error acquiring instance lock: " << cpp_strerror(r) << dendl; + + m_ret_val = r; + unregister_watch(); + return; + } + + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::release_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_release_lock>(this)); + + m_instance_lock->shut_down(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_release_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error releasing instance lock: " << cpp_strerror(r) << dendl; + } + + unregister_watch(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + + Mutex::Locker locker(m_lock); + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::remove_instance_object() { + ceph_assert(m_lock.is_locked()); + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + op.remove(); + + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, + &InstanceWatcher<I>::handle_remove_instance_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_remove_instance_object(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + r = 0; + } + + if (r < 0) { + derr << "error removing " << m_oid << " object: " << cpp_strerror(r) + << dendl; + } + + Mutex::Locker locker(m_lock); + unregister_instance(); +} + +template <typename I> +void InstanceWatcher<I>::unregister_instance() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_instances_remove(&op, m_instance_id); + librados::AioCompletion *aio_comp = create_rados_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_unregister_instance>(this); + + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void InstanceWatcher<I>::handle_unregister_instance(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering instance: " << cpp_strerror(r) << dendl; + } + + Mutex::Locker locker(m_lock); + wait_for_notify_ops(); +} + +template <typename I> +void InstanceWatcher<I>::wait_for_notify_ops() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + for (auto op : m_notify_ops) { + op.second->cancel(); + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_wait_for_notify_ops>(this)); + + m_notify_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_wait_for_notify_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + ceph_assert(m_notify_ops.empty()); + + std::swap(on_finish, m_on_finish); + r = m_ret_val; + } + on_finish->complete(r); +} + +template <typename I> +void InstanceWatcher<I>::get_instance_locker() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_get_instance_locker>(this)); + + m_instance_lock->get_locker(&m_instance_locker, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_get_instance_locker(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + if (r != -ENOENT) { + derr << "error retrieving instance locker: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + break_instance_lock(); +} + +template <typename I> +void InstanceWatcher<I>::break_instance_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + InstanceWatcher<I>, &InstanceWatcher<I>::handle_break_instance_lock>(this)); + + m_instance_lock->break_lock(m_instance_locker, true, ctx); +} + +template <typename I> +void InstanceWatcher<I>::handle_break_instance_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + if (r != -ENOENT) { + derr << "error breaking instance lock: " << cpp_strerror(r) << dendl; + } + remove_instance_object(); + return; + } + + remove_instance_object(); +} + +template <typename I> +void InstanceWatcher<I>::suspend_notify_request(C_NotifyInstanceRequest *req) { + dout(10) << req << dendl; + + ceph_assert(m_lock.is_locked()); + + auto result = m_suspended_ops.insert(req).second; + ceph_assert(result); +} + +template <typename I> +bool InstanceWatcher<I>::unsuspend_notify_request( + C_NotifyInstanceRequest *req) { + dout(10) << req << dendl; + + ceph_assert(m_lock.is_locked()); + + auto result = m_suspended_ops.erase(req); + if (result == 0) { + return false; + } + + req->send(); + return true; +} + +template <typename I> +void InstanceWatcher<I>::unsuspend_notify_requests() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + std::set<C_NotifyInstanceRequest *> suspended_ops; + std::swap(m_suspended_ops, suspended_ops); + + for (auto op : suspended_ops) { + op->send(); + } +} + +template <typename I> +Context *InstanceWatcher<I>::prepare_request(const std::string &instance_id, + uint64_t request_id, + C_NotifyAck *on_notify_ack) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + Mutex::Locker locker(m_lock); + + Context *ctx = nullptr; + Request request(instance_id, request_id); + auto it = m_requests.find(request); + + if (it != m_requests.end()) { + dout(10) << "duplicate for in-progress request" << dendl; + delete it->on_notify_ack; + m_requests.erase(it); + } else { + ctx = create_async_context_callback( + m_work_queue, new FunctionContext( + [this, instance_id, request_id] (int r) { + complete_request(instance_id, request_id, r); + })); + } + + request.on_notify_ack = on_notify_ack; + m_requests.insert(request); + return ctx; +} + +template <typename I> +void InstanceWatcher<I>::complete_request(const std::string &instance_id, + uint64_t request_id, int r) { + dout(10) << "instance_id=" << instance_id << ", request_id=" << request_id + << dendl; + + C_NotifyAck *on_notify_ack; + { + Mutex::Locker locker(m_lock); + Request request(instance_id, request_id); + auto it = m_requests.find(request); + ceph_assert(it != m_requests.end()); + on_notify_ack = it->on_notify_ack; + m_requests.erase(it); + } + + encode(NotifyAckPayload(instance_id, request_id, r), on_notify_ack->out); + on_notify_ack->complete(0); +} + +template <typename I> +void InstanceWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + auto ctx = new C_NotifyAck(this, notify_id, handle); + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, stringify(notifier_id), ctx), + notify_message.payload); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_acquire( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new FunctionContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->acquire_image(this, global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_image_release( + const std::string &global_image_id, Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << dendl; + + auto ctx = new FunctionContext( + [this, global_image_id, on_finish] (int r) { + m_instance_replayer->release_image(global_image_id, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_peer_image_removed( + const std::string &global_image_id, const std::string &peer_mirror_uuid, + Context *on_finish) { + dout(10) << "global_image_id=" << global_image_id << ", " + << "peer_mirror_uuid=" << peer_mirror_uuid << dendl; + + auto ctx = new FunctionContext( + [this, peer_mirror_uuid, global_image_id, on_finish] (int r) { + m_instance_replayer->remove_peer_image(global_image_id, + peer_mirror_uuid, on_finish); + m_notify_op_tracker.finish_op(); + }); + + m_notify_op_tracker.start_op(); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_request(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + if (m_image_sync_throttler == nullptr) { + dout(10) << "sync request for non-leader" << dendl; + m_work_queue->queue(on_finish, -ESTALE); + return; + } + + Context *on_start = create_async_context_callback( + m_work_queue, new FunctionContext( + [this, instance_id, sync_id, on_finish] (int r) { + dout(10) << "handle_sync_request: finish: instance_id=" << instance_id + << ", sync_id=" << sync_id << ", r=" << r << dendl; + if (r == 0) { + notify_sync_start(instance_id, sync_id); + } + if (r == -ENOENT) { + r = 0; + } + on_finish->complete(r); + })); + m_image_sync_throttler->start_op(sync_id, on_start); +} + +template <typename I> +void InstanceWatcher<I>::handle_sync_start(const std::string &instance_id, + const std::string &sync_id, + Context *on_finish) { + dout(10) << "instance_id=" << instance_id << ", sync_id=" << sync_id << dendl; + + Mutex::Locker locker(m_lock); + + auto it = m_inflight_sync_reqs.find(sync_id); + if (it == m_inflight_sync_reqs.end()) { + dout(5) << "not found" << dendl; + m_work_queue->queue(on_finish, 0); + return; + } + + auto sync_ctx = it->second; + + if (sync_ctx->on_complete != nullptr) { + dout(5) << "duplicate request" << dendl; + m_work_queue->queue(sync_ctx->on_complete, -ESTALE); + } + + sync_ctx->on_complete = on_finish; +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_acquire: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_acquire(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "image_release: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_image_release(payload.global_image_id, on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "remove_peer_image: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish != nullptr) { + handle_peer_image_removed(payload.global_image_id, payload.peer_mirror_uuid, + on_finish); + } +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_request: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish == nullptr) { + return; + } + + handle_sync_request(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const SyncStartPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(10) << "sync_start: instance_id=" << instance_id << ", " + << "request_id=" << payload.request_id << dendl; + + auto on_finish = prepare_request(instance_id, payload.request_id, + on_notify_ack); + if (on_finish == nullptr) { + return; + } + + handle_sync_start(instance_id, payload.sync_id, on_finish); +} + +template <typename I> +void InstanceWatcher<I>::handle_payload(const std::string &instance_id, + const UnknownPayload &payload, + C_NotifyAck *on_notify_ack) { + dout(5) << "unknown: instance_id=" << instance_id << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::InstanceWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/InstanceWatcher.h b/src/tools/rbd_mirror/InstanceWatcher.h new file mode 100644 index 00000000..5ec1aef0 --- /dev/null +++ b/src/tools/rbd_mirror/InstanceWatcher.h @@ -0,0 +1,264 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCE_WATCHER_H +#define CEPH_RBD_MIRROR_INSTANCE_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "common/AsyncOpTracker.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "tools/rbd_mirror/instance_watcher/Types.h" + +namespace librbd { + +class ImageCtx; +template <typename> class ManagedLock; + +} + +namespace rbd { +namespace mirror { + +template <typename> class ImageSyncThrottler; +template <typename> class InstanceReplayer; +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class InstanceWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static void get_instances(librados::IoCtx &io_ctx, + std::vector<std::string> *instance_ids, + Context *on_finish); + static void remove_instance(librados::IoCtx &io_ctx, + ContextWQ *work_queue, + const std::string &instance_id, + Context *on_finish); + + static InstanceWatcher *create( + librados::IoCtx &io_ctx, ContextWQ *work_queue, + InstanceReplayer<ImageCtxT> *instance_replayer); + void destroy() { + delete this; + } + + InstanceWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue, + InstanceReplayer<ImageCtxT> *instance_replayer, + const std::string &instance_id); + ~InstanceWatcher() override; + + inline std::string &get_instance_id() { + return m_instance_id; + } + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + void remove(Context *on_finish); + + void notify_image_acquire(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_image_release(const std::string &instance_id, + const std::string &global_image_id, + Context *on_notify_ack); + void notify_peer_image_removed(const std::string &instance_id, + const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_notify_ack); + + void notify_sync_request(const std::string &sync_id, Context *on_sync_start); + bool cancel_sync_request(const std::string &sync_id); + void notify_sync_complete(const std::string &sync_id); + + void print_sync_status(Formatter *f, stringstream *ss); + + void cancel_notify_requests(const std::string &instance_id); + + void handle_acquire_leader(); + void handle_release_leader(); + void handle_update_leader(const std::string &leader_instance_id); + +private: + /** + * @verbatim + * + * BREAK_INSTANCE_LOCK -------\ + * ^ | + * | (error) | + * GET_INSTANCE_LOCKER * * *>| + * ^ (remove) | + * | | + * <uninitialized> <----------------+---- WAIT_FOR_NOTIFY_OPS + * | (init) ^ | ^ + * v (error) * | | + * REGISTER_INSTANCE * * * * * *|* *> UNREGISTER_INSTANCE + * | * | ^ + * v (error) * v | + * CREATE_INSTANCE_OBJECT * * * * * *> REMOVE_INSTANCE_OBJECT + * | * ^ + * v (error) * | + * REGISTER_WATCH * * * * * * * * * *> UNREGISTER_WATCH + * | * ^ + * v (error) * | + * ACQUIRE_LOCK * * * * * * * * * * * RELEASE_LOCK + * | ^ + * v (shut_down) | + * <watching> -------------------------------/ + * + * @endverbatim + */ + + struct C_NotifyInstanceRequest; + struct C_SyncRequest; + + typedef std::pair<std::string, std::string> Id; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + InstanceWatcher *instance_watcher; + std::string instance_id; + C_NotifyAck *on_notify_ack; + + HandlePayloadVisitor(InstanceWatcher *instance_watcher, + const std::string &instance_id, + C_NotifyAck *on_notify_ack) + : instance_watcher(instance_watcher), instance_id(instance_id), + on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + instance_watcher->handle_payload(instance_id, payload, on_notify_ack); + } + }; + + struct Request { + std::string instance_id; + uint64_t request_id; + C_NotifyAck *on_notify_ack = nullptr; + + Request(const std::string &instance_id, uint64_t request_id) + : instance_id(instance_id), request_id(request_id) { + } + + inline bool operator<(const Request &rhs) const { + return instance_id < rhs.instance_id || + (instance_id == rhs.instance_id && request_id < rhs.request_id); + } + }; + + Threads<ImageCtxT> *m_threads; + InstanceReplayer<ImageCtxT> *m_instance_replayer; + std::string m_instance_id; + + mutable Mutex m_lock; + librbd::ManagedLock<ImageCtxT> *m_instance_lock; + Context *m_on_finish = nullptr; + int m_ret_val = 0; + std::string m_leader_instance_id; + librbd::managed_lock::Locker m_instance_locker; + std::set<std::pair<std::string, C_NotifyInstanceRequest *>> m_notify_ops; + AsyncOpTracker m_notify_op_tracker; + uint64_t m_request_seq = 0; + std::set<Request> m_requests; + std::set<C_NotifyInstanceRequest *> m_suspended_ops; + std::map<std::string, C_SyncRequest *> m_inflight_sync_reqs; + ImageSyncThrottler<ImageCtxT> *m_image_sync_throttler = nullptr; + + void register_instance(); + void handle_register_instance(int r); + + void create_instance_object(); + void handle_create_instance_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void release_lock(); + void handle_release_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void remove_instance_object(); + void handle_remove_instance_object(int r); + + void unregister_instance(); + void handle_unregister_instance(int r); + + void wait_for_notify_ops(); + void handle_wait_for_notify_ops(int r); + + void get_instance_locker(); + void handle_get_instance_locker(int r); + + void break_instance_lock(); + void handle_break_instance_lock(int r); + + void suspend_notify_request(C_NotifyInstanceRequest *req); + bool unsuspend_notify_request(C_NotifyInstanceRequest *req); + void unsuspend_notify_requests(); + + void notify_sync_complete(const Mutex& lock, const std::string &sync_id); + void handle_notify_sync_request(C_SyncRequest *sync_ctx, int r); + void handle_notify_sync_complete(C_SyncRequest *sync_ctx, int r); + + void notify_sync_start(const std::string &instance_id, + const std::string &sync_id); + + Context *prepare_request(const std::string &instance_id, uint64_t request_id, + C_NotifyAck *on_notify_ack); + void complete_request(const std::string &instance_id, uint64_t request_id, + int r); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_image_acquire(const std::string &global_image_id, + Context *on_finish); + void handle_image_release(const std::string &global_image_id, + Context *on_finish); + void handle_peer_image_removed(const std::string &global_image_id, + const std::string &peer_mirror_uuid, + Context *on_finish); + + void handle_sync_request(const std::string &instance_id, + const std::string &sync_id, Context *on_finish); + void handle_sync_start(const std::string &instance_id, + const std::string &sync_id, Context *on_finish); + + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageAcquirePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::ImageReleasePayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::PeerImageRemovedPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncRequestPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::SyncStartPayload &payload, + C_NotifyAck *on_notify_ack); + void handle_payload(const std::string &instance_id, + const instance_watcher::UnknownPayload &payload, + C_NotifyAck *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCE_WATCHER_H diff --git a/src/tools/rbd_mirror/Instances.cc b/src/tools/rbd_mirror/Instances.cc new file mode 100644 index 00000000..b7a6cf11 --- /dev/null +++ b/src/tools/rbd_mirror/Instances.cc @@ -0,0 +1,359 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/stringify.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/Utils.h" +#include "InstanceWatcher.h" +#include "Instances.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Instances: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +Instances<I>::Instances(Threads<I> *threads, librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) : + m_threads(threads), m_ioctx(ioctx), m_instance_id(instance_id), + m_listener(listener), m_cct(reinterpret_cast<CephContext *>(ioctx.cct())), + m_lock("rbd::mirror::Instances " + ioctx.get_pool_name()) { +} + +template <typename I> +Instances<I>::~Instances() { +} + +template <typename I> +void Instances<I>::init(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + get_instances(); +} + +template <typename I> +void Instances<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + Context *ctx = new FunctionContext( + [this](int r) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + cancel_remove_task(); + wait_for_ops(); + }); + + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::unblock_listener() { + dout(5) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_listener_blocked); + m_listener_blocked = false; + + InstanceIds added_instance_ids; + for (auto& pair : m_instances) { + if (pair.second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(pair.first); + } + } + + if (!added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::acked(const InstanceIds& instance_ids) { + dout(10) << "instance_ids=" << instance_ids << dendl; + + Mutex::Locker locker(m_lock); + if (m_on_finish != nullptr) { + dout(5) << "received on shut down, ignoring" << dendl; + return; + } + + Context *ctx = new C_HandleAcked(this, instance_ids); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void Instances<I>::handle_acked(const InstanceIds& instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (m_on_finish != nullptr) { + dout(5) << "handled on shut down, ignoring" << dendl; + return; + } + + InstanceIds added_instance_ids; + auto time = ceph_clock_now(); + for (auto& instance_id : instance_ids) { + auto &instance = m_instances.insert( + std::make_pair(instance_id, Instance{})).first->second; + instance.acked_time = time; + if (instance.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + schedule_remove_task(time); + if (!m_listener_blocked && !added_instance_ids.empty()) { + m_threads->work_queue->queue( + new C_NotifyInstancesAdded(this, added_instance_ids), 0); + } +} + +template <typename I> +void Instances<I>::notify_instances_added(const InstanceIds& instance_ids) { + Mutex::Locker locker(m_lock); + InstanceIds added_instance_ids; + for (auto& instance_id : instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + added_instance_ids.push_back(instance_id); + } + } + + if (added_instance_ids.empty()) { + return; + } + + dout(5) << "instance_ids=" << added_instance_ids << dendl; + m_lock.Unlock(); + m_listener.handle_added(added_instance_ids); + m_lock.Lock(); + + for (auto& instance_id : added_instance_ids) { + auto it = m_instances.find(instance_id); + if (it != m_instances.end() && it->second.state == INSTANCE_STATE_ADDING) { + it->second.state = INSTANCE_STATE_IDLE; + } + } +} + +template <typename I> +void Instances<I>::notify_instances_removed(const InstanceIds& instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + m_listener.handle_removed(instance_ids); + + Mutex::Locker locker(m_lock); + for (auto& instance_id : instance_ids) { + m_instances.erase(instance_id); + } +} + +template <typename I> +void Instances<I>::list(std::vector<std::string> *instance_ids) { + dout(20) << dendl; + + Mutex::Locker locker(m_lock); + + for (auto it : m_instances) { + instance_ids->push_back(it.first); + } +} + + +template <typename I> +void Instances<I>::get_instances() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_context_callback< + Instances, &Instances<I>::handle_get_instances>(this); + + InstanceWatcher<I>::get_instances(m_ioctx, &m_instance_ids, ctx); +} + +template <typename I> +void Instances<I>::handle_get_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(on_finish, m_on_finish); + } + + if (r < 0) { + derr << "error retrieving instances: " << cpp_strerror(r) << dendl; + } else { + handle_acked(m_instance_ids); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::wait_for_ops() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + Instances, &Instances<I>::handle_wait_for_ops>(this)); + + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void Instances<I>::handle_wait_for_ops(int r) { + dout(10) << "r=" << r << dendl; + + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void Instances<I>::remove_instances(const utime_t& time) { + ceph_assert(m_lock.is_locked()); + + InstanceIds instance_ids; + for (auto& instance_pair : m_instances) { + if (instance_pair.first == m_instance_id) { + continue; + } + auto& instance = instance_pair.second; + if (instance.state != INSTANCE_STATE_REMOVING && + instance.acked_time <= time) { + instance.state = INSTANCE_STATE_REMOVING; + instance_ids.push_back(instance_pair.first); + } + } + ceph_assert(!instance_ids.empty()); + + dout(10) << "instance_ids=" << instance_ids << dendl; + Context* ctx = new FunctionContext([this, instance_ids](int r) { + handle_remove_instances(r, instance_ids); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(m_cct, ctx); + for (auto& instance_id : instance_ids) { + InstanceWatcher<I>::remove_instance(m_ioctx, m_threads->work_queue, + instance_id, gather_ctx->new_sub()); + } + + m_async_op_tracker.start_op(); + gather_ctx->activate(); +} + +template <typename I> +void Instances<I>::handle_remove_instances( + int r, const InstanceIds& instance_ids) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + dout(10) << "r=" << r << ", instance_ids=" << instance_ids << dendl; + ceph_assert(r == 0); + + // fire removed notification now that instances have been blacklisted + m_threads->work_queue->queue( + new C_NotifyInstancesRemoved(this, instance_ids), 0); + + // reschedule the timer for the next batch + schedule_remove_task(ceph_clock_now()); + m_async_op_tracker.finish_op(); +} + +template <typename I> +void Instances<I>::cancel_remove_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << dendl; + + bool canceled = m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void Instances<I>::schedule_remove_task(const utime_t& time) { + cancel_remove_task(); + if (m_on_finish != nullptr) { + dout(10) << "received on shut down, ignoring" << dendl; + return; + } + + int after = m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_heartbeat_interval") * + (1 + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats") + + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_acquire_attempts_before_break")); + + bool schedule = false; + utime_t oldest_time = time; + for (auto& instance : m_instances) { + if (instance.first == m_instance_id) { + continue; + } + if (instance.second.state == INSTANCE_STATE_REMOVING) { + // removal is already in-flight + continue; + } + + oldest_time = std::min(oldest_time, instance.second.acked_time); + schedule = true; + } + + if (!schedule) { + return; + } + + dout(10) << dendl; + + // schedule a time to fire when the oldest instance should be removed + m_timer_task = new FunctionContext( + [this, oldest_time](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + Mutex::Locker locker(m_lock); + m_timer_task = nullptr; + + remove_instances(oldest_time); + }); + + oldest_time += after; + m_threads->timer->add_event_at(oldest_time, m_timer_task); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Instances<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Instances.h b/src/tools/rbd_mirror/Instances.h new file mode 100644 index 00000000..dbfb16df --- /dev/null +++ b/src/tools/rbd_mirror/Instances.h @@ -0,0 +1,167 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCES_H +#define CEPH_RBD_MIRROR_INSTANCES_H + +#include <map> +#include <vector> + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "librbd/Watcher.h" +#include "tools/rbd_mirror/instances/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class Instances { +public: + typedef std::vector<std::string> InstanceIds; + + static Instances *create(Threads<ImageCtxT> *threads, + librados::IoCtx &ioctx, + const std::string& instance_id, + instances::Listener& listener) { + return new Instances(threads, ioctx, instance_id, listener); + } + void destroy() { + delete this; + } + + Instances(Threads<ImageCtxT> *threads, librados::IoCtx &ioctx, + const std::string& instance_id, instances::Listener& listener); + virtual ~Instances(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + void unblock_listener(); + + void acked(const InstanceIds& instance_ids); + + void list(std::vector<std::string> *instance_ids); + +private: + /** + * @verbatim + * + * <uninitialized> <---------------------\ + * | (init) ^ | + * v (error) * | + * GET_INSTANCES * * * * * WAIT_FOR_OPS + * | ^ + * v (shut_down) | + * <initialized> ------------------------/ + * . + * . (remove_instance) + * v + * REMOVE_INSTANCE + * + * @endverbatim + */ + + enum InstanceState { + INSTANCE_STATE_ADDING, + INSTANCE_STATE_IDLE, + INSTANCE_STATE_REMOVING + }; + + struct Instance { + utime_t acked_time{}; + InstanceState state = INSTANCE_STATE_ADDING; + }; + + struct C_NotifyBase : public Context { + Instances *instances; + InstanceIds instance_ids; + + C_NotifyBase(Instances *instances, const InstanceIds& instance_ids) + : instances(instances), instance_ids(instance_ids) { + instances->m_async_op_tracker.start_op(); + } + + void finish(int r) override { + execute(); + instances->m_async_op_tracker.finish_op(); + } + + virtual void execute() = 0; + }; + + struct C_HandleAcked : public C_NotifyBase { + C_HandleAcked(Instances *instances, const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->handle_acked(this->instance_ids); + } + }; + + struct C_NotifyInstancesAdded : public C_NotifyBase { + C_NotifyInstancesAdded(Instances *instances, + const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_added(this->instance_ids); + } + }; + + struct C_NotifyInstancesRemoved : public C_NotifyBase { + C_NotifyInstancesRemoved(Instances *instances, + const InstanceIds& instance_ids) + : C_NotifyBase(instances, instance_ids) { + } + + void execute() override { + this->instances->notify_instances_removed(this->instance_ids); + } + }; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_ioctx; + std::string m_instance_id; + instances::Listener& m_listener; + CephContext *m_cct; + + Mutex m_lock; + InstanceIds m_instance_ids; + std::map<std::string, Instance> m_instances; + Context *m_on_finish = nullptr; + AsyncOpTracker m_async_op_tracker; + + Context *m_timer_task = nullptr; + + bool m_listener_blocked = true; + + void handle_acked(const InstanceIds& instance_ids); + void notify_instances_added(const InstanceIds& instance_ids); + void notify_instances_removed(const InstanceIds& instance_ids); + + void get_instances(); + void handle_get_instances(int r); + + void wait_for_ops(); + void handle_wait_for_ops(int r); + + void remove_instances(const utime_t& time); + void handle_remove_instances(int r, const InstanceIds& instance_ids); + + void cancel_remove_task(); + void schedule_remove_task(const utime_t& time); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCES_H diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc new file mode 100644 index 00000000..0d4bde6f --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.cc @@ -0,0 +1,1145 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "LeaderWatcher.h" +#include "common/Timer.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "include/stringify.h" +#include "librbd/Utils.h" +#include "librbd/watcher/Types.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::LeaderWatcher: " \ + << this << " " << __func__ << ": " +namespace rbd { +namespace mirror { + +using namespace leader_watcher; + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +LeaderWatcher<I>::LeaderWatcher(Threads<I> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) + : Watcher(io_ctx, threads->work_queue, RBD_MIRROR_LEADER), + m_threads(threads), m_listener(listener), m_instances_listener(this), + m_lock("rbd::mirror::LeaderWatcher " + io_ctx.get_pool_name()), + m_notifier_id(librados::Rados(io_ctx).get_instance_id()), + m_instance_id(stringify(m_notifier_id)), + m_leader_lock(new LeaderLock(m_ioctx, m_work_queue, m_oid, this, true, + m_cct->_conf.get_val<uint64_t>( + "rbd_blacklist_expire_seconds"))) { +} + +template <typename I> +LeaderWatcher<I>::~LeaderWatcher() { + ceph_assert(m_status_watcher == nullptr); + ceph_assert(m_instances == nullptr); + ceph_assert(m_timer_task == nullptr); + + delete m_leader_lock; +} + +template <typename I> +std::string LeaderWatcher<I>::get_instance_id() { + return m_instance_id; +} + +template <typename I> +int LeaderWatcher<I>::init() { + C_SaferCond init_ctx; + init(&init_ctx); + return init_ctx.wait(); +} + +template <typename I> +void LeaderWatcher<I>::init(Context *on_finish) { + dout(10) << "notifier_id=" << m_notifier_id << dendl; + + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + create_leader_object(); +} + +template <typename I> +void LeaderWatcher<I>::create_leader_object() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + librados::ObjectWriteOperation op; + op.create(false); + + librados::AioCompletion *aio_comp = create_rados_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_create_leader_object>(this); + int r = m_ioctx.aio_operate(m_oid, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void LeaderWatcher<I>::handle_create_leader_object(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + if (r == 0) { + register_watch(); + return; + } + + derr << "error creating " << m_oid << " object: " << cpp_strerror(r) + << dendl; + + std::swap(on_finish, m_on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::register_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_register_watch>(this)); + + librbd::Watcher::register_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_register_watch(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + if (r < 0) { + Mutex::Locker locker(m_lock); + derr << "error registering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + ceph_assert(m_on_finish != nullptr); + std::swap(on_finish, m_on_finish); + } else { + Mutex::Locker locker(m_lock); + init_status_watcher(); + return; + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down() { + C_SaferCond shut_down_ctx; + shut_down(&shut_down_ctx); + int r = shut_down_ctx.wait(); + ceph_assert(r == 0); +} + +template <typename I> +void LeaderWatcher<I>::shut_down(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + ceph_assert(m_on_shut_down_finish == nullptr); + m_on_shut_down_finish = on_finish; + cancel_timer_task(); + shut_down_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_leader_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_shut_down_leader_lock>(this)); + + m_leader_lock->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error shutting down leader lock: " << cpp_strerror(r) << dendl; + } + + shut_down_status_watcher(); +} + +template <typename I> +void LeaderWatcher<I>::unregister_watch() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_unregister_watch>(this)); + + librbd::Watcher::unregister_watch(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_unregister_watch(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "error unregistering leader watcher for " << m_oid << " object: " + << cpp_strerror(r) << dendl; + } + wait_for_tasks(); +} + +template <typename I> +void LeaderWatcher<I>::wait_for_tasks() { + dout(10) << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + schedule_timer_task("wait for tasks", 0, false, + &LeaderWatcher<I>::handle_wait_for_tasks, true); +} + +template <typename I> +void LeaderWatcher<I>::handle_wait_for_tasks() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(m_on_shut_down_finish != nullptr); + + ceph_assert(!m_timer_op_tracker.empty()); + m_timer_op_tracker.finish_op(); + + auto ctx = new FunctionContext([this](int r) { + Context *on_finish; + { + // ensure lock isn't held when completing shut down + Mutex::Locker locker(m_lock); + ceph_assert(m_on_shut_down_finish != nullptr); + on_finish = m_on_shut_down_finish; + } + on_finish->complete(0); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +bool LeaderWatcher<I>::is_blacklisted() const { + std::lock_guard locker{m_lock}; + return m_blacklisted; +} + +template <typename I> +bool LeaderWatcher<I>::is_leader() const { + Mutex::Locker locker(m_lock); + + return is_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_leader(Mutex &lock) const { + ceph_assert(m_lock.is_locked()); + + bool leader = m_leader_lock->is_leader(); + dout(10) << leader << dendl; + return leader; +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader() const { + Mutex::Locker locker(m_lock); + + return is_releasing_leader(m_lock); +} + +template <typename I> +bool LeaderWatcher<I>::is_releasing_leader(Mutex &lock) const { + ceph_assert(m_lock.is_locked()); + + bool releasing = m_leader_lock->is_releasing_leader(); + dout(10) << releasing << dendl; + return releasing; +} + +template <typename I> +bool LeaderWatcher<I>::get_leader_instance_id(std::string *instance_id) const { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + if (is_leader(m_lock) || is_releasing_leader(m_lock)) { + *instance_id = m_instance_id; + return true; + } + + if (!m_locker.cookie.empty()) { + *instance_id = stringify(m_locker.entity.num()); + return true; + } + + return false; +} + +template <typename I> +void LeaderWatcher<I>::release_leader() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + if (!is_leader(m_lock)) { + return; + } + + release_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::list_instances(std::vector<std::string> *instance_ids) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + instance_ids->clear(); + if (m_instances != nullptr) { + m_instances->list(instance_ids); + } +} + +template <typename I> +void LeaderWatcher<I>::cancel_timer_task() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (m_timer_task == nullptr) { + return; + } + + dout(10) << m_timer_task << dendl; + bool canceled = m_threads->timer->cancel_event(m_timer_task); + ceph_assert(canceled); + m_timer_task = nullptr; +} + +template <typename I> +void LeaderWatcher<I>::schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback timer_callback, + bool shutting_down) { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (!shutting_down && m_on_shut_down_finish != nullptr) { + return; + } + + cancel_timer_task(); + + m_timer_task = new FunctionContext( + [this, leader, timer_callback](int r) { + ceph_assert(m_threads->timer_lock.is_locked()); + m_timer_task = nullptr; + + if (m_timer_op_tracker.empty()) { + Mutex::Locker locker(m_lock); + execute_timer_task(leader, timer_callback); + return; + } + + // old timer task is still running -- do not start next + // task until the previous task completes + if (m_timer_gate == nullptr) { + m_timer_gate = new C_TimerGate(this); + m_timer_op_tracker.wait_for_ops(m_timer_gate); + } + m_timer_gate->leader = leader; + m_timer_gate->timer_callback = timer_callback; + }); + + int after = delay_factor * m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_leader_heartbeat_interval"); + + dout(10) << "scheduling " << name << " after " << after << " sec (task " + << m_timer_task << ")" << dendl; + m_threads->timer->add_event_after(after, m_timer_task); +} + +template <typename I> +void LeaderWatcher<I>::execute_timer_task(bool leader, + TimerCallback timer_callback) { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(m_timer_op_tracker.empty()); + + if (is_leader(m_lock) != leader) { + return; + } + + m_timer_op_tracker.start_op(); + (this->*timer_callback)(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_acquire_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + if (r == -EAGAIN) { + dout(10) << "already locked" << dendl; + } else { + derr << "error acquiring leader lock: " << cpp_strerror(r) << dendl; + } + on_finish->complete(r); + return; + } + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + init_instances(); +} + +template <typename I> +void LeaderWatcher<I>::handle_pre_release_leader_lock(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + m_ret_val = 0; + + notify_listener(); +} + +template <typename I> +void LeaderWatcher<I>::handle_post_release_leader_lock(int r, + Context *on_finish) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + on_finish->complete(r); + return; + } + + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == nullptr); + m_on_finish = on_finish; + + notify_lock_released(); +} + +template <typename I> +void LeaderWatcher<I>::break_leader_lock() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_locker.cookie.empty()) { + get_locker(); + return; + } + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_break_leader_lock>(this)); + + m_leader_lock->break_lock(m_locker, true, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_break_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error breaking leader lock: " << cpp_strerror(r) << dendl; + schedule_acquire_leader_lock(1); + m_timer_op_tracker.finish_op(); + return; + } + + m_locker = {}; + m_acquire_attempts = 0; + acquire_leader_lock(); +} + +template <typename I> +void LeaderWatcher<I>::schedule_get_locker(bool reset_leader, + uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + if (reset_leader) { + m_locker = {}; + m_acquire_attempts = 0; + } + + schedule_timer_task("get locker", delay_factor, false, + &LeaderWatcher<I>::get_locker, false); +} + +template <typename I> +void LeaderWatcher<I>::get_locker() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + C_GetLocker *get_locker_ctx = new C_GetLocker(this); + Context *ctx = create_async_context_callback(m_work_queue, get_locker_ctx); + + m_leader_lock->get_locker(&get_locker_ctx->locker, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_get_locker(int r, + librbd::managed_lock::Locker& locker) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker mutex_locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (is_leader(m_lock)) { + m_locker = {}; + m_timer_op_tracker.finish_op(); + return; + } + + if (r == -ENOENT) { + m_locker = {}; + m_acquire_attempts = 0; + acquire_leader_lock(); + return; + } else if (r < 0) { + derr << "error retrieving leader locker: " << cpp_strerror(r) << dendl; + schedule_get_locker(true, 1); + m_timer_op_tracker.finish_op(); + return; + } + + bool notify_listener = false; + if (m_locker != locker) { + m_locker = locker; + notify_listener = true; + if (m_acquire_attempts > 1) { + dout(10) << "new lock owner detected -- resetting heartbeat counter" + << dendl; + m_acquire_attempts = 0; + } + } + + if (m_acquire_attempts >= m_cct->_conf.get_val<uint64_t>( + "rbd_mirror_leader_max_acquire_attempts_before_break")) { + dout(0) << "breaking leader lock after " << m_acquire_attempts << " " + << "failed attempts to acquire" << dendl; + break_leader_lock(); + return; + } + + schedule_acquire_leader_lock(1); + + if (!notify_listener) { + m_timer_op_tracker.finish_op(); + return; + } + + auto ctx = new FunctionContext( + [this](int r) { + std::string instance_id; + if (get_leader_instance_id(&instance_id)) { + m_listener->update_leader_handler(instance_id); + } + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + m_timer_op_tracker.finish_op(); + }); + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::schedule_acquire_leader_lock(uint32_t delay_factor) { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + + schedule_timer_task("acquire leader lock", + delay_factor * + m_cct->_conf.get_val<uint64_t>("rbd_mirror_leader_max_missed_heartbeats"), + false, &LeaderWatcher<I>::acquire_leader_lock, false); +} + +template <typename I> +void LeaderWatcher<I>::acquire_leader_lock() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + ++m_acquire_attempts; + dout(10) << "acquire_attempts=" << m_acquire_attempts << dendl; + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_acquire_leader_lock>(this)); + m_leader_lock->try_acquire_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_acquire_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + if (r < 0) { + if (r == -EAGAIN) { + dout(10) << "already locked" << dendl; + } else { + derr << "error acquiring lock: " << cpp_strerror(r) << dendl; + } + + get_locker(); + return; + } + + m_locker = {}; + m_acquire_attempts = 0; + + if (m_ret_val) { + dout(5) << "releasing due to error on notify" << dendl; + release_leader_lock(); + m_timer_op_tracker.finish_op(); + return; + } + + notify_heartbeat(); +} + +template <typename I> +void LeaderWatcher<I>::release_leader_lock() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_release_leader_lock>(this)); + + m_leader_lock->release_lock(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_release_leader_lock(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error releasing lock: " << cpp_strerror(r) << dendl; + return; + } + + schedule_acquire_leader_lock(1); +} + +template <typename I> +void LeaderWatcher<I>::init_status_watcher() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_status_watcher == nullptr); + + m_status_watcher = MirrorStatusWatcher<I>::create(m_ioctx, m_work_queue); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_status_watcher>(this); + + m_status_watcher->init(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_init_status_watcher(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error initializing mirror status watcher: " << cpp_strerror(r) + << cpp_strerror(r) << dendl; + } else { + schedule_acquire_leader_lock(0); + } + + ceph_assert(m_on_finish != nullptr); + std::swap(on_finish, m_on_finish); + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_status_watcher() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_status_watcher != nullptr); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback<LeaderWatcher<I>, + &LeaderWatcher<I>::handle_shut_down_status_watcher>(this)); + + m_status_watcher->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_status_watcher(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + m_status_watcher->destroy(); + m_status_watcher = nullptr; + + if (r < 0) { + derr << "error shutting mirror status watcher down: " << cpp_strerror(r) + << dendl; + } + + unregister_watch(); +} + +template <typename I> +void LeaderWatcher<I>::init_instances() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_instances == nullptr); + + m_instances = Instances<I>::create(m_threads, m_ioctx, m_instance_id, + m_instances_listener); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_init_instances>(this); + + m_instances->init(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_init_instances(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + if (r < 0) { + Mutex::Locker locker(m_lock); + derr << "error initializing instances: " << cpp_strerror(r) << dendl; + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } else { + Mutex::Locker locker(m_lock); + notify_listener(); + return; + } + + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::shut_down_instances() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + ceph_assert(m_instances != nullptr); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback<LeaderWatcher<I>, + &LeaderWatcher<I>::handle_shut_down_instances>(this)); + + m_instances->shut_down(ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_shut_down_instances(int r) { + dout(10) << "r=" << r << dendl; + ceph_assert(r == 0); + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + + m_instances->destroy(); + m_instances = nullptr; + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_listener() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_async_context_callback( + m_work_queue, create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_listener>(this)); + + if (is_leader(m_lock)) { + ctx = new FunctionContext( + [this, ctx](int r) { + m_listener->post_acquire_handler(ctx); + }); + } else { + ctx = new FunctionContext( + [this, ctx](int r) { + m_listener->pre_release_handler(ctx); + }); + } + m_work_queue->queue(ctx, 0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_listener(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker locker(m_lock); + + if (r < 0) { + derr << "error notifying listener: " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + if (is_leader(m_lock)) { + notify_lock_acquired(); + } else { + shut_down_instances(); + } +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_acquired() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_acquired>(this); + + bufferlist bl; + encode(NotifyMessage{LockAcquiredPayload{}}, bl); + + send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_acquired(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock acquired: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + + if (m_ret_val == 0) { + // listener should be ready for instance add/remove events now + m_instances->unblock_listener(); + } + } + on_finish->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::notify_lock_released() { + dout(10) << dendl; + + ceph_assert(m_lock.is_locked()); + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_lock_released>(this); + + bufferlist bl; + encode(NotifyMessage{LockReleasedPayload{}}, bl); + + send_notify(bl, nullptr, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_lock_released(int r) { + dout(10) << "r=" << r << dendl; + + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying leader lock released: " << cpp_strerror(r) + << dendl; + } + + ceph_assert(m_on_finish != nullptr); + std::swap(m_on_finish, on_finish); + } + on_finish->complete(r); +} + +template <typename I> +void LeaderWatcher<I>::notify_heartbeat() { + dout(10) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_lock.is_locked()); + ceph_assert(!m_timer_op_tracker.empty()); + + if (!is_leader(m_lock)) { + dout(5) << "not leader, canceling" << dendl; + m_timer_op_tracker.finish_op(); + return; + } + + Context *ctx = create_context_callback< + LeaderWatcher<I>, &LeaderWatcher<I>::handle_notify_heartbeat>(this); + + bufferlist bl; + encode(NotifyMessage{HeartbeatPayload{}}, bl); + + m_heartbeat_response.acks.clear(); + send_notify(bl, &m_heartbeat_response, ctx); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify_heartbeat(int r) { + dout(10) << "r=" << r << dendl; + + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + ceph_assert(!m_timer_op_tracker.empty()); + + m_timer_op_tracker.finish_op(); + if (m_leader_lock->is_shutdown()) { + dout(10) << "canceling due to shutdown" << dendl; + return; + } else if (!is_leader(m_lock)) { + return; + } + + if (r < 0 && r != -ETIMEDOUT) { + derr << "error notifying heartbeat: " << cpp_strerror(r) + << ", releasing leader" << dendl; + release_leader_lock(); + return; + } + + dout(10) << m_heartbeat_response.acks.size() << " acks received, " + << m_heartbeat_response.timeouts.size() << " timed out" << dendl; + + std::vector<std::string> instance_ids; + for (auto &it: m_heartbeat_response.acks) { + uint64_t notifier_id = it.first.gid; + instance_ids.push_back(stringify(notifier_id)); + } + if (!instance_ids.empty()) { + m_instances->acked(instance_ids); + } + + schedule_timer_task("heartbeat", 1, true, + &LeaderWatcher<I>::notify_heartbeat, false); +} + +template <typename I> +void LeaderWatcher<I>::handle_heartbeat(Context *on_notify_ack) { + dout(10) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (is_leader(m_lock)) { + dout(5) << "got another leader heartbeat, ignoring" << dendl; + } else { + cancel_timer_task(); + m_acquire_attempts = 0; + schedule_acquire_leader_lock(1); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_lock_acquired(Context *on_notify_ack) { + dout(10) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_acquired, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_lock_released(Context *on_notify_ack) { + dout(10) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (is_leader(m_lock)) { + dout(5) << "got another leader lock_released, ignoring" << dendl; + } else { + cancel_timer_task(); + schedule_get_locker(true, 0); + } + } + + on_notify_ack->complete(0); +} + +template <typename I> +void LeaderWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) { + dout(10) << "notify_id=" << notify_id << ", handle=" << handle << ", " + << "notifier_id=" << notifier_id << dendl; + + Context *ctx = new C_NotifyAck(this, notify_id, handle); + + if (notifier_id == m_notifier_id) { + dout(10) << "our own notification, ignoring" << dendl; + ctx->complete(0); + return; + } + + NotifyMessage notify_message; + try { + auto iter = bl.cbegin(); + decode(notify_message, iter); + } catch (const buffer::error &err) { + derr << "error decoding image notification: " << err.what() << dendl; + ctx->complete(0); + return; + } + + apply_visitor(HandlePayloadVisitor(this, ctx), notify_message.payload); +} + +template <typename I> +void LeaderWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLACKLISTED) { + dout(1) << "blacklisted detected" << dendl; + m_blacklisted = true; + return; + } + + m_leader_lock->reacquire_lock(nullptr); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const HeartbeatPayload &payload, + Context *on_notify_ack) { + dout(10) << "heartbeat" << dendl; + + handle_heartbeat(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockAcquiredPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_acquired" << dendl; + + handle_lock_acquired(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const LockReleasedPayload &payload, + Context *on_notify_ack) { + dout(10) << "lock_released" << dendl; + + handle_lock_released(on_notify_ack); +} + +template <typename I> +void LeaderWatcher<I>::handle_payload(const UnknownPayload &payload, + Context *on_notify_ack) { + dout(10) << "unknown" << dendl; + + on_notify_ack->complete(0); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::LeaderWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/LeaderWatcher.h b/src/tools/rbd_mirror/LeaderWatcher.h new file mode 100644 index 00000000..01ee0565 --- /dev/null +++ b/src/tools/rbd_mirror/LeaderWatcher.h @@ -0,0 +1,320 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_LEADER_WATCHER_H +#define CEPH_RBD_MIRROR_LEADER_WATCHER_H + +#include <list> +#include <memory> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "librbd/ManagedLock.h" +#include "librbd/Watcher.h" +#include "librbd/managed_lock/Types.h" +#include "librbd/watcher/Types.h" +#include "Instances.h" +#include "MirrorStatusWatcher.h" +#include "tools/rbd_mirror/instances/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class LeaderWatcher : protected librbd::Watcher { + using librbd::Watcher::unregister_watch; // Silence overloaded virtual warning +public: + static LeaderWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &io_ctx, + leader_watcher::Listener *listener) { + return new LeaderWatcher(threads, io_ctx, listener); + } + + LeaderWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &io_ctx, + leader_watcher::Listener *listener); + ~LeaderWatcher() override; + + int init(); + void shut_down(); + + void init(Context *on_finish); + void shut_down(Context *on_finish); + + bool is_blacklisted() const; + bool is_leader() const; + bool is_releasing_leader() const; + bool get_leader_instance_id(std::string *instance_id) const; + void release_leader(); + void list_instances(std::vector<std::string> *instance_ids); + + std::string get_instance_id(); + +private: + /** + * @verbatim + * + * <uninitialized> <------------------------------ WAIT_FOR_TASKS + * | (init) ^ ^ + * v * | + * CREATE_OBJECT * * * * * (error) UNREGISTER_WATCH + * | * ^ + * v * | + * REGISTER_WATCH * * * * * SHUT_DOWN_STATUS_WATCHER + * | * ^ + * v * | + * INIT_STATUS_WATCHER * * SHUT_DOWN_LEADER_LOCK + * | | + * | (no leader heartbeat and acquire failed) | + * | BREAK_LOCK <-------------------------------------\ | + * | | (no leader heartbeat) | | (shut down) + * | | /----------------------------------------\ | | + * | | | (lock_released received) | | + * | | | /-------------------------------------\ | | + * | | | | (lock_acquired or | | | + * | | | | heartbeat received) | | | + * | | | | (ENOENT) /-----------\ | | | + * | | | | * * * * * * * * * * | | | | | + * v v v v v (error) * v | | | | + * ACQUIRE_LEADER_LOCK * * * * *> GET_LOCKER ---> <secondary> + * | * ^ + * ....|...................*.................... .....|..................... + * . v * . . | post_release . + * .INIT_INSTANCES * * * * * . .NOTIFY_LOCK_RELEASED . + * . | . .....^..................... + * . v . | + * .NOTIFY_LISTENER . RELEASE_LEADER_LOCK + * . | . ^ + * . v . .....|..................... + * .NOTIFY_LOCK_ACQUIRED . . | . + * . | post_acquire . .SHUT_DOWN_INSTANCES . + * ....|........................................ . ^ . + * v . | . + * <leader> -----------------------------------> .NOTIFY_LISTENER . + * (shut_down, release_leader, . pre_release . + * notify error) ........................... + * @endverbatim + */ + + struct InstancesListener : public instances::Listener { + LeaderWatcher* leader_watcher; + + InstancesListener(LeaderWatcher* leader_watcher) + : leader_watcher(leader_watcher) { + } + + void handle_added(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_added(instance_ids); + } + + void handle_removed(const InstanceIds& instance_ids) override { + leader_watcher->m_listener->handle_instances_removed(instance_ids); + } + }; + + class LeaderLock : public librbd::ManagedLock<ImageCtxT> { + public: + typedef librbd::ManagedLock<ImageCtxT> Parent; + + LeaderLock(librados::IoCtx& ioctx, ContextWQ *work_queue, + const std::string& oid, LeaderWatcher *watcher, + bool blacklist_on_break_lock, + uint32_t blacklist_expire_seconds) + : Parent(ioctx, work_queue, oid, watcher, librbd::managed_lock::EXCLUSIVE, + blacklist_on_break_lock, blacklist_expire_seconds), + watcher(watcher) { + } + + bool is_leader() const { + Mutex::Locker locker(Parent::m_lock); + return Parent::is_state_post_acquiring() || Parent::is_state_locked(); + } + + bool is_releasing_leader() const { + Mutex::Locker locker(Parent::m_lock); + return Parent::is_state_pre_releasing(); + } + + protected: + void post_acquire_lock_handler(int r, Context *on_finish) { + if (r == 0) { + // lock is owned at this point + Mutex::Locker locker(Parent::m_lock); + Parent::set_state_post_acquiring(); + } + watcher->handle_post_acquire_leader_lock(r, on_finish); + } + void pre_release_lock_handler(bool shutting_down, + Context *on_finish) { + watcher->handle_pre_release_leader_lock(on_finish); + } + void post_release_lock_handler(bool shutting_down, int r, + Context *on_finish) { + watcher->handle_post_release_leader_lock(r, on_finish); + } + private: + LeaderWatcher *watcher; + }; + + struct HandlePayloadVisitor : public boost::static_visitor<void> { + LeaderWatcher *leader_watcher; + Context *on_notify_ack; + + HandlePayloadVisitor(LeaderWatcher *leader_watcher, Context *on_notify_ack) + : leader_watcher(leader_watcher), on_notify_ack(on_notify_ack) { + } + + template <typename Payload> + inline void operator()(const Payload &payload) const { + leader_watcher->handle_payload(payload, on_notify_ack); + } + }; + + struct C_GetLocker : public Context { + LeaderWatcher *leader_watcher; + librbd::managed_lock::Locker locker; + + C_GetLocker(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->handle_get_locker(r, locker); + } + }; + + typedef void (LeaderWatcher<ImageCtxT>::*TimerCallback)(); + + struct C_TimerGate : public Context { + LeaderWatcher *leader_watcher; + + bool leader = false; + TimerCallback timer_callback = nullptr; + + C_TimerGate(LeaderWatcher *leader_watcher) + : leader_watcher(leader_watcher) { + } + + void finish(int r) override { + leader_watcher->m_timer_gate = nullptr; + leader_watcher->execute_timer_task(leader, timer_callback); + } + }; + + Threads<ImageCtxT> *m_threads; + leader_watcher::Listener *m_listener; + + InstancesListener m_instances_listener; + mutable Mutex m_lock; + uint64_t m_notifier_id; + std::string m_instance_id; + LeaderLock *m_leader_lock; + Context *m_on_finish = nullptr; + Context *m_on_shut_down_finish = nullptr; + uint64_t m_acquire_attempts = 0; + int m_ret_val = 0; + MirrorStatusWatcher<ImageCtxT> *m_status_watcher = nullptr; + Instances<ImageCtxT> *m_instances = nullptr; + librbd::managed_lock::Locker m_locker; + + bool m_blacklisted = false; + + AsyncOpTracker m_timer_op_tracker; + Context *m_timer_task = nullptr; + C_TimerGate *m_timer_gate = nullptr; + + librbd::watcher::NotifyResponse m_heartbeat_response; + + bool is_leader(Mutex &m_lock) const; + bool is_releasing_leader(Mutex &m_lock) const; + + void cancel_timer_task(); + void schedule_timer_task(const std::string &name, + int delay_factor, bool leader, + TimerCallback callback, bool shutting_down); + void execute_timer_task(bool leader, TimerCallback timer_callback); + + void create_leader_object(); + void handle_create_leader_object(int r); + + void register_watch(); + void handle_register_watch(int r); + + void shut_down_leader_lock(); + void handle_shut_down_leader_lock(int r); + + void unregister_watch(); + void handle_unregister_watch(int r); + + void wait_for_tasks(); + void handle_wait_for_tasks(); + + void break_leader_lock(); + void handle_break_leader_lock(int r); + + void schedule_get_locker(bool reset_leader, uint32_t delay_factor); + void get_locker(); + void handle_get_locker(int r, librbd::managed_lock::Locker& locker); + + void schedule_acquire_leader_lock(uint32_t delay_factor); + void acquire_leader_lock(); + void handle_acquire_leader_lock(int r); + + void release_leader_lock(); + void handle_release_leader_lock(int r); + + void init_status_watcher(); + void handle_init_status_watcher(int r); + + void shut_down_status_watcher(); + void handle_shut_down_status_watcher(int r); + + void init_instances(); + void handle_init_instances(int r); + + void shut_down_instances(); + void handle_shut_down_instances(int r); + + void notify_listener(); + void handle_notify_listener(int r); + + void notify_lock_acquired(); + void handle_notify_lock_acquired(int r); + + void notify_lock_released(); + void handle_notify_lock_released(int r); + + void notify_heartbeat(); + void handle_notify_heartbeat(int r); + + void handle_post_acquire_leader_lock(int r, Context *on_finish); + void handle_pre_release_leader_lock(Context *on_finish); + void handle_post_release_leader_lock(int r, Context *on_finish); + + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; + + void handle_rewatch_complete(int r) override; + + void handle_heartbeat(Context *on_ack); + void handle_lock_acquired(Context *on_ack); + void handle_lock_released(Context *on_ack); + + void handle_payload(const leader_watcher::HeartbeatPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockAcquiredPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::LockReleasedPayload &payload, + Context *on_notify_ack); + void handle_payload(const leader_watcher::UnknownPayload &payload, + Context *on_notify_ack); +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_LEADER_WATCHER_H diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc new file mode 100644 index 00000000..ef18a0b6 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.cc @@ -0,0 +1,448 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <signal.h> + +#include <boost/range/adaptor/map.hpp> + +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "Mirror.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::Mirror: " << this << " " \ + << __func__ << ": " + +using std::list; +using std::map; +using std::set; +using std::string; +using std::unique_ptr; +using std::vector; + +using librados::Rados; +using librados::IoCtx; +using librbd::mirror_peer_t; + +namespace rbd { +namespace mirror { + +namespace { + +class MirrorAdminSocketCommand { +public: + virtual ~MirrorAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; +}; + +class StatusCommand : public MirrorAdminSocketCommand { +public: + explicit StatusCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->print_status(f, ss); + return true; + } + +private: + Mirror *mirror; +}; + +class StartCommand : public MirrorAdminSocketCommand { +public: + explicit StartCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->start(); + return true; + } + +private: + Mirror *mirror; +}; + +class StopCommand : public MirrorAdminSocketCommand { +public: + explicit StopCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->stop(); + return true; + } + +private: + Mirror *mirror; +}; + +class RestartCommand : public MirrorAdminSocketCommand { +public: + explicit RestartCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->restart(); + return true; + } + +private: + Mirror *mirror; +}; + +class FlushCommand : public MirrorAdminSocketCommand { +public: + explicit FlushCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->flush(); + return true; + } + +private: + Mirror *mirror; +}; + +class LeaderReleaseCommand : public MirrorAdminSocketCommand { +public: + explicit LeaderReleaseCommand(Mirror *mirror) : mirror(mirror) {} + + bool call(Formatter *f, stringstream *ss) override { + mirror->release_leader(); + return true; + } + +private: + Mirror *mirror; +}; + +} // anonymous namespace + +class MirrorAdminSocketHook : public AdminSocketHook { +public: + MirrorAdminSocketHook(CephContext *cct, Mirror *mirror) : + admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status"; + r = admin_socket->register_command(command, command, this, + "get status for rbd mirror"); + if (r == 0) { + commands[command] = new StatusCommand(mirror); + } + + command = "rbd mirror start"; + r = admin_socket->register_command(command, command, this, + "start rbd mirror"); + if (r == 0) { + commands[command] = new StartCommand(mirror); + } + + command = "rbd mirror stop"; + r = admin_socket->register_command(command, command, this, + "stop rbd mirror"); + if (r == 0) { + commands[command] = new StopCommand(mirror); + } + + command = "rbd mirror restart"; + r = admin_socket->register_command(command, command, this, + "restart rbd mirror"); + if (r == 0) { + commands[command] = new RestartCommand(mirror); + } + + command = "rbd mirror flush"; + r = admin_socket->register_command(command, command, this, + "flush rbd mirror"); + if (r == 0) { + commands[command] = new FlushCommand(mirror); + } + + command = "rbd mirror leader release"; + r = admin_socket->register_command(command, command, this, + "release rbd mirror leader"); + if (r == 0) { + commands[command] = new LeaderReleaseCommand(mirror); + } + } + + ~MirrorAdminSocketHook() override { + for (Commands::const_iterator i = commands.begin(); i != commands.end(); + ++i) { + (void)admin_socket->unregister_command(i->first); + delete i->second; + } + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + Commands::const_iterator i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, MirrorAdminSocketCommand*, std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +Mirror::Mirror(CephContext *cct, const std::vector<const char*> &args) : + m_cct(cct), + m_args(args), + m_lock("rbd::mirror::Mirror"), + m_local(new librados::Rados()), + m_asok_hook(new MirrorAdminSocketHook(cct, this)) +{ + m_threads = + &(cct->lookup_or_create_singleton_object<Threads<librbd::ImageCtx>>( + "rbd_mirror::threads", false, cct)); + m_service_daemon.reset(new ServiceDaemon<>(m_cct, m_local, m_threads)); +} + +Mirror::~Mirror() +{ + delete m_asok_hook; +} + +void Mirror::handle_signal(int signum) +{ + dout(20) << signum << dendl; + + Mutex::Locker l(m_lock); + + switch (signum) { + case SIGHUP: + for (auto &it : m_pool_replayers) { + it.second->reopen_logs(); + } + g_ceph_context->reopen_logs(); + break; + + case SIGINT: + case SIGTERM: + m_stopping = true; + m_cond.Signal(); + break; + + default: + ceph_abort_msgf("unexpected signal %d", signum); + } +} + +int Mirror::init() +{ + int r = m_local->init_with_context(m_cct); + if (r < 0) { + derr << "could not initialize rados handle" << dendl; + return r; + } + + r = m_local->connect(); + if (r < 0) { + derr << "error connecting to local cluster" << dendl; + return r; + } + + r = m_service_daemon->init(); + if (r < 0) { + derr << "error registering service daemon: " << cpp_strerror(r) << dendl; + return r; + } + + m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock, + m_service_daemon.get())); + return r; +} + +void Mirror::run() +{ + dout(20) << "enter" << dendl; + while (!m_stopping) { + m_local_cluster_watcher->refresh_pools(); + Mutex::Locker l(m_lock); + if (!m_manual_stop) { + update_pool_replayers(m_local_cluster_watcher->get_pool_peers()); + } + m_cond.WaitInterval( + m_lock, + utime_t(m_cct->_conf.get_val<uint64_t>("rbd_mirror_pool_replayers_refresh_interval"), 0)); + } + + // stop all pool replayers in parallel + Mutex::Locker locker(m_lock); + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(false); + } + dout(20) << "return" << dendl; +} + +void Mirror::print_status(Formatter *f, stringstream *ss) +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + if (f) { + f->open_object_section("mirror_status"); + f->open_array_section("pool_replayers"); + }; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->print_status(f, ss); + } + + if (f) { + f->close_section(); + f->close_section(); + f->flush(*ss); + } +} + +void Mirror::start() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->start(); + } +} + +void Mirror::stop() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = true; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->stop(true); + } +} + +void Mirror::restart() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->restart(); + } +} + +void Mirror::flush() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping || m_manual_stop) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->flush(); + } +} + +void Mirror::release_leader() +{ + dout(20) << "enter" << dendl; + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + for (auto &pool_replayer : m_pool_replayers) { + pool_replayer.second->release_leader(); + } +} + +void Mirror::update_pool_replayers(const PoolPeers &pool_peers) +{ + dout(20) << "enter" << dendl; + ceph_assert(m_lock.is_locked()); + + // remove stale pool replayers before creating new pool replayers + for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) { + auto &peer = it->first.second; + auto pool_peer_it = pool_peers.find(it->first.first); + if (pool_peer_it == pool_peers.end() || + pool_peer_it->second.find(peer) == pool_peer_it->second.end()) { + dout(20) << "removing pool replayer for " << peer << dendl; + // TODO: make async + it->second->shut_down(); + it = m_pool_replayers.erase(it); + } else { + ++it; + } + } + + for (auto &kv : pool_peers) { + for (auto &peer : kv.second) { + PoolPeer pool_peer(kv.first, peer); + + auto pool_replayers_it = m_pool_replayers.find(pool_peer); + if (pool_replayers_it != m_pool_replayers.end()) { + auto& pool_replayer = pool_replayers_it->second; + if (pool_replayer->is_blacklisted()) { + derr << "restarting blacklisted pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(); + } else if (!pool_replayer->is_running()) { + derr << "restarting failed pool replayer for " << peer << dendl; + // TODO: make async + pool_replayer->shut_down(); + pool_replayer->init(); + } + } else { + dout(20) << "starting pool replayer for " << peer << dendl; + unique_ptr<PoolReplayer<>> pool_replayer(new PoolReplayer<>( + m_threads, m_service_daemon.get(), kv.first, peer, m_args)); + + // TODO: make async + pool_replayer->init(); + m_pool_replayers.emplace(pool_peer, std::move(pool_replayer)); + } + } + + // TODO currently only support a single peer + } +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h new file mode 100644 index 00000000..153c0bc5 --- /dev/null +++ b/src/tools/rbd_mirror/Mirror.h @@ -0,0 +1,77 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_H +#define CEPH_RBD_MIRROR_H + +#include "common/ceph_context.h" +#include "common/Mutex.h" +#include "include/rados/librados.hpp" +#include "ClusterWatcher.h" +#include "PoolReplayer.h" +#include "tools/rbd_mirror/Types.h" + +#include <set> +#include <map> +#include <memory> +#include <atomic> + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct ServiceDaemon; +template <typename> struct Threads; +class MirrorAdminSocketHook; + +/** + * Contains the main loop and overall state for rbd-mirror. + * + * Sets up mirroring, and coordinates between noticing config + * changes and applying them. + */ +class Mirror { +public: + Mirror(CephContext *cct, const std::vector<const char*> &args); + Mirror(const Mirror&) = delete; + Mirror& operator=(const Mirror&) = delete; + ~Mirror(); + + int init(); + void run(); + void handle_signal(int signum); + + void print_status(Formatter *f, stringstream *ss); + void start(); + void stop(); + void restart(); + void flush(); + void release_leader(); + +private: + typedef ClusterWatcher::PoolPeers PoolPeers; + typedef std::pair<int64_t, PeerSpec> PoolPeer; + + void update_pool_replayers(const PoolPeers &pool_peers); + + CephContext *m_cct; + std::vector<const char*> m_args; + Threads<librbd::ImageCtx> *m_threads = nullptr; + Mutex m_lock; + Cond m_cond; + RadosRef m_local; + std::unique_ptr<ServiceDaemon<librbd::ImageCtx>> m_service_daemon; + + // monitor local cluster for config changes in peers + std::unique_ptr<ClusterWatcher> m_local_cluster_watcher; + std::map<PoolPeer, std::unique_ptr<PoolReplayer<>>> m_pool_replayers; + std::atomic<bool> m_stopping = { false }; + bool m_manual_stop = false; + MirrorAdminSocketHook *m_asok_hook; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_H diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc new file mode 100644 index 00000000..b935bc5c --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "MirrorStatusWatcher.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::MirrorStatusWatcher: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { + +using librbd::util::create_rados_callback; + +template <typename I> +MirrorStatusWatcher<I>::MirrorStatusWatcher(librados::IoCtx &io_ctx, + ContextWQ *work_queue) + : Watcher(io_ctx, work_queue, RBD_MIRRORING) { +} + +template <typename I> +MirrorStatusWatcher<I>::~MirrorStatusWatcher() { +} + +template <typename I> +void MirrorStatusWatcher<I>::init(Context *on_finish) { + dout(20) << dendl; + + on_finish = new FunctionContext( + [this, on_finish] (int r) { + if (r < 0) { + derr << "error removing down statuses: " << cpp_strerror(r) << dendl; + on_finish->complete(r); + return; + } + register_watch(on_finish); + }); + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_status_remove_down(&op); + librados::AioCompletion *aio_comp = create_rados_callback(on_finish); + + int r = m_ioctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void MirrorStatusWatcher<I>::shut_down(Context *on_finish) { + dout(20) << dendl; + + unregister_watch(on_finish); +} + +template <typename I> +void MirrorStatusWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, + bufferlist &bl) { + dout(20) << dendl; + + bufferlist out; + acknowledge_notify(notify_id, handle, out); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::MirrorStatusWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.h b/src/tools/rbd_mirror/MirrorStatusWatcher.h new file mode 100644 index 00000000..155f8cc8 --- /dev/null +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H +#define CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H + +#include "librbd/Watcher.h" + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +class MirrorStatusWatcher : protected librbd::Watcher { +public: + static MirrorStatusWatcher *create(librados::IoCtx &io_ctx, + ContextWQ *work_queue) { + return new MirrorStatusWatcher(io_ctx, work_queue); + } + void destroy() { + delete this; + } + + MirrorStatusWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue); + ~MirrorStatusWatcher() override; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_notify(uint64_t notify_id, uint64_t handle, + uint64_t notifier_id, bufferlist &bl) override; +}; + +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_MIRROR_STATUS_WATCHER_H diff --git a/src/tools/rbd_mirror/PoolReplayer.cc b/src/tools/rbd_mirror/PoolReplayer.cc new file mode 100644 index 00000000..35d32eb5 --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.cc @@ -0,0 +1,1133 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PoolReplayer.h" +#include <boost/bind.hpp> +#include "common/Formatter.h" +#include "common/admin_socket.h" +#include "common/ceph_argparse.h" +#include "common/code_environment.h" +#include "common/common_init.h" +#include "common/debug.h" +#include "common/errno.h" +#include "include/stringify.h" +#include "cls/rbd/cls_rbd_client.h" +#include "global/global_context.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/Watcher.h" +#include "librbd/api/Config.h" +#include "librbd/api/Mirror.h" +#include "ImageMap.h" +#include "InstanceReplayer.h" +#include "InstanceWatcher.h" +#include "LeaderWatcher.h" +#include "ServiceDaemon.h" +#include "Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolReplayer: " \ + << this << " " << __func__ << ": " + +using std::chrono::seconds; +using std::map; +using std::string; +using std::unique_ptr; +using std::vector; + +using librbd::cls_client::dir_get_name; +using librbd::util::create_async_context_callback; + +namespace rbd { +namespace mirror { + +using ::operator<<; + +namespace { + +const std::string SERVICE_DAEMON_INSTANCE_ID_KEY("instance_id"); +const std::string SERVICE_DAEMON_LEADER_KEY("leader"); +const std::string SERVICE_DAEMON_LOCAL_COUNT_KEY("image_local_count"); +const std::string SERVICE_DAEMON_REMOTE_COUNT_KEY("image_remote_count"); + +const std::vector<std::string> UNIQUE_PEER_CONFIG_KEYS { + {"monmap", "mon_host", "mon_dns_srv_name", "key", "keyfile", "keyring"}}; + +template <typename I> +class PoolReplayerAdminSocketCommand { +public: + PoolReplayerAdminSocketCommand(PoolReplayer<I> *pool_replayer) + : pool_replayer(pool_replayer) { + } + virtual ~PoolReplayerAdminSocketCommand() {} + virtual bool call(Formatter *f, stringstream *ss) = 0; +protected: + PoolReplayer<I> *pool_replayer; +}; + +template <typename I> +class StatusCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StatusCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->print_status(f, ss); + return true; + } +}; + +template <typename I> +class StartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->start(); + return true; + } +}; + +template <typename I> +class StopCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit StopCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->stop(true); + return true; + } +}; + +template <typename I> +class RestartCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit RestartCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->restart(); + return true; + } +}; + +template <typename I> +class FlushCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit FlushCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->flush(); + return true; + } +}; + +template <typename I> +class LeaderReleaseCommand : public PoolReplayerAdminSocketCommand<I> { +public: + explicit LeaderReleaseCommand(PoolReplayer<I> *pool_replayer) + : PoolReplayerAdminSocketCommand<I>(pool_replayer) { + } + + bool call(Formatter *f, stringstream *ss) override { + this->pool_replayer->release_leader(); + return true; + } +}; + +template <typename I> +class PoolReplayerAdminSocketHook : public AdminSocketHook { +public: + PoolReplayerAdminSocketHook(CephContext *cct, const std::string &name, + PoolReplayer<I> *pool_replayer) + : admin_socket(cct->get_admin_socket()) { + std::string command; + int r; + + command = "rbd mirror status " + name; + r = admin_socket->register_command(command, command, this, + "get status for rbd mirror " + name); + if (r == 0) { + commands[command] = new StatusCommand<I>(pool_replayer); + } + + command = "rbd mirror start " + name; + r = admin_socket->register_command(command, command, this, + "start rbd mirror " + name); + if (r == 0) { + commands[command] = new StartCommand<I>(pool_replayer); + } + + command = "rbd mirror stop " + name; + r = admin_socket->register_command(command, command, this, + "stop rbd mirror " + name); + if (r == 0) { + commands[command] = new StopCommand<I>(pool_replayer); + } + + command = "rbd mirror restart " + name; + r = admin_socket->register_command(command, command, this, + "restart rbd mirror " + name); + if (r == 0) { + commands[command] = new RestartCommand<I>(pool_replayer); + } + + command = "rbd mirror flush " + name; + r = admin_socket->register_command(command, command, this, + "flush rbd mirror " + name); + if (r == 0) { + commands[command] = new FlushCommand<I>(pool_replayer); + } + + command = "rbd mirror leader release " + name; + r = admin_socket->register_command(command, command, this, + "release rbd mirror leader " + name); + if (r == 0) { + commands[command] = new LeaderReleaseCommand<I>(pool_replayer); + } + } + + ~PoolReplayerAdminSocketHook() override { + for (auto i = commands.begin(); i != commands.end(); ++i) { + (void)admin_socket->unregister_command(i->first); + delete i->second; + } + } + + bool call(std::string_view command, const cmdmap_t& cmdmap, + std::string_view format, bufferlist& out) override { + auto i = commands.find(command); + ceph_assert(i != commands.end()); + Formatter *f = Formatter::create(format); + stringstream ss; + bool r = i->second->call(f, &ss); + delete f; + out.append(ss); + return r; + } + +private: + typedef std::map<std::string, PoolReplayerAdminSocketCommand<I>*, + std::less<>> Commands; + + AdminSocket *admin_socket; + Commands commands; +}; + +} // anonymous namespace + +template <typename I> +PoolReplayer<I>::PoolReplayer(Threads<I> *threads, + ServiceDaemon<I>* service_daemon, + int64_t local_pool_id, const PeerSpec &peer, + const std::vector<const char*> &args) : + m_threads(threads), + m_service_daemon(service_daemon), + m_local_pool_id(local_pool_id), + m_peer(peer), + m_args(args), + m_lock(stringify("rbd::mirror::PoolReplayer ") + stringify(peer)), + m_local_pool_watcher_listener(this, true), + m_remote_pool_watcher_listener(this, false), + m_image_map_listener(this), + m_pool_replayer_thread(this), + m_leader_listener(this) +{ +} + +template <typename I> +PoolReplayer<I>::~PoolReplayer() +{ + delete m_asok_hook; + shut_down(); +} + +template <typename I> +bool PoolReplayer<I>::is_blacklisted() const { + Mutex::Locker locker(m_lock); + return m_blacklisted; +} + +template <typename I> +bool PoolReplayer<I>::is_leader() const { + Mutex::Locker locker(m_lock); + return m_leader_watcher && m_leader_watcher->is_leader(); +} + +template <typename I> +bool PoolReplayer<I>::is_running() const { + return m_pool_replayer_thread.is_started(); +} + +template <typename I> +void PoolReplayer<I>::init() +{ + Mutex::Locker l(m_lock); + + ceph_assert(!m_pool_replayer_thread.is_started()); + + // reset state + m_stopping = false; + m_blacklisted = false; + + dout(10) << "replaying for " << m_peer << dendl; + int r = init_rados(g_ceph_context->_conf->cluster, + g_ceph_context->_conf->name.to_str(), + "", "", "local cluster", &m_local_rados, false); + if (r < 0) { + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to connect to local cluster"); + return; + } + + r = init_rados(m_peer.cluster_name, m_peer.client_name, + m_peer.mon_host, m_peer.key, + std::string("remote peer ") + stringify(m_peer), + &m_remote_rados, true); + if (r < 0) { + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to connect to remote cluster"); + return; + } + + r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx); + if (r < 0) { + derr << "error accessing local pool " << m_local_pool_id << ": " + << cpp_strerror(r) << dendl; + return; + } + + auto cct = reinterpret_cast<CephContext *>(m_local_io_ctx.cct()); + librbd::api::Config<I>::apply_pool_overrides(m_local_io_ctx, &cct->_conf); + + std::string local_mirror_uuid; + r = librbd::cls_client::mirror_uuid_get(&m_local_io_ctx, + &local_mirror_uuid); + if (r < 0) { + derr << "failed to retrieve local mirror uuid from pool " + << m_local_io_ctx.get_pool_name() << ": " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to query local mirror uuid"); + return; + } + + r = m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(), + m_remote_io_ctx); + if (r < 0) { + derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name() + << ": " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_WARNING, + "unable to access remote pool"); + return; + } + + dout(10) << "connected to " << m_peer << dendl; + + m_instance_replayer.reset(InstanceReplayer<I>::create( + m_threads, m_service_daemon, m_local_rados, local_mirror_uuid, + m_local_pool_id)); + m_instance_replayer->init(); + m_instance_replayer->add_peer(m_peer.uuid, m_remote_io_ctx); + + m_instance_watcher.reset(InstanceWatcher<I>::create( + m_local_io_ctx, m_threads->work_queue, m_instance_replayer.get())); + r = m_instance_watcher->init(); + if (r < 0) { + derr << "error initializing instance watcher: " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize instance messenger object"); + return; + } + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_INSTANCE_ID_KEY, + m_instance_watcher->get_instance_id()); + + m_leader_watcher.reset(LeaderWatcher<I>::create(m_threads, m_local_io_ctx, + &m_leader_listener)); + r = m_leader_watcher->init(); + if (r < 0) { + derr << "error initializing leader watcher: " << cpp_strerror(r) << dendl; + m_callout_id = m_service_daemon->add_or_update_callout( + m_local_pool_id, m_callout_id, service_daemon::CALLOUT_LEVEL_ERROR, + "unable to initialize leader messenger object"); + return; + } + + if (m_callout_id != service_daemon::CALLOUT_ID_NONE) { + m_service_daemon->remove_callout(m_local_pool_id, m_callout_id); + m_callout_id = service_daemon::CALLOUT_ID_NONE; + } + + m_pool_replayer_thread.create("pool replayer"); +} + +template <typename I> +void PoolReplayer<I>::shut_down() { + m_stopping = true; + { + Mutex::Locker l(m_lock); + m_cond.Signal(); + } + if (m_pool_replayer_thread.is_started()) { + m_pool_replayer_thread.join(); + } + if (m_leader_watcher) { + m_leader_watcher->shut_down(); + } + if (m_instance_watcher) { + m_instance_watcher->shut_down(); + } + if (m_instance_replayer) { + m_instance_replayer->shut_down(); + } + + m_leader_watcher.reset(); + m_instance_watcher.reset(); + m_instance_replayer.reset(); + + ceph_assert(!m_image_map); + ceph_assert(!m_image_deleter); + ceph_assert(!m_local_pool_watcher); + ceph_assert(!m_remote_pool_watcher); + m_local_rados.reset(); + m_remote_rados.reset(); +} + +template <typename I> +int PoolReplayer<I>::init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, + RadosRef *rados_ref, + bool strip_cluster_overrides) { + // NOTE: manually bootstrap a CephContext here instead of via + // the librados API to avoid mixing global singletons between + // the librados shared library and the daemon + // TODO: eliminate intermingling of global singletons within Ceph APIs + CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT); + if (client_name.empty() || !iparams.name.from_str(client_name)) { + derr << "error initializing cluster handle for " << description << dendl; + return -EINVAL; + } + + CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + cct->_conf->cluster = cluster_name; + + // librados::Rados::conf_read_file + int r = cct->_conf.parse_config_files(nullptr, nullptr, 0); + if (r < 0 && r != -ENOENT) { + // do not treat this as fatal, it might still be able to connect + derr << "could not read ceph conf for " << description << ": " + << cpp_strerror(r) << dendl; + } + + // preserve cluster-specific config settings before applying environment/cli + // overrides + std::map<std::string, std::string> config_values; + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& key : UNIQUE_PEER_CONFIG_KEYS) { + config_values[key] = cct->_conf.get_val<std::string>(key); + } + } + + cct->_conf.parse_env(cct->get_module_type()); + + // librados::Rados::conf_parse_env + std::vector<const char*> args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse environment for " << description << ":" + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + cct->_conf.parse_env(cct->get_module_type()); + + if (!m_args.empty()) { + // librados::Rados::conf_parse_argv + args = m_args; + r = cct->_conf.parse_argv(args); + if (r < 0) { + derr << "could not parse command line args for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + if (strip_cluster_overrides) { + // remote peer connections shouldn't apply cluster-specific + // configuration settings + for (auto& pair : config_values) { + auto value = cct->_conf.get_val<std::string>(pair.first); + if (pair.second != value) { + dout(0) << "reverting global config option override: " + << pair.first << ": " << value << " -> " << pair.second + << dendl; + cct->_conf.set_val_or_die(pair.first, pair.second); + } + } + } + + if (!g_ceph_context->_conf->admin_socket.empty()) { + cct->_conf.set_val_or_die("admin_socket", + "$run_dir/$name.$pid.$cluster.$cctid.asok"); + } + + if (!mon_host.empty()) { + r = cct->_conf.set_val("mon_host", mon_host); + if (r < 0) { + derr << "failed to set mon_host config for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + if (!key.empty()) { + r = cct->_conf.set_val("key", key); + if (r < 0) { + derr << "failed to set key config for " << description << ": " + << cpp_strerror(r) << dendl; + cct->put(); + return r; + } + } + + // disable unnecessary librbd cache + cct->_conf.set_val_or_die("rbd_cache", "false"); + cct->_conf.apply_changes(nullptr); + cct->_conf.complain_about_parse_errors(cct); + + rados_ref->reset(new librados::Rados()); + + r = (*rados_ref)->init_with_context(cct); + ceph_assert(r == 0); + cct->put(); + + r = (*rados_ref)->connect(); + if (r < 0) { + derr << "error connecting to " << description << ": " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +void PoolReplayer<I>::run() +{ + dout(20) << "enter" << dendl; + + while (!m_stopping) { + std::string asok_hook_name = m_local_io_ctx.get_pool_name() + " " + + m_peer.cluster_name; + if (m_asok_hook_name != asok_hook_name || m_asok_hook == nullptr) { + m_asok_hook_name = asok_hook_name; + delete m_asok_hook; + + m_asok_hook = new PoolReplayerAdminSocketHook<I>(g_ceph_context, + m_asok_hook_name, this); + } + + Mutex::Locker locker(m_lock); + if (m_leader_watcher->is_blacklisted() || + m_instance_replayer->is_blacklisted() || + (m_local_pool_watcher && m_local_pool_watcher->is_blacklisted()) || + (m_remote_pool_watcher && m_remote_pool_watcher->is_blacklisted())) { + m_blacklisted = true; + m_stopping = true; + break; + } + + if (!m_stopping) { + m_cond.WaitInterval(m_lock, utime_t(1, 0)); + } + } + + m_instance_replayer->stop(); +} + +template <typename I> +void PoolReplayer<I>::reopen_logs() +{ + Mutex::Locker l(m_lock); + + if (m_local_rados) { + reinterpret_cast<CephContext *>(m_local_rados->cct())->reopen_logs(); + } + if (m_remote_rados) { + reinterpret_cast<CephContext *>(m_remote_rados->cct())->reopen_logs(); + } +} + +template <typename I> +void PoolReplayer<I>::print_status(Formatter *f, stringstream *ss) +{ + dout(20) << "enter" << dendl; + + if (!f) { + return; + } + + Mutex::Locker l(m_lock); + + f->open_object_section("pool_replayer_status"); + f->dump_stream("peer") << m_peer; + if (m_local_io_ctx.is_valid()) { + f->dump_string("pool", m_local_io_ctx.get_pool_name()); + f->dump_stream("instance_id") << m_instance_watcher->get_instance_id(); + } + + std::string state("running"); + if (m_manual_stop) { + state = "stopped (manual)"; + } else if (m_stopping) { + state = "stopped"; + } + f->dump_string("state", state); + + std::string leader_instance_id; + m_leader_watcher->get_leader_instance_id(&leader_instance_id); + f->dump_string("leader_instance_id", leader_instance_id); + + bool leader = m_leader_watcher->is_leader(); + f->dump_bool("leader", leader); + if (leader) { + std::vector<std::string> instance_ids; + m_leader_watcher->list_instances(&instance_ids); + f->open_array_section("instances"); + for (auto instance_id : instance_ids) { + f->dump_string("instance_id", instance_id); + } + f->close_section(); + } + + f->dump_string("local_cluster_admin_socket", + reinterpret_cast<CephContext *>(m_local_io_ctx.cct())->_conf. + get_val<std::string>("admin_socket")); + f->dump_string("remote_cluster_admin_socket", + reinterpret_cast<CephContext *>(m_remote_io_ctx.cct())->_conf. + get_val<std::string>("admin_socket")); + + f->open_object_section("sync_throttler"); + m_instance_watcher->print_sync_status(f, ss); + f->close_section(); + + m_instance_replayer->print_status(f, ss); + + if (m_image_deleter) { + f->open_object_section("image_deleter"); + m_image_deleter->print_status(f, ss); + f->close_section(); + } + + f->close_section(); + f->flush(*ss); +} + +template <typename I> +void PoolReplayer<I>::start() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + m_manual_stop = false; + + if (m_instance_replayer) { + m_instance_replayer->start(); + } +} + +template <typename I> +void PoolReplayer<I>::stop(bool manual) +{ + dout(20) << "enter: manual=" << manual << dendl; + + Mutex::Locker l(m_lock); + if (!manual) { + m_stopping = true; + m_cond.Signal(); + return; + } else if (m_stopping) { + return; + } + + m_manual_stop = true; + + if (m_instance_replayer) { + m_instance_replayer->stop(); + } +} + +template <typename I> +void PoolReplayer<I>::restart() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping) { + return; + } + + if (m_instance_replayer) { + m_instance_replayer->restart(); + } +} + +template <typename I> +void PoolReplayer<I>::flush() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping || m_manual_stop) { + return; + } + + if (m_instance_replayer) { + m_instance_replayer->flush(); + } +} + +template <typename I> +void PoolReplayer<I>::release_leader() +{ + dout(20) << "enter" << dendl; + + Mutex::Locker l(m_lock); + + if (m_stopping || !m_leader_watcher) { + return; + } + + m_leader_watcher->release_leader(); +} + +template <typename I> +void PoolReplayer<I>::handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) { + if (m_stopping) { + return; + } + + dout(10) << "mirror_uuid=" << mirror_uuid << ", " + << "added_count=" << added_image_ids.size() << ", " + << "removed_count=" << removed_image_ids.size() << dendl; + Mutex::Locker locker(m_lock); + if (!m_leader_watcher->is_leader()) { + return; + } + + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_LOCAL_COUNT_KEY, + m_local_pool_watcher->get_image_count()); + if (m_remote_pool_watcher) { + m_service_daemon->add_or_update_attribute( + m_local_pool_id, SERVICE_DAEMON_REMOTE_COUNT_KEY, + m_remote_pool_watcher->get_image_count()); + } + + std::set<std::string> added_global_image_ids; + for (auto& image_id : added_image_ids) { + added_global_image_ids.insert(image_id.global_id); + } + + std::set<std::string> removed_global_image_ids; + for (auto& image_id : removed_image_ids) { + removed_global_image_ids.insert(image_id.global_id); + } + + m_image_map->update_images(mirror_uuid, + std::move(added_global_image_ids), + std::move(removed_global_image_ids)); +} + +template <typename I> +void PoolReplayer<I>::handle_post_acquire_leader(Context *on_finish) { + dout(10) << dendl; + + m_service_daemon->add_or_update_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY, true); + m_instance_watcher->handle_acquire_leader(); + init_image_map(on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_pre_release_leader(Context *on_finish) { + dout(10) << dendl; + + m_service_daemon->remove_attribute(m_local_pool_id, + SERVICE_DAEMON_LEADER_KEY); + m_instance_watcher->handle_release_leader(); + shut_down_image_deleter(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_image_map(Context *on_finish) { + dout(5) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_image_map); + m_image_map.reset(ImageMap<I>::create(m_local_io_ctx, m_threads, + m_instance_watcher->get_instance_id(), + m_image_map_listener)); + + auto ctx = new FunctionContext([this, on_finish](int r) { + handle_init_image_map(r, on_finish); + }); + m_image_map->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_image_map(int r, Context *on_finish) { + dout(5) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image map: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_image_map(on_finish); + return; + } + + init_local_pool_watcher(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_local_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_local_pool_watcher); + m_local_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_local_io_ctx, m_local_pool_watcher_listener)); + + // ensure the initial set of local images is up-to-date + // after acquiring the leader role + auto ctx = new FunctionContext([this, on_finish](int r) { + handle_init_local_pool_watcher(r, on_finish); + }); + m_local_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_local_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to retrieve local images: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_remote_pool_watcher(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_remote_pool_watcher(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_remote_pool_watcher); + m_remote_pool_watcher.reset(PoolWatcher<I>::create( + m_threads, m_remote_io_ctx, m_remote_pool_watcher_listener)); + + auto ctx = new FunctionContext([this, on_finish](int r) { + handle_init_remote_pool_watcher(r, on_finish); + }); + m_remote_pool_watcher->init(create_async_context_callback( + m_threads->work_queue, ctx)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_remote_pool_watcher( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r == -ENOENT) { + // Technically nothing to do since the other side doesn't + // have mirroring enabled. Eventually the remote pool watcher will + // detect images (if mirroring is enabled), so no point propagating + // an error which would just busy-spin the state machines. + dout(0) << "remote peer does not have mirroring configured" << dendl; + } else if (r < 0) { + derr << "failed to retrieve remote images: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_pool_watchers(on_finish); + return; + } + + init_image_deleter(on_finish); +} + +template <typename I> +void PoolReplayer<I>::init_image_deleter(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + ceph_assert(!m_image_deleter); + + on_finish = new FunctionContext([this, on_finish](int r) { + handle_init_image_deleter(r, on_finish); + }); + m_image_deleter.reset(ImageDeleter<I>::create(m_local_io_ctx, m_threads, + m_service_daemon)); + m_image_deleter->init(create_async_context_callback( + m_threads->work_queue, on_finish)); +} + +template <typename I> +void PoolReplayer<I>::handle_init_image_deleter(int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to init image deleter: " << cpp_strerror(r) << dendl; + on_finish = new FunctionContext([on_finish, r](int) { + on_finish->complete(r); + }); + shut_down_image_deleter(on_finish); + return; + } + + on_finish->complete(0); + + Mutex::Locker locker(m_lock); + m_cond.Signal(); +} + +template <typename I> +void PoolReplayer<I>::shut_down_image_deleter(Context* on_finish) { + dout(10) << dendl; + { + Mutex::Locker locker(m_lock); + if (m_image_deleter) { + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_shut_down_image_deleter(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + m_image_deleter->shut_down(ctx); + return; + } + } + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_shut_down_image_deleter( + int r, Context* on_finish) { + dout(10) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_deleter); + m_image_deleter.reset(); + } + + shut_down_pool_watchers(on_finish); +} + +template <typename I> +void PoolReplayer<I>::shut_down_pool_watchers(Context *on_finish) { + dout(10) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_local_pool_watcher) { + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_shut_down_pool_watchers(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + auto gather_ctx = new C_Gather(g_ceph_context, ctx); + m_local_pool_watcher->shut_down(gather_ctx->new_sub()); + if (m_remote_pool_watcher) { + m_remote_pool_watcher->shut_down(gather_ctx->new_sub()); + } + gather_ctx->activate(); + return; + } + } + + on_finish->complete(0); +} + +template <typename I> +void PoolReplayer<I>::handle_shut_down_pool_watchers( + int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_local_pool_watcher); + m_local_pool_watcher.reset(); + + if (m_remote_pool_watcher) { + m_remote_pool_watcher.reset(); + } + } + wait_for_update_ops(on_finish); +} + +template <typename I> +void PoolReplayer<I>::wait_for_update_ops(Context *on_finish) { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_wait_for_update_ops(r, on_finish); + }); + ctx = create_async_context_callback(m_threads->work_queue, ctx); + + m_update_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void PoolReplayer<I>::handle_wait_for_update_ops(int r, Context *on_finish) { + dout(10) << "r=" << r << dendl; + ceph_assert(r == 0); + + shut_down_image_map(on_finish); +} + +template <typename I> +void PoolReplayer<I>::shut_down_image_map(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + if (m_image_map) { + on_finish = new FunctionContext([this, on_finish](int r) { + handle_shut_down_image_map(r, on_finish); + }); + m_image_map->shut_down(create_async_context_callback( + m_threads->work_queue, on_finish)); + return; + } + } + + on_finish->complete(0); +} + +template <typename I> +void PoolReplayer<I>::handle_shut_down_image_map(int r, Context *on_finish) { + dout(5) << "r=" << r << dendl; + if (r < 0 && r != -EBLACKLISTED) { + derr << "failed to shut down image map: " << cpp_strerror(r) << dendl; + } + + Mutex::Locker locker(m_lock); + ceph_assert(m_image_map); + m_image_map.reset(); + + m_instance_replayer->release_all(on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_update_leader( + const std::string &leader_instance_id) { + dout(10) << "leader_instance_id=" << leader_instance_id << dendl; + + m_instance_watcher->handle_update_leader(leader_instance_id); +} + +template <typename I> +void PoolReplayer<I>::handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_acquire(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_image_release(instance_id, global_image_id, + on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) { + ceph_assert(!mirror_uuid.empty()); + dout(5) << "mirror_uuid=" << mirror_uuid << ", " + << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + m_instance_watcher->notify_peer_image_removed(instance_id, global_image_id, + mirror_uuid, on_finish); +} + +template <typename I> +void PoolReplayer<I>::handle_instances_added(const InstanceIds &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + Mutex::Locker locker(m_lock); + if (!m_leader_watcher->is_leader()) { + return; + } + + ceph_assert(m_image_map); + m_image_map->update_instances_added(instance_ids); +} + +template <typename I> +void PoolReplayer<I>::handle_instances_removed( + const InstanceIds &instance_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + Mutex::Locker locker(m_lock); + if (!m_leader_watcher->is_leader()) { + return; + } + + ceph_assert(m_image_map); + m_image_map->update_instances_removed(instance_ids); +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolReplayer.h b/src/tools/rbd_mirror/PoolReplayer.h new file mode 100644 index 00000000..43a4a0fc --- /dev/null +++ b/src/tools/rbd_mirror/PoolReplayer.h @@ -0,0 +1,303 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_REPLAYER_H +#define CEPH_RBD_MIRROR_POOL_REPLAYER_H + +#include "common/AsyncOpTracker.h" +#include "common/Cond.h" +#include "common/Mutex.h" +#include "common/WorkQueue.h" +#include "include/rados/librados.hpp" + +#include "ClusterWatcher.h" +#include "LeaderWatcher.h" +#include "PoolWatcher.h" +#include "ImageDeleter.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_map/Types.h" +#include "tools/rbd_mirror/leader_watcher/Types.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" + +#include <set> +#include <map> +#include <memory> +#include <atomic> +#include <string> +#include <vector> + +class AdminSocketHook; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> class ImageMap; +template <typename> class InstanceReplayer; +template <typename> class InstanceWatcher; +template <typename> class ServiceDaemon; +template <typename> struct Threads; + +/** + * Controls mirroring for a single remote cluster. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolReplayer { +public: + PoolReplayer(Threads<ImageCtxT> *threads, + ServiceDaemon<ImageCtxT>* service_daemon, + int64_t local_pool_id, const PeerSpec &peer, + const std::vector<const char*> &args); + ~PoolReplayer(); + PoolReplayer(const PoolReplayer&) = delete; + PoolReplayer& operator=(const PoolReplayer&) = delete; + + bool is_blacklisted() const; + bool is_leader() const; + bool is_running() const; + + void init(); + void shut_down(); + + void run(); + + void print_status(Formatter *f, stringstream *ss); + void start(); + void stop(bool manual); + void restart(); + void flush(); + void release_leader(); + void reopen_logs(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * <follower> <-------------------------\ + * . | + * . | + * v (leader acquired) | + * INIT_IMAGE_MAP SHUT_DOWN_IMAGE_MAP + * | ^ + * v | + * INIT_LOCAL_POOL_WATCHER WAIT_FOR_NOTIFICATIONS + * | ^ + * v | + * INIT_REMOTE_POOL_WATCHER SHUT_DOWN_POOL_WATCHERS + * | ^ + * v | + * INIT_IMAGE_DELETER SHUT_DOWN_IMAGE_DELETER + * | ^ + * v . + * <leader> <-----------\ . + * . | . + * . (image update) | . + * . . > NOTIFY_INSTANCE_WATCHER . + * . . + * . (leader lost / shut down) . + * . . . . . . . . . . . . . . . . . . + * + * @endverbatim + */ + + typedef std::vector<std::string> InstanceIds; + + struct PoolWatcherListener : public pool_watcher::Listener { + PoolReplayer *pool_replayer; + bool local; + + PoolWatcherListener(PoolReplayer *pool_replayer, bool local) + : pool_replayer(pool_replayer), local(local) { + } + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) override { + pool_replayer->handle_update((local ? "" : mirror_uuid), + std::move(added_image_ids), + std::move(removed_image_ids)); + } + }; + + struct ImageMapListener : public image_map::Listener { + PoolReplayer *pool_replayer; + + ImageMapListener(PoolReplayer *pool_replayer) + : pool_replayer(pool_replayer) { + } + + void acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + pool_replayer->handle_acquire_image(global_image_id, instance_id, + on_finish); + } + + void release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + pool_replayer->handle_release_image(global_image_id, instance_id, + on_finish); + } + + void remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) override { + pool_replayer->handle_remove_image(mirror_uuid, global_image_id, + instance_id, on_finish); + } + }; + + void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids); + + int init_rados(const std::string &cluster_name, + const std::string &client_name, + const std::string &mon_host, + const std::string &key, + const std::string &description, RadosRef *rados_ref, + bool strip_cluster_overrides); + + void handle_post_acquire_leader(Context *on_finish); + void handle_pre_release_leader(Context *on_finish); + + void init_image_map(Context *on_finish); + void handle_init_image_map(int r, Context *on_finish); + + void init_local_pool_watcher(Context *on_finish); + void handle_init_local_pool_watcher(int r, Context *on_finish); + + void init_remote_pool_watcher(Context *on_finish); + void handle_init_remote_pool_watcher(int r, Context *on_finish); + + void init_image_deleter(Context* on_finish); + void handle_init_image_deleter(int r, Context* on_finish); + + void shut_down_image_deleter(Context* on_finish); + void handle_shut_down_image_deleter(int r, Context* on_finish); + + void shut_down_pool_watchers(Context *on_finish); + void handle_shut_down_pool_watchers(int r, Context *on_finish); + + void wait_for_update_ops(Context *on_finish); + void handle_wait_for_update_ops(int r, Context *on_finish); + + void shut_down_image_map(Context *on_finish); + void handle_shut_down_image_map(int r, Context *on_finish); + + void handle_update_leader(const std::string &leader_instance_id); + + void handle_acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + void handle_remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish); + + void handle_instances_added(const InstanceIds &instance_ids); + void handle_instances_removed(const InstanceIds &instance_ids); + + Threads<ImageCtxT> *m_threads; + ServiceDaemon<ImageCtxT>* m_service_daemon; + int64_t m_local_pool_id = -1; + PeerSpec m_peer; + std::vector<const char*> m_args; + + mutable Mutex m_lock; + Cond m_cond; + std::atomic<bool> m_stopping = { false }; + bool m_manual_stop = false; + bool m_blacklisted = false; + + RadosRef m_local_rados; + RadosRef m_remote_rados; + + librados::IoCtx m_local_io_ctx; + librados::IoCtx m_remote_io_ctx; + + PoolWatcherListener m_local_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_local_pool_watcher; + + PoolWatcherListener m_remote_pool_watcher_listener; + std::unique_ptr<PoolWatcher<ImageCtxT>> m_remote_pool_watcher; + + std::unique_ptr<InstanceReplayer<ImageCtxT>> m_instance_replayer; + std::unique_ptr<ImageDeleter<ImageCtxT>> m_image_deleter; + + ImageMapListener m_image_map_listener; + std::unique_ptr<ImageMap<ImageCtxT>> m_image_map; + + std::string m_asok_hook_name; + AdminSocketHook *m_asok_hook = nullptr; + + service_daemon::CalloutId m_callout_id = service_daemon::CALLOUT_ID_NONE; + + class PoolReplayerThread : public Thread { + PoolReplayer *m_pool_replayer; + public: + PoolReplayerThread(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + void *entry() override { + m_pool_replayer->run(); + return 0; + } + } m_pool_replayer_thread; + + class LeaderListener : public leader_watcher::Listener { + public: + LeaderListener(PoolReplayer *pool_replayer) + : m_pool_replayer(pool_replayer) { + } + + protected: + void post_acquire_handler(Context *on_finish) override { + m_pool_replayer->handle_post_acquire_leader(on_finish); + } + + void pre_release_handler(Context *on_finish) override { + m_pool_replayer->handle_pre_release_leader(on_finish); + } + + void update_leader_handler( + const std::string &leader_instance_id) override { + m_pool_replayer->handle_update_leader(leader_instance_id); + } + + void handle_instances_added(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_added(instance_ids); + } + + void handle_instances_removed(const InstanceIds& instance_ids) override { + m_pool_replayer->handle_instances_removed(instance_ids); + } + + private: + PoolReplayer *m_pool_replayer; + } m_leader_listener; + + std::unique_ptr<LeaderWatcher<ImageCtxT>> m_leader_watcher; + std::unique_ptr<InstanceWatcher<ImageCtxT>> m_instance_watcher; + AsyncOpTracker m_update_op_tracker; +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolReplayer<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_REPLAYER_H diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc new file mode 100644 index 00000000..81810ea1 --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.cc @@ -0,0 +1,553 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/PoolWatcher.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/internal.h" +#include "librbd/MirroringWatcher.h" +#include "librbd/Utils.h" +#include "librbd/api/Image.h" +#include "librbd/api/Mirror.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" +#include <boost/bind.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::PoolWatcher: " << this << " " \ + << __func__ << ": " + +using std::list; +using std::string; +using std::unique_ptr; +using std::vector; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { + +template <typename I> +class PoolWatcher<I>::MirroringWatcher : public librbd::MirroringWatcher<I> { +public: + using ContextWQ = typename std::decay< + typename std::remove_pointer< + decltype(Threads<I>::work_queue)>::type>::type; + + MirroringWatcher(librados::IoCtx &io_ctx, ContextWQ *work_queue, + PoolWatcher *pool_watcher) + : librbd::MirroringWatcher<I>(io_ctx, work_queue), + m_pool_watcher(pool_watcher) { + } + + void handle_rewatch_complete(int r) override { + m_pool_watcher->handle_rewatch_complete(r); + } + + void handle_mode_updated(cls::rbd::MirrorMode mirror_mode) override { + // invalidate all image state and refresh the pool contents + m_pool_watcher->schedule_refresh_images(5); + } + + void handle_image_updated(cls::rbd::MirrorImageState state, + const std::string &remote_image_id, + const std::string &global_image_id) override { + bool enabled = (state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED); + m_pool_watcher->handle_image_updated(remote_image_id, global_image_id, + enabled); + } + +private: + PoolWatcher *m_pool_watcher; +}; + +template <typename I> +PoolWatcher<I>::PoolWatcher(Threads<I> *threads, librados::IoCtx &remote_io_ctx, + pool_watcher::Listener &listener) + : m_threads(threads), m_remote_io_ctx(remote_io_ctx), m_listener(listener), + m_lock(librbd::util::unique_lock_name("rbd::mirror::PoolWatcher", this)) { + m_mirroring_watcher = new MirroringWatcher(m_remote_io_ctx, + m_threads->work_queue, this); +} + +template <typename I> +PoolWatcher<I>::~PoolWatcher() { + delete m_mirroring_watcher; +} + +template <typename I> +bool PoolWatcher<I>::is_blacklisted() const { + Mutex::Locker locker(m_lock); + return m_blacklisted; +} + +template <typename I> +void PoolWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + m_on_init_finish = on_finish; + + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + } + + // start async updates for mirror image directory + register_watcher(); +} + +template <typename I> +void PoolWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + // in-progress unregister tracked as async op + unregister_watcher(); + + m_async_op_tracker.wait_for_ops(on_finish); +} + +template <typename I> +void PoolWatcher<I>::register_watcher() { + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!m_mirroring_watcher->is_unregistered()) { + refresh_images(); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_register_watcher>(this); + m_mirroring_watcher->register_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + if (r < 0) { + m_refresh_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + refresh_images(); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + + Mutex::Locker locker(m_lock); + m_blacklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring directory does not exist" << dendl; + { + Mutex::Locker locker(m_lock); + std::swap(on_init_finish, m_on_init_finish); + } + + schedule_refresh_images(30); + } else { + derr << "unexpected error registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::unregister_watcher() { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + dout(5) << "unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for " + << m_mirroring_watcher->get_oid() << " object: " << cpp_strerror(r) + << dendl; + } + m_async_op_tracker.finish_op(); + }); + + m_mirroring_watcher->unregister_watch(ctx); +} + +template <typename I> +void PoolWatcher<I>::refresh_images() { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + + // clear all pending notification events since we need to perform + // a full image list refresh + m_pending_added_image_ids.clear(); + m_pending_removed_image_ids.clear(); + } + + m_async_op_tracker.start_op(); + m_refresh_image_ids.clear(); + Context *ctx = create_context_callback< + PoolWatcher, &PoolWatcher<I>::handle_refresh_images>(this); + auto req = pool_watcher::RefreshImagesRequest<I>::create(m_remote_io_ctx, + &m_refresh_image_ids, + ctx); + req->send(); +} + +template <typename I> +void PoolWatcher<I>::handle_refresh_images(int r) { + dout(5) << "r=" << r << dendl; + + bool retry_refresh = false; + Context *on_init_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + + if (r >= 0) { + m_pending_image_ids = std::move(m_refresh_image_ids); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted during image refresh" << dendl; + + m_blacklisted = true; + m_refresh_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring directory not found" << dendl; + m_pending_image_ids.clear(); + r = 0; + } else { + m_refresh_in_progress = false; + retry_refresh = true; + } + } + + if (retry_refresh) { + derr << "failed to retrieve mirroring directory: " << cpp_strerror(r) + << dendl; + schedule_refresh_images(10); + } else if (r >= 0) { + get_mirror_uuid(); + return; + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + ceph_assert(r == -EBLACKLISTED); + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::get_mirror_uuid() { + dout(5) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PoolWatcher, &PoolWatcher<I>::handle_get_mirror_uuid>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PoolWatcher<I>::handle_get_mirror_uuid(int r) { + dout(5) << "r=" << r << dendl; + + bool deferred_refresh = false; + bool retry_refresh = false; + Context *on_init_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_image_ids_invalid); + ceph_assert(m_refresh_in_progress); + m_refresh_in_progress = false; + + m_pending_mirror_uuid = ""; + if (r >= 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish( + &it, &m_pending_mirror_uuid); + } + if (r >= 0 && m_pending_mirror_uuid.empty()) { + r = -ENOENT; + } + + if (m_deferred_refresh) { + // need to refresh -- skip the notification + deferred_refresh = true; + } else if (r >= 0) { + dout(10) << "mirror_uuid=" << m_pending_mirror_uuid << dendl; + m_image_ids_invalid = false; + std::swap(on_init_finish, m_on_init_finish); + schedule_listener(); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted during image refresh" << dendl; + + m_blacklisted = true; + std::swap(on_init_finish, m_on_init_finish); + } else if (r == -ENOENT) { + dout(5) << "mirroring uuid not found" << dendl; + std::swap(on_init_finish, m_on_init_finish); + retry_refresh = true; + } else { + retry_refresh = true; + } + } + + if (deferred_refresh) { + dout(5) << "scheduling deferred refresh" << dendl; + schedule_refresh_images(0); + } else if (retry_refresh) { + derr << "failed to retrieve mirror uuid: " << cpp_strerror(r) + << dendl; + schedule_refresh_images(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void PoolWatcher<I>::schedule_refresh_images(double interval) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (m_shutting_down || m_refresh_in_progress || m_timer_ctx != nullptr) { + if (m_refresh_in_progress && !m_deferred_refresh) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_refresh = true; + } + return; + } + + m_image_ids_invalid = true; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new FunctionContext([this](int r) { + process_refresh_images(); + })); +} + +template <typename I> +void PoolWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + + Mutex::Locker locker(m_lock); + m_blacklisted = true; + return; + } else if (r == -ENOENT) { + dout(5) << "mirroring directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering mirroring directory watch: " + << cpp_strerror(r) << dendl; + } + + schedule_refresh_images(5); +} + +template <typename I> +void PoolWatcher<I>::handle_image_updated(const std::string &remote_image_id, + const std::string &global_image_id, + bool enabled) { + dout(10) << "remote_image_id=" << remote_image_id << ", " + << "global_image_id=" << global_image_id << ", " + << "enabled=" << enabled << dendl; + + Mutex::Locker locker(m_lock); + ImageId image_id(global_image_id, remote_image_id); + m_pending_added_image_ids.erase(image_id); + m_pending_removed_image_ids.erase(image_id); + + if (enabled) { + m_pending_added_image_ids.insert(image_id); + schedule_listener(); + } else { + m_pending_removed_image_ids.insert(image_id); + schedule_listener(); + } +} + +template <typename I> +void PoolWatcher<I>::process_refresh_images() { + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + Mutex::Locker locker(m_lock); + ceph_assert(!m_refresh_in_progress); + m_refresh_in_progress = true; + m_deferred_refresh = false; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + register_watcher(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::schedule_listener() { + ceph_assert(m_lock.is_locked()); + m_pending_updates = true; + if (m_shutting_down || m_image_ids_invalid || m_notify_listener_in_progress) { + return; + } + + dout(20) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + notify_listener(); + m_async_op_tracker.finish_op(); + }); + + m_notify_listener_in_progress = true; + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void PoolWatcher<I>::notify_listener() { + dout(10) << dendl; + + std::string mirror_uuid; + ImageIds added_image_ids; + ImageIds removed_image_ids; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_notify_listener_in_progress); + + // if the mirror uuid is updated, treat it as the removal of all + // images in the pool + if (m_mirror_uuid != m_pending_mirror_uuid) { + if (!m_mirror_uuid.empty()) { + dout(0) << "mirror uuid updated:" + << "old=" << m_mirror_uuid << ", " + << "new=" << m_pending_mirror_uuid << dendl; + } + + mirror_uuid = m_mirror_uuid; + removed_image_ids = std::move(m_image_ids); + m_image_ids.clear(); + } + } + + if (!removed_image_ids.empty()) { + m_listener.handle_update(mirror_uuid, {}, std::move(removed_image_ids)); + removed_image_ids.clear(); + } + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_notify_listener_in_progress); + + // if the watch failed while we didn't own the lock, we are going + // to need to perform a full refresh + if (m_image_ids_invalid) { + m_notify_listener_in_progress = false; + return; + } + + // merge add/remove notifications into pending set (a given image + // can only be in one set or another) + for (auto &image_id : m_pending_removed_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + } + + for (auto &image_id : m_pending_added_image_ids) { + dout(20) << "image_id=" << image_id << dendl; + m_pending_image_ids.erase(image_id); + m_pending_image_ids.insert(image_id); + } + m_pending_added_image_ids.clear(); + + // compute added/removed images + for (auto &image_id : m_image_ids) { + auto it = m_pending_image_ids.find(image_id); + if (it == m_pending_image_ids.end() || it->id != image_id.id) { + removed_image_ids.insert(image_id); + } + } + for (auto &image_id : m_pending_image_ids) { + auto it = m_image_ids.find(image_id); + if (it == m_image_ids.end() || it->id != image_id.id) { + added_image_ids.insert(image_id); + } + } + + m_pending_updates = false; + m_image_ids = m_pending_image_ids; + + m_mirror_uuid = m_pending_mirror_uuid; + mirror_uuid = m_mirror_uuid; + } + + m_listener.handle_update(mirror_uuid, std::move(added_image_ids), + std::move(removed_image_ids)); + + { + Mutex::Locker locker(m_lock); + m_notify_listener_in_progress = false; + if (m_pending_updates) { + schedule_listener(); + } + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h new file mode 100644 index 00000000..1136a319 --- /dev/null +++ b/src/tools/rbd_mirror/PoolWatcher.h @@ -0,0 +1,166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_H + +#include <map> +#include <memory> +#include <set> +#include <string> + +#include "common/AsyncOpTracker.h" +#include "common/ceph_context.h" +#include "common/Mutex.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <boost/functional/hash.hpp> +#include <boost/optional.hpp> +#include "include/ceph_assert.h" +#include "tools/rbd_mirror/pool_watcher/Types.h" + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +/** + * Keeps track of images that have mirroring enabled within all + * pools. + */ +template <typename ImageCtxT = librbd::ImageCtx> +class PoolWatcher { +public: + static PoolWatcher* create(Threads<ImageCtxT> *threads, + librados::IoCtx &remote_io_ctx, + pool_watcher::Listener &listener) { + return new PoolWatcher(threads, remote_io_ctx, listener); + } + + PoolWatcher(Threads<ImageCtxT> *threads, librados::IoCtx &remote_io_ctx, + pool_watcher::Listener &listener); + ~PoolWatcher(); + PoolWatcher(const PoolWatcher&) = delete; + PoolWatcher& operator=(const PoolWatcher&) = delete; + + bool is_blacklisted() const; + + void init(Context *on_finish = nullptr); + void shut_down(Context *on_finish); + + inline uint64_t get_image_count() const { + Mutex::Locker locker(m_lock); + return m_image_ids.size(); + } + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * REGISTER_WATCHER + * | + * |/--------------------------------\ + * | | + * v | + * REFRESH_IMAGES | + * | | + * |/----------------------------\ | + * | | | + * v | | + * GET_MIRROR_UUID | | + * | | | + * v | | + * NOTIFY_LISTENER | | + * | | | + * v | | + * IDLE ---\ | | + * | | | | + * | |\---> IMAGE_UPDATED | | + * | | | | | + * | | v | | + * | | GET_IMAGE_NAME --/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + class MirroringWatcher; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx m_remote_io_ctx; + pool_watcher::Listener &m_listener; + + ImageIds m_refresh_image_ids; + bufferlist m_out_bl; + + mutable Mutex m_lock; + + Context *m_on_init_finish = nullptr; + + ImageIds m_image_ids; + std::string m_mirror_uuid; + + bool m_pending_updates = false; + bool m_notify_listener_in_progress = false; + ImageIds m_pending_image_ids; + ImageIds m_pending_added_image_ids; + ImageIds m_pending_removed_image_ids; + + std::string m_pending_mirror_uuid; + + MirroringWatcher *m_mirroring_watcher; + + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool m_blacklisted = false; + bool m_shutting_down = false; + bool m_image_ids_invalid = true; + bool m_refresh_in_progress = false; + bool m_deferred_refresh = false; + + void register_watcher(); + void handle_register_watcher(int r); + void unregister_watcher(); + + void refresh_images(); + void handle_refresh_images(int r); + + void schedule_refresh_images(double interval); + void process_refresh_images(); + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void handle_rewatch_complete(int r); + void handle_image_updated(const std::string &remote_image_id, + const std::string &global_image_id, + bool enabled); + + void schedule_listener(); + void notify_listener(); + +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::PoolWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_H diff --git a/src/tools/rbd_mirror/ProgressContext.h b/src/tools/rbd_mirror/ProgressContext.h new file mode 100644 index 00000000..e4430ee6 --- /dev/null +++ b/src/tools/rbd_mirror/ProgressContext.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_PROGRESS_CONTEXT_H +#define RBD_MIRROR_PROGRESS_CONTEXT_H + +namespace rbd { +namespace mirror { + +class ProgressContext +{ +public: + virtual ~ProgressContext() {} + virtual void update_progress(const std::string &description, + bool flush = true) = 0; +}; + +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_PROGRESS_CONTEXT_H diff --git a/src/tools/rbd_mirror/ServiceDaemon.cc b/src/tools/rbd_mirror/ServiceDaemon.cc new file mode 100644 index 00000000..f3b549b8 --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.cc @@ -0,0 +1,251 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/ServiceDaemon.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "common/ceph_context.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/Timer.h" +#include "tools/rbd_mirror/Threads.h" +#include <sstream> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::ServiceDaemon: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { + +namespace { + +const std::string RBD_MIRROR_AUTH_ID_PREFIX("rbd-mirror."); + +struct AttributeDumpVisitor : public boost::static_visitor<void> { + ceph::Formatter *f; + const std::string& name; + + AttributeDumpVisitor(ceph::Formatter *f, const std::string& name) + : f(f), name(name) { + } + + void operator()(bool val) const { + f->dump_bool(name.c_str(), val); + } + void operator()(uint64_t val) const { + f->dump_unsigned(name.c_str(), val); + } + void operator()(const std::string& val) const { + f->dump_string(name.c_str(), val); + } +}; + +} // anonymous namespace + +using namespace service_daemon; + +template <typename I> +ServiceDaemon<I>::ServiceDaemon(CephContext *cct, RadosRef rados, + Threads<I>* threads) + : m_cct(cct), m_rados(rados), m_threads(threads), + m_lock("rbd::mirror::ServiceDaemon") { + dout(20) << dendl; +} + +template <typename I> +ServiceDaemon<I>::~ServiceDaemon() { + dout(20) << dendl; + Mutex::Locker timer_locker(m_threads->timer_lock); + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + update_status(); + } +} + +template <typename I> +int ServiceDaemon<I>::init() { + dout(20) << dendl; + + std::string id = m_cct->_conf->name.get_id(); + if (id.find(RBD_MIRROR_AUTH_ID_PREFIX) == 0) { + id = id.substr(RBD_MIRROR_AUTH_ID_PREFIX.size()); + } + + std::string instance_id = stringify(m_rados->get_instance_id()); + std::map<std::string, std::string> service_metadata = { + {"id", id}, {"instance_id", instance_id}}; + int r = m_rados->service_daemon_register("rbd-mirror", instance_id, + service_metadata); + if (r < 0) { + return r; + } + + return 0; +} + +template <typename I> +void ServiceDaemon<I>::add_pool(int64_t pool_id, const std::string& pool_name) { + dout(20) << "pool_id=" << pool_id << ", pool_name=" << pool_name << dendl; + + { + Mutex::Locker locker(m_lock); + m_pools.insert({pool_id, {pool_name}}); + } + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_pool(int64_t pool_id) { + dout(20) << "pool_id=" << pool_id << dendl; + { + Mutex::Locker locker(m_lock); + m_pools.erase(pool_id); + } + schedule_update_status(); +} + +template <typename I> +uint64_t ServiceDaemon<I>::add_or_update_callout(int64_t pool_id, + uint64_t callout_id, + CalloutLevel callout_level, + const std::string& text) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << ", " + << "callout_level=" << callout_level << ", " + << "text=" << text << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return CALLOUT_ID_NONE; + } + + if (callout_id == CALLOUT_ID_NONE) { + callout_id = ++m_callout_id; + } + pool_it->second.callouts[callout_id] = {callout_level, text}; + } + + schedule_update_status(); + return callout_id; +} + +template <typename I> +void ServiceDaemon<I>::remove_callout(int64_t pool_id, uint64_t callout_id) { + dout(20) << "pool_id=" << pool_id << ", " + << "callout_id=" << callout_id << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.callouts.erase(callout_id); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::add_or_update_attribute(int64_t pool_id, + const std::string& key, + const AttributeValue& value) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << ", " + << "value=" << value << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes[key] = value; + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::remove_attribute(int64_t pool_id, + const std::string& key) { + dout(20) << "pool_id=" << pool_id << ", " + << "key=" << key << dendl; + + { + Mutex::Locker locker(m_lock); + auto pool_it = m_pools.find(pool_id); + if (pool_it == m_pools.end()) { + return; + } + pool_it->second.attributes.erase(key); + } + + schedule_update_status(); +} + +template <typename I> +void ServiceDaemon<I>::schedule_update_status() { + Mutex::Locker timer_locker(m_threads->timer_lock); + if (m_timer_ctx != nullptr) { + return; + } + + m_timer_ctx = new FunctionContext([this](int) { + m_timer_ctx = nullptr; + update_status(); + }); + m_threads->timer->add_event_after(1, m_timer_ctx); +} + +template <typename I> +void ServiceDaemon<I>::update_status() { + dout(20) << dendl; + ceph_assert(m_threads->timer_lock.is_locked()); + + ceph::JSONFormatter f; + { + Mutex::Locker locker(m_lock); + f.open_object_section("pools"); + for (auto& pool_pair : m_pools) { + f.open_object_section(stringify(pool_pair.first).c_str()); + f.dump_string("name", pool_pair.second.name); + f.open_object_section("callouts"); + for (auto& callout : pool_pair.second.callouts) { + f.open_object_section(stringify(callout.first).c_str()); + f.dump_string("level", stringify(callout.second.level).c_str()); + f.dump_string("text", callout.second.text.c_str()); + f.close_section(); + } + f.close_section(); // callouts + + for (auto& attribute : pool_pair.second.attributes) { + AttributeDumpVisitor attribute_dump_visitor(&f, attribute.first); + boost::apply_visitor(attribute_dump_visitor, attribute.second); + } + f.close_section(); // pool + } + f.close_section(); // pools + } + + std::stringstream ss; + f.flush(ss); + + int r = m_rados->service_daemon_update_status({{"json", ss.str()}}); + if (r < 0) { + derr << "failed to update service daemon status: " << cpp_strerror(r) + << dendl; + } +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/ServiceDaemon.h b/src/tools/rbd_mirror/ServiceDaemon.h new file mode 100644 index 00000000..1de7e20b --- /dev/null +++ b/src/tools/rbd_mirror/ServiceDaemon.h @@ -0,0 +1,86 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_H + +#include "common/Mutex.h" +#include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <map> +#include <string> + +struct CephContext; +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +template <typename ImageCtxT = librbd::ImageCtx> +class ServiceDaemon { +public: + ServiceDaemon(CephContext *cct, RadosRef rados, Threads<ImageCtxT>* threads); + ~ServiceDaemon(); + + int init(); + + void add_pool(int64_t pool_id, const std::string& pool_name); + void remove_pool(int64_t pool_id); + + uint64_t add_or_update_callout(int64_t pool_id, uint64_t callout_id, + service_daemon::CalloutLevel callout_level, + const std::string& text); + void remove_callout(int64_t pool_id, uint64_t callout_id); + + void add_or_update_attribute(int64_t pool_id, const std::string& key, + const service_daemon::AttributeValue& value); + void remove_attribute(int64_t pool_id, const std::string& key); + +private: + struct Callout { + service_daemon::CalloutLevel level; + std::string text; + + Callout() : level(service_daemon::CALLOUT_LEVEL_INFO) { + } + Callout(service_daemon::CalloutLevel level, const std::string& text) + : level(level), text(text) { + } + }; + typedef std::map<uint64_t, Callout> Callouts; + typedef std::map<std::string, service_daemon::AttributeValue> Attributes; + + struct Pool { + std::string name; + Callouts callouts; + Attributes attributes; + + Pool(const std::string& name) : name(name) { + } + }; + + typedef std::map<int64_t, Pool> Pools; + + CephContext *m_cct; + RadosRef m_rados; + Threads<ImageCtxT>* m_threads; + + Mutex m_lock; + Pools m_pools; + uint64_t m_callout_id = service_daemon::CALLOUT_ID_NONE; + + Context* m_timer_ctx = nullptr; + + void schedule_update_status(); + void update_status(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::ServiceDaemon<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_H diff --git a/src/tools/rbd_mirror/Threads.cc b/src/tools/rbd_mirror/Threads.cc new file mode 100644 index 00000000..ca0a8b0f --- /dev/null +++ b/src/tools/rbd_mirror/Threads.cc @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Threads.h" +#include "common/Timer.h" +#include "common/WorkQueue.h" +#include "librbd/ImageCtx.h" + +namespace rbd { +namespace mirror { + +template <typename I> +Threads<I>::Threads(CephContext *cct) : timer_lock("Threads::timer_lock") { + thread_pool = new ThreadPool(cct, "Journaler::thread_pool", "tp_journal", + cct->_conf.get_val<uint64_t>("rbd_op_threads"), + "rbd_op_threads"); + thread_pool->start(); + + work_queue = new ContextWQ("Journaler::work_queue", + cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"), + thread_pool); + + timer = new SafeTimer(cct, timer_lock, true); + timer->init(); +} + +template <typename I> +Threads<I>::~Threads() { + { + Mutex::Locker timer_locker(timer_lock); + timer->shutdown(); + } + delete timer; + + work_queue->drain(); + delete work_queue; + + thread_pool->stop(); + delete thread_pool; +} + +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::Threads<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/Threads.h b/src/tools/rbd_mirror/Threads.h new file mode 100644 index 00000000..f52e8837 --- /dev/null +++ b/src/tools/rbd_mirror/Threads.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_THREADS_H +#define CEPH_RBD_MIRROR_THREADS_H + +#include "common/Mutex.h" + +class CephContext; +class ContextWQ; +class SafeTimer; +class ThreadPool; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename ImageCtxT = librbd::ImageCtx> +struct Threads { + ThreadPool *thread_pool = nullptr; + ContextWQ *work_queue = nullptr; + + SafeTimer *timer = nullptr; + Mutex timer_lock; + + explicit Threads(CephContext *cct); + Threads(const Threads&) = delete; + Threads& operator=(const Threads&) = delete; + + ~Threads(); +}; + +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::Threads<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_THREADS_H diff --git a/src/tools/rbd_mirror/Types.cc b/src/tools/rbd_mirror/Types.cc new file mode 100644 index 00000000..74fe318e --- /dev/null +++ b/src/tools/rbd_mirror/Types.cc @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/Types.h" + +namespace rbd { +namespace mirror { + +std::ostream &operator<<(std::ostream &os, const ImageId &image_id) { + return os << "global id=" << image_id.global_id << ", " + << "id=" << image_id.id; +} + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer) { + return lhs << "uuid: " << peer.uuid + << " cluster: " << peer.cluster_name + << " client: " << peer.client_name; +} + +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h new file mode 100644 index 00000000..ed3b9d8a --- /dev/null +++ b/src/tools/rbd_mirror/Types.h @@ -0,0 +1,123 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_TYPES_H +#define CEPH_RBD_MIRROR_TYPES_H + +#include <iostream> +#include <memory> +#include <set> +#include <string> +#include <vector> + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" + +namespace rbd { +namespace mirror { + +// Performance counters +enum { + l_rbd_mirror_first = 27000, + l_rbd_mirror_replay, + l_rbd_mirror_replay_bytes, + l_rbd_mirror_replay_latency, + l_rbd_mirror_last, +}; + +typedef std::shared_ptr<librados::Rados> RadosRef; +typedef std::shared_ptr<librados::IoCtx> IoCtxRef; +typedef std::shared_ptr<librbd::Image> ImageRef; + +struct ImageId { + std::string global_id; + std::string id; + + explicit ImageId(const std::string &global_id) : global_id(global_id) { + } + ImageId(const std::string &global_id, const std::string &id) + : global_id(global_id), id(id) { + } + + inline bool operator==(const ImageId &rhs) const { + return (global_id == rhs.global_id && id == rhs.id); + } + inline bool operator<(const ImageId &rhs) const { + return global_id < rhs.global_id; + } +}; + +std::ostream &operator<<(std::ostream &, const ImageId &image_id); + +typedef std::set<ImageId> ImageIds; + +struct Peer { + std::string peer_uuid; + librados::IoCtx io_ctx; + + Peer() { + } + Peer(const std::string &peer_uuid) : peer_uuid(peer_uuid) { + } + Peer(const std::string &peer_uuid, librados::IoCtx& io_ctx) + : peer_uuid(peer_uuid), io_ctx(io_ctx) { + } + + inline bool operator<(const Peer &rhs) const { + return peer_uuid < rhs.peer_uuid; + } +}; + +typedef std::set<Peer> Peers; + +struct PeerSpec { + PeerSpec() = default; + PeerSpec(const std::string &uuid, const std::string &cluster_name, + const std::string &client_name) + : uuid(uuid), cluster_name(cluster_name), client_name(client_name) + { + } + PeerSpec(const librbd::mirror_peer_t &peer) : + uuid(peer.uuid), + cluster_name(peer.cluster_name), + client_name(peer.client_name) + { + } + + std::string uuid; + std::string cluster_name; + std::string client_name; + + /// optional config properties + std::string mon_host; + std::string key; + + bool operator==(const PeerSpec& rhs) const { + return (uuid == rhs.uuid && + cluster_name == rhs.cluster_name && + client_name == rhs.client_name && + mon_host == rhs.mon_host && + key == rhs.key); + } + bool operator<(const PeerSpec& rhs) const { + if (uuid != rhs.uuid) { + return uuid < rhs.uuid; + } else if (cluster_name != rhs.cluster_name) { + return cluster_name < rhs.cluster_name; + } else if (client_name != rhs.client_name) { + return client_name < rhs.client_name; + } else if (mon_host < rhs.mon_host) { + return mon_host < rhs.mon_host; + } else { + return key < rhs.key; + } + } +}; + +std::ostream& operator<<(std::ostream& lhs, const PeerSpec &peer); + +} // namespace mirror +} // namespace rbd + + +#endif // CEPH_RBD_MIRROR_TYPES_H diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc new file mode 100644 index 00000000..a0e9fd90 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.cc @@ -0,0 +1,290 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include "librbd/journal/Policy.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::SnapshotPurgeRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; + +template <typename I> +void SnapshotPurgeRequest<I>::send() { + open_image(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::open_image() { + dout(10) << dendl; + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + { + RWLock::WLocker snap_locker(m_image_ctx->snap_lock); + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_open_image>( + this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open image '" << m_image_id << "': " << cpp_strerror(r) + << dendl; + m_image_ctx->destroy(); + m_image_ctx = nullptr; + + finish(r); + return; + } + + acquire_lock(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::acquire_lock() { + dout(10) << dendl; + + m_image_ctx->owner_lock.get_read(); + if (m_image_ctx->exclusive_lock == nullptr) { + m_image_ctx->owner_lock.put_read(); + + derr << "exclusive lock not enabled" << dendl; + m_ret_val = -EINVAL; + close_image(); + return; + } + + m_image_ctx->exclusive_lock->acquire_lock(create_context_callback< + SnapshotPurgeRequest<I>, &SnapshotPurgeRequest<I>::handle_acquire_lock>( + this)); + m_image_ctx->owner_lock.put_read(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + { + RWLock::RLocker snap_locker(m_image_ctx->snap_lock); + m_snaps = m_image_ctx->snaps; + } + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_unprotect() { + if (m_snaps.empty()) { + close_image(); + return; + } + + librados::snap_t snap_id = m_snaps.back(); + m_image_ctx->snap_lock.get_read(); + int r = m_image_ctx->get_snap_namespace(snap_id, &m_snap_namespace); + if (r < 0) { + m_image_ctx->snap_lock.put_read(); + + derr << "failed to get snap namespace: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + r = m_image_ctx->get_snap_name(snap_id, &m_snap_name); + if (r < 0) { + m_image_ctx->snap_lock.put_read(); + + derr << "failed to get snap name: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + bool is_protected; + r = m_image_ctx->is_snap_protected(snap_id, &is_protected); + if (r < 0) { + m_image_ctx->snap_lock.put_read(); + + derr << "failed to get snap protection status: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_image(); + return; + } + m_image_ctx->snap_lock.put_read(); + + if (!is_protected) { + snap_remove(); + return; + } + + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_unprotect(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + m_image_ctx->operations->execute_snap_unprotect( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_unprotect(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to unprotect snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + { + // avoid the need to refresh to delete the newly unprotected snapshot + RWLock::RLocker snap_locker(m_image_ctx->snap_lock); + librados::snap_t snap_id = m_snaps.back(); + auto snap_info_it = m_image_ctx->snap_info.find(snap_id); + if (snap_info_it != m_image_ctx->snap_info.end()) { + snap_info_it->second.protection_status = + RBD_PROTECTION_STATUS_UNPROTECTED; + } + } + + snap_remove(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::snap_remove() { + librados::snap_t snap_id = m_snaps.back(); + dout(10) << "snap_id=" << snap_id << ", " + << "snap_namespace=" << m_snap_namespace << ", " + << "snap_name=" << m_snap_name << dendl; + + int r; + auto finish_op_ctx = start_lock_op(&r); + if (finish_op_ctx == nullptr) { + derr << "lost exclusive lock" << dendl; + m_ret_val = r; + close_image(); + return; + } + + auto ctx = new FunctionContext([this, finish_op_ctx](int r) { + handle_snap_remove(r); + finish_op_ctx->complete(0); + }); + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + m_image_ctx->operations->execute_snap_remove( + m_snap_namespace, m_snap_name.c_str(), ctx); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_snap_remove(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshot in-use" << dendl; + m_ret_val = r; + close_image(); + return; + } else if (r < 0) { + derr << "failed to remove snapshot: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_snaps.pop_back(); + snap_unprotect(); +} + +template <typename I> +void SnapshotPurgeRequest<I>::close_image() { + dout(10) << dendl; + + m_image_ctx->state->close(create_context_callback< + SnapshotPurgeRequest<I>, + &SnapshotPurgeRequest<I>::handle_close_image>(this)); +} + +template <typename I> +void SnapshotPurgeRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx->destroy(); + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + finish(0); +} + +template <typename I> +void SnapshotPurgeRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + m_on_finish->complete(r); + delete this; +} + +template <typename I> +Context *SnapshotPurgeRequest<I>::start_lock_op(int* r) { + RWLock::RLocker owner_locker(m_image_ctx->owner_lock); + return m_image_ctx->exclusive_lock->start_op(r); +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h new file mode 100644 index 00000000..b8b635fe --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h @@ -0,0 +1,104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <string> +#include <vector> + +class Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class SnapshotPurgeRequest { +public: + static SnapshotPurgeRequest* create(librados::IoCtx &io_ctx, + const std::string &image_id, + Context *on_finish) { + return new SnapshotPurgeRequest(io_ctx, image_id, on_finish); + } + + SnapshotPurgeRequest(librados::IoCtx &io_ctx, const std::string &image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * ACQUIRE_LOCK + * | + * | (repeat for each snapshot) + * |/------------------------\ + * | | + * v (skip if not needed) | + * SNAP_UNPROTECT | + * | | + * v (skip if not needed) | + * SNAP_REMOVE -----------------/ + * | + * v + * CLOSE_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_image_id; + Context *m_on_finish; + + ImageCtxT *m_image_ctx = nullptr; + int m_ret_val = 0; + + std::vector<librados::snap_t> m_snaps; + cls::rbd::SnapshotNamespace m_snap_namespace; + std::string m_snap_name; + + void open_image(); + void handle_open_image(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void snap_unprotect(); + void handle_snap_unprotect(int r); + + void snap_remove(); + void handle_snap_remove(int r); + + void close_image(); + void handle_close_image(int r); + + void finish(int r); + + Context *start_lock_op(int* r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::SnapshotPurgeRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_SNAPSHOT_PURGE_REQUEST_H + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc new file mode 100644 index 00000000..92db22ca --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.cc @@ -0,0 +1,384 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashMoveRequest.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/journal/ResetRequest.h" +#include "librbd/trash/MoveRequest.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashMoveRequest: " \ + << this << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashMoveRequest<I>::send() { + get_mirror_image_id(); +} + +template <typename I> +void TrashMoveRequest<I>::get_mirror_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_get_mirror_image_id>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_mirror_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish(&bl_it, + &m_image_id); + } + if (r == -ENOENT) { + dout(10) << "image " << m_global_image_id << " is not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "error retrieving local id for image " << m_global_image_id << ": " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_tag_owner(); +} + +template <typename I> +void TrashMoveRequest<I>::get_tag_owner() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_get_tag_owner>(this); + librbd::Journal<I>::get_tag_owner(m_io_ctx, m_image_id, &m_mirror_uuid, + m_op_work_queue, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_get_tag_owner(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "error retrieving image primary info for image " + << m_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (r != -ENOENT) { + if (m_mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID) { + dout(10) << "image " << m_global_image_id << " is local primary" << dendl; + finish(-EPERM); + return; + } else if (m_mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + !m_resync) { + dout(10) << "image " << m_global_image_id << " is orphaned" << dendl; + finish(-EPERM); + return; + } + } + + disable_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::disable_mirror_image() { + dout(10) << dendl; + + cls::rbd::MirrorImage mirror_image; + mirror_image.global_image_id = m_global_image_id; + mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_set(&op, m_image_id, mirror_image); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_disable_mirror_image>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_disable_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored, aborting deletion." << dendl; + finish(r); + return; + } else if (r == -EEXIST || r == -EINVAL) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": global_image_id has changed/reused: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "cannot disable mirroring for image " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + reset_journal(); +} + +template <typename I> +void TrashMoveRequest<I>::reset_journal() { + dout(10) << dendl; + + // ensure that if the image is recovered any peers will split-brain + auto ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_reset_journal>(this); + auto req = librbd::journal::ResetRequest<I>::create( + m_io_ctx, m_image_id, librbd::Journal<>::IMAGE_CLIENT_ID, + librbd::Journal<>::LOCAL_MIRROR_UUID, m_op_work_queue, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_reset_journal(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -ENOENT) { + derr << "failed to reset journal: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_image(); +} + +template <typename I> +void TrashMoveRequest<I>::open_image() { + dout(10) << dendl; + + m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, false); + + { + // don't attempt to open the journal + RWLock::WLocker snap_locker(m_image_ctx->snap_lock); + m_image_ctx->set_journal_policy(new JournalPolicy()); + } + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_open_image>(this); + m_image_ctx->state->open(librbd::OPEN_FLAG_SKIP_OPEN_PARENT, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_open_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open image: " << cpp_strerror(r) << dendl; + m_image_ctx->destroy(); + m_image_ctx = nullptr; + finish(r); + return; + } + + if (m_image_ctx->old_format) { + derr << "cannot move v1 image to trash" << dendl; + m_ret_val = -EINVAL; + close_image(); + return; + } + + acquire_lock(); +} + +template <typename I> +void TrashMoveRequest<I>::acquire_lock() { + m_image_ctx->owner_lock.get_read(); + if (m_image_ctx->exclusive_lock == nullptr) { + derr << "exclusive lock feature not enabled" << dendl; + m_image_ctx->owner_lock.put_read(); + m_ret_val = -EINVAL; + close_image(); + return; + } + + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_acquire_lock>(this); + m_image_ctx->exclusive_lock->block_requests(0); + m_image_ctx->exclusive_lock->acquire_lock(ctx); + m_image_ctx->owner_lock.put_read(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_acquire_lock(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to acquire exclusive lock: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + trash_move(); +} + +template <typename I> +void TrashMoveRequest<I>::trash_move() { + dout(10) << dendl; + + utime_t delete_time{ceph_clock_now()}; + utime_t deferment_end_time{delete_time}; + deferment_end_time += + m_image_ctx->config.template get_val<uint64_t>("rbd_mirroring_delete_delay"); + + m_trash_image_spec = { + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING, m_image_ctx->name, delete_time, + deferment_end_time}; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_trash_move>(this); + auto req = librbd::trash::MoveRequest<I>::create( + m_io_ctx, m_image_id, m_trash_image_spec, ctx); + req->send(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_trash_move(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to move image to trash: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_image(); + return; + } + + m_moved_to_trash = true; + remove_mirror_image(); +} + +template <typename I> +void TrashMoveRequest<I>::remove_mirror_image() { + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::mirror_image_remove(&op, m_image_id); + + auto aio_comp = create_rados_callback< + TrashMoveRequest<I>, + &TrashMoveRequest<I>::handle_remove_mirror_image>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashMoveRequest<I>::handle_remove_mirror_image(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "local image is not mirrored" << dendl; + } else if (r < 0) { + derr << "failed to remove mirror image state for " << m_global_image_id + << ": " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + + close_image(); +} + +template <typename I> +void TrashMoveRequest<I>::close_image() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_close_image>(this); + m_image_ctx->state->close(ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_close_image(int r) { + dout(10) << "r=" << r << dendl; + + m_image_ctx->destroy(); + m_image_ctx = nullptr; + + if (r < 0) { + derr << "failed to close image: " << cpp_strerror(r) << dendl; + } + + // don't send notification if we failed + if (!m_moved_to_trash) { + finish(0); + return; + } + + notify_trash_add(); +} + +template <typename I> +void TrashMoveRequest<I>::notify_trash_add() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashMoveRequest<I>, &TrashMoveRequest<I>::handle_notify_trash_add>(this); + librbd::TrashWatcher<I>::notify_image_added(m_io_ctx, m_image_id, + m_trash_image_spec, ctx); +} + +template <typename I> +void TrashMoveRequest<I>::handle_notify_trash_add(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl; + } + + finish(0); +} + +template <typename I> +void TrashMoveRequest<I>::finish(int r) { + if (m_ret_val < 0) { + r = m_ret_val; + } + + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + diff --git a/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h new file mode 100644 index 00000000..07b7432e --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashMoveRequest.h @@ -0,0 +1,136 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_MOVE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_types.h" +#include <boost/optional.hpp> +#include <string> + +struct Context; +class ContextWQ; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashMoveRequest { +public: + static TrashMoveRequest* create(librados::IoCtx& io_ctx, + const std::string& global_image_id, + bool resync, ContextWQ* op_work_queue, + Context* on_finish) { + return new TrashMoveRequest(io_ctx, global_image_id, resync, op_work_queue, + on_finish); + } + + TrashMoveRequest(librados::IoCtx& io_ctx, const std::string& global_image_id, + bool resync, ContextWQ* op_work_queue, Context* on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), m_resync(resync), + m_op_work_queue(op_work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * GET_MIRROR_IMAGE_ID + * | + * v + * GET_TAG_OWNER + * | + * v + * DISABLE_MIRROR_IMAGE + * | + * v + * RESET_JOURNAL + * | + * v + * OPEN_IMAGE + * | + * v + * ACQUIRE_LOCK + * | + * v + * TRASH_MOVE + * | + * v + * REMOVE_MIRROR_IMAGE + * | + * v + * CLOSE_IMAGE + * | + * v + * NOTIFY_TRASH_ADD + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + bool m_resync; + ContextWQ *m_op_work_queue; + Context *m_on_finish; + + ceph::bufferlist m_out_bl; + std::string m_image_id; + std::string m_mirror_uuid; + cls::rbd::TrashImageSpec m_trash_image_spec; + ImageCtxT *m_image_ctx = nullptr;; + int m_ret_val = 0; + bool m_moved_to_trash = false; + + void get_mirror_image_id(); + void handle_get_mirror_image_id(int r); + + void get_tag_owner(); + void handle_get_tag_owner(int r); + + void disable_mirror_image(); + void handle_disable_mirror_image(int r); + + void reset_journal(); + void handle_reset_journal(int r); + + void open_image(); + void handle_open_image(int r); + + void acquire_lock(); + void handle_acquire_lock(int r); + + void trash_move(); + void handle_trash_move(int r); + + void remove_mirror_image(); + void handle_remove_mirror_image(int r); + + void close_image(); + void handle_close_image(int r); + + void notify_trash_add(); + void handle_notify_trash_add(int r); + + void finish(int r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashMoveRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc new file mode 100644 index 00000000..e7c725dc --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.cc @@ -0,0 +1,265 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashRemoveRequest.h" +#include "include/ceph_assert.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/TrashWatcher.h" +#include "librbd/Utils.h" +#include "librbd/trash/RemoveRequest.h" +#include "tools/rbd_mirror/image_deleter/SnapshotPurgeRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashRemoveRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_deleter { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void TrashRemoveRequest<I>::send() { + *m_error_result = ERROR_RESULT_RETRY; + + get_trash_image_spec(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_trash_image_spec() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::trash_get_start(&op, m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_trash_image_spec>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_trash_image_spec(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_get_finish(&bl_it, &m_trash_image_spec); + } + + if (r == -ENOENT || (r >= 0 && m_trash_image_spec.source != + cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING)) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0) { + derr << "error getting image id " << m_image_id << " info from trash: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + if (m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_NORMAL && + m_trash_image_spec.state != cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + dout(10) << "image " << m_image_id << " is not in an expected trash state: " + << m_trash_image_spec.state << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } + + set_trash_state(); +} + +template <typename I> +void TrashRemoveRequest<I>::set_trash_state() { + if (m_trash_image_spec.state == cls::rbd::TRASH_IMAGE_STATE_REMOVING) { + get_snap_context(); + return; + } + + dout(10) << dendl; + + librados::ObjectWriteOperation op; + librbd::cls_client::trash_state_set(&op, m_image_id, + cls::rbd::TRASH_IMAGE_STATE_REMOVING, + cls::rbd::TRASH_IMAGE_STATE_NORMAL); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_set_trash_state>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_set_trash_state(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "image id " << m_image_id << " not in mirroring trash" << dendl; + finish(0); + return; + } else if (r < 0 && r != -EOPNOTSUPP) { + derr << "error setting trash image state for image id " << m_image_id + << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_snap_context(); +} + +template <typename I> +void TrashRemoveRequest<I>::get_snap_context() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::get_snapcontext_start(&op); + + std::string header_oid = librbd::util::header_name(m_image_id); + + auto aio_comp = create_rados_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_get_snap_context>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(header_oid, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_get_snap_context(int r) { + dout(10) << "r=" << r << dendl; + + ::SnapContext snapc; + if (r == 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::get_snapcontext_finish(&bl_it, &snapc); + } + if (r < 0 && r != -ENOENT) { + derr << "error retrieving snapshot context for image " + << m_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_has_snapshots = (!snapc.empty()); + purge_snapshots(); +} + +template <typename I> +void TrashRemoveRequest<I>::purge_snapshots() { + if (!m_has_snapshots) { + remove_image(); + return; + } + + dout(10) << dendl; + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_purge_snapshots>(this); + auto req = SnapshotPurgeRequest<I>::create(m_io_ctx, m_image_id, ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_purge_snapshots(int r) { + dout(10) << "r=" << r << dendl; + + if (r == -EBUSY) { + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(r); + return; + } else if (r < 0) { + derr << "failed to purge image snapshots: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + remove_image(); +} + +template <typename I> +void TrashRemoveRequest<I>::remove_image() { + dout(10) << dendl; + + auto ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_remove_image>(this); + auto req = librbd::trash::RemoveRequest<I>::create( + m_io_ctx, m_image_id, m_op_work_queue, true, m_progress_ctx, + ctx); + req->send(); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_remove_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -ENOTEMPTY) { + // image must have clone v2 snapshot still associated to child + dout(10) << "snapshots still in-use" << dendl; + *m_error_result = ERROR_RESULT_RETRY_IMMEDIATELY; + finish(-EBUSY); + return; + } + + if (r < 0 && r != -ENOENT) { + derr << "error removing image " << m_image_id << " " + << "(" << m_image_id << ") from local pool: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + notify_trash_removed(); +} + +template <typename I> +void TrashRemoveRequest<I>::notify_trash_removed() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + TrashRemoveRequest<I>, + &TrashRemoveRequest<I>::handle_notify_trash_removed>(this); + librbd::TrashWatcher<I>::notify_image_removed(m_io_ctx, m_image_id, ctx); +} + +template <typename I> +void TrashRemoveRequest<I>::handle_notify_trash_removed(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to notify trash watchers: " << cpp_strerror(r) << dendl; + } + + finish(0); +} + +template <typename I> +void TrashRemoveRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h new file mode 100644 index 00000000..d2295e8e --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashRemoveRequest.h @@ -0,0 +1,113 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H + +#include "include/rados/librados.hpp" +#include "include/buffer.h" +#include "cls/rbd/cls_rbd_types.h" +#include "librbd/internal.h" +#include "tools/rbd_mirror/image_deleter/Types.h" +#include <string> +#include <vector> + +class Context; +class ContextWQ; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_deleter { + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashRemoveRequest { +public: + static TrashRemoveRequest* create(librados::IoCtx &io_ctx, + const std::string &image_id, + ErrorResult *error_result, + ContextWQ *op_work_queue, + Context *on_finish) { + return new TrashRemoveRequest(io_ctx, image_id, error_result, op_work_queue, + on_finish); + } + + TrashRemoveRequest(librados::IoCtx &io_ctx, const std::string &image_id, + ErrorResult *error_result, ContextWQ *op_work_queue, + Context *on_finish) + : m_io_ctx(io_ctx), m_image_id(image_id), m_error_result(error_result), + m_op_work_queue(op_work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /* + * @verbatim + * + * <start> + * | + * v + * GET_TRASH_IMAGE_SPEC + * | + * v + * SET_TRASH_STATE + * | + * v + * GET_SNAP_CONTEXT + * | + * v + * PURGE_SNAPSHOTS + * | + * v + * TRASH_REMOVE + * | + * v + * NOTIFY_TRASH_REMOVE + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_image_id; + ErrorResult *m_error_result; + ContextWQ *m_op_work_queue; + Context *m_on_finish; + + ceph::bufferlist m_out_bl; + cls::rbd::TrashImageSpec m_trash_image_spec; + bool m_has_snapshots = false; + librbd::NoOpProgressContext m_progress_ctx; + + void get_trash_image_spec(); + void handle_get_trash_image_spec(int r); + + void set_trash_state(); + void handle_set_trash_state(int r); + + void get_snap_context(); + void handle_get_snap_context(int r); + + void purge_snapshots(); + void handle_purge_snapshots(int r); + + void remove_image(); + void handle_remove_image(int r); + + void notify_trash_removed(); + void handle_notify_trash_removed(int r); + + void finish(int r); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashRemoveRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TRASH_REMOVE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc new file mode 100644 index 00000000..8735dfb7 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.cc @@ -0,0 +1,384 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_deleter/TrashWatcher.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Timer.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_deleter/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_deleter::TrashWatcher: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { +namespace image_deleter { + +namespace { + +const size_t MAX_RETURN = 1024; + +} // anonymous namespace + +template <typename I> +TrashWatcher<I>::TrashWatcher(librados::IoCtx &io_ctx, Threads<I> *threads, + TrashListener& trash_listener) + : librbd::TrashWatcher<I>(io_ctx, threads->work_queue), + m_io_ctx(io_ctx), m_threads(threads), m_trash_listener(trash_listener), + m_lock(librbd::util::unique_lock_name( + "rbd::mirror::image_deleter::TrashWatcher", this)) { +} + +template <typename I> +void TrashWatcher<I>::init(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker locker(m_lock); + m_on_init_finish = on_finish; + + ceph_assert(!m_trash_list_in_progress); + m_trash_list_in_progress = true; + } + + create_trash(); +} + +template <typename I> +void TrashWatcher<I>::shut_down(Context *on_finish) { + dout(5) << dendl; + + { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + + ceph_assert(!m_shutting_down); + m_shutting_down = true; + if (m_timer_ctx != nullptr) { + m_threads->timer->cancel_event(m_timer_ctx); + m_timer_ctx = nullptr; + } + } + + auto ctx = new FunctionContext([this, on_finish](int r) { + unregister_watcher(on_finish); + }); + m_async_op_tracker.wait_for_ops(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) { + dout(10) << "image_id=" << image_id << dendl; + + Mutex::Locker locker(m_lock); + add_image(image_id, spec); +} + +template <typename I> +void TrashWatcher<I>::handle_image_removed(const std::string &image_id) { + // ignore removals -- the image deleter will ignore -ENOENTs +} + +template <typename I> +void TrashWatcher<I>::handle_rewatch_complete(int r) { + dout(5) << "r=" << r << dendl; + + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + return; + } else if (r == -ENOENT) { + dout(5) << "trash directory deleted" << dendl; + } else if (r < 0) { + derr << "unexpected error re-registering trash directory watch: " + << cpp_strerror(r) << dendl; + } + schedule_trash_list(30); +} + +template <typename I> +void TrashWatcher<I>::create_trash() { + dout(20) << dendl; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + librados::ObjectWriteOperation op; + op.create(false); + + m_async_op_tracker.start_op(); + auto aio_comp = create_rados_callback< + TrashWatcher<I>, &TrashWatcher<I>::handle_create_trash>(this); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashWatcher<I>::handle_create_trash(int r) { + dout(20) << "r=" << r << dendl; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + Context* on_init_finish = nullptr; + if (r == -EBLACKLISTED || r == -ENOENT) { + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + } else { + dout(0) << "detected pool no longer exists" << dendl; + } + + Mutex::Locker locker(m_lock); + std::swap(on_init_finish, m_on_init_finish); + m_trash_list_in_progress = false; + } else if (r < 0 && r != -EEXIST) { + derr << "failed to create trash object: " << cpp_strerror(r) << dendl; + { + Mutex::Locker locker(m_lock); + m_trash_list_in_progress = false; + } + + schedule_trash_list(30); + } else { + register_watcher(); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::register_watcher() { + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + // if the watch registration is in-flight, let the watcher + // handle the transition -- only (re-)register if it's not registered + if (!this->is_unregistered()) { + trash_list(true); + return; + } + + // first time registering or the watch failed + dout(5) << dendl; + m_async_op_tracker.start_op(); + + Context *ctx = create_context_callback< + TrashWatcher, &TrashWatcher<I>::handle_register_watcher>(this); + this->register_watch(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_register_watcher(int r) { + dout(5) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + if (r < 0) { + m_trash_list_in_progress = false; + } + } + + Context *on_init_finish = nullptr; + if (r >= 0) { + trash_list(true); + } else if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted" << dendl; + + Mutex::Locker locker(m_lock); + std::swap(on_init_finish, m_on_init_finish); + } else { + derr << "unexpected error registering trash directory watch: " + << cpp_strerror(r) << dendl; + schedule_trash_list(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::unregister_watcher(Context* on_finish) { + dout(5) << dendl; + + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this, on_finish](int r) { + handle_unregister_watcher(r, on_finish); + }); + this->unregister_watch(ctx); +} + +template <typename I> +void TrashWatcher<I>::handle_unregister_watcher(int r, Context* on_finish) { + dout(5) << "unregister_watcher: r=" << r << dendl; + if (r < 0) { + derr << "error unregistering watcher for trash directory: " + << cpp_strerror(r) << dendl; + } + m_async_op_tracker.finish_op(); + on_finish->complete(0); +} + +template <typename I> +void TrashWatcher<I>::trash_list(bool initial_request) { + if (initial_request) { + m_async_op_tracker.start_op(); + m_last_image_id = ""; + } + + dout(5) << "last_image_id=" << m_last_image_id << dendl; + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + } + + librados::ObjectReadOperation op; + librbd::cls_client::trash_list_start(&op, m_last_image_id, MAX_RETURN); + + librados::AioCompletion *aio_comp = create_rados_callback< + TrashWatcher<I>, &TrashWatcher<I>::handle_trash_list>(this); + m_out_bl.clear(); + int r = m_io_ctx.aio_operate(RBD_TRASH, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void TrashWatcher<I>::handle_trash_list(int r) { + dout(5) << "r=" << r << dendl; + + std::map<std::string, cls::rbd::TrashImageSpec> images; + if (r >= 0) { + auto bl_it = m_out_bl.cbegin(); + r = librbd::cls_client::trash_list_finish(&bl_it, &images); + } + + Context *on_init_finish = nullptr; + { + Mutex::Locker locker(m_lock); + ceph_assert(m_trash_list_in_progress); + if (r >= 0) { + for (auto& image : images) { + add_image(image.first, image.second); + } + } else if (r == -ENOENT) { + r = 0; + } + + if (r == -EBLACKLISTED) { + dout(0) << "detected client is blacklisted during trash refresh" << dendl; + m_trash_list_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r >= 0 && images.size() < MAX_RETURN) { + m_trash_list_in_progress = false; + std::swap(on_init_finish, m_on_init_finish); + } else if (r < 0) { + m_trash_list_in_progress = false; + } + } + + if (r >= 0 && images.size() == MAX_RETURN) { + m_last_image_id = images.rbegin()->first; + trash_list(false); + return; + } else if (r < 0 && r != -EBLACKLISTED) { + derr << "failed to retrieve trash directory: " << cpp_strerror(r) << dendl; + schedule_trash_list(10); + } + + m_async_op_tracker.finish_op(); + if (on_init_finish != nullptr) { + on_init_finish->complete(r); + } +} + +template <typename I> +void TrashWatcher<I>::schedule_trash_list(double interval) { + Mutex::Locker timer_locker(m_threads->timer_lock); + Mutex::Locker locker(m_lock); + if (m_shutting_down || m_trash_list_in_progress || m_timer_ctx != nullptr) { + if (m_trash_list_in_progress && !m_deferred_trash_list) { + dout(5) << "deferring refresh until in-flight refresh completes" << dendl; + m_deferred_trash_list = true; + } + return; + } + + dout(5) << dendl; + m_timer_ctx = m_threads->timer->add_event_after( + interval, + new FunctionContext([this](int r) { + process_trash_list(); + })); +} + +template <typename I> +void TrashWatcher<I>::process_trash_list() { + dout(5) << dendl; + + ceph_assert(m_threads->timer_lock.is_locked()); + ceph_assert(m_timer_ctx != nullptr); + m_timer_ctx = nullptr; + + { + Mutex::Locker locker(m_lock); + ceph_assert(!m_trash_list_in_progress); + m_trash_list_in_progress = true; + } + + // execute outside of the timer's lock + m_async_op_tracker.start_op(); + Context *ctx = new FunctionContext([this](int r) { + create_trash(); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +template <typename I> +void TrashWatcher<I>::add_image(const std::string& image_id, + const cls::rbd::TrashImageSpec& spec) { + if (spec.source != cls::rbd::TRASH_IMAGE_SOURCE_MIRRORING) { + return; + } + + ceph_assert(m_lock.is_locked()); + auto& deferment_end_time = spec.deferment_end_time; + dout(10) << "image_id=" << image_id << ", " + << "deferment_end_time=" << deferment_end_time << dendl; + + m_async_op_tracker.start_op(); + auto ctx = new FunctionContext([this, image_id, deferment_end_time](int r) { + m_trash_listener.handle_trash_image(image_id, deferment_end_time); + m_async_op_tracker.finish_op(); + }); + m_threads->work_queue->queue(ctx, 0); +} + +} // namespace image_deleter; +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_deleter/TrashWatcher.h b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h new file mode 100644 index 00000000..b6f69833 --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/TrashWatcher.h @@ -0,0 +1,139 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H +#define CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H + +#include "include/rados/librados.hpp" +#include "common/AsyncOpTracker.h" +#include "common/Mutex.h" +#include "librbd/TrashWatcher.h" +#include <set> +#include <string> + +struct Context; +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_deleter { + +struct TrashListener; + +template <typename ImageCtxT = librbd::ImageCtx> +class TrashWatcher : public librbd::TrashWatcher<ImageCtxT> { +public: + static TrashWatcher* create(librados::IoCtx &io_ctx, + Threads<ImageCtxT> *threads, + TrashListener& trash_listener) { + return new TrashWatcher(io_ctx, threads, trash_listener); + } + + TrashWatcher(librados::IoCtx &io_ctx, Threads<ImageCtxT> *threads, + TrashListener& trash_listener); + TrashWatcher(const TrashWatcher&) = delete; + TrashWatcher& operator=(const TrashWatcher&) = delete; + + void init(Context *on_finish); + void shut_down(Context *on_finish); + +protected: + void handle_image_added(const std::string &image_id, + const cls::rbd::TrashImageSpec& spec) override; + + void handle_image_removed(const std::string &image_id) override; + + void handle_rewatch_complete(int r) override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * INIT + * | + * v + * CREATE_TRASH + * | + * v + * REGISTER_WATCHER + * | + * |/--------------------------------\ + * | | + * |/---------\ | + * | | | + * v | (more images) | + * TRASH_LIST ---/ | + * | | + * |/----------------------------\ | + * | | | + * v | | + * <idle> --\ | | + * | | | | + * | |\---> IMAGE_ADDED -----/ | + * | | | + * | \----> WATCH_ERROR ---------/ + * v + * SHUT_DOWN + * | + * v + * UNREGISTER_WATCHER + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx m_io_ctx; + Threads<ImageCtxT> *m_threads; + TrashListener& m_trash_listener; + + std::string m_last_image_id; + bufferlist m_out_bl; + + mutable Mutex m_lock; + + Context *m_on_init_finish = nullptr; + Context *m_timer_ctx = nullptr; + + AsyncOpTracker m_async_op_tracker; + bool m_trash_list_in_progress = false; + bool m_deferred_trash_list = false; + bool m_shutting_down = false; + + void register_watcher(); + void handle_register_watcher(int r); + + void create_trash(); + void handle_create_trash(int r); + + void unregister_watcher(Context* on_finish); + void handle_unregister_watcher(int r, Context* on_finish); + + void trash_list(bool initial_request); + void handle_trash_list(int r); + + void schedule_trash_list(double interval); + void process_trash_list(); + + void get_mirror_uuid(); + void handle_get_mirror_uuid(int r); + + void add_image(const std::string& image_id, + const cls::rbd::TrashImageSpec& spec); + +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_deleter::TrashWatcher<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETE_TRASH_WATCHER_H diff --git a/src/tools/rbd_mirror/image_deleter/Types.h b/src/tools/rbd_mirror/image_deleter/Types.h new file mode 100644 index 00000000..ac3bc64a --- /dev/null +++ b/src/tools/rbd_mirror/image_deleter/Types.h @@ -0,0 +1,54 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H + +#include "include/Context.h" +#include "librbd/journal/Policy.h" +#include <string> + +struct utime_t; + +namespace rbd { +namespace mirror { +namespace image_deleter { + +enum ErrorResult { + ERROR_RESULT_COMPLETE, + ERROR_RESULT_RETRY, + ERROR_RESULT_RETRY_IMMEDIATELY +}; + +struct TrashListener { + TrashListener() { + } + TrashListener(const TrashListener&) = delete; + TrashListener& operator=(const TrashListener&) = delete; + + virtual ~TrashListener() { + } + + virtual void handle_trash_image(const std::string& image_id, + const utime_t& deferment_end_time) = 0; + +}; + +struct JournalPolicy : public librbd::journal::Policy { + bool append_disabled() const override { + return true; + } + bool journal_disabled() const override { + return true; + } + + void allocate_tag_on_lock(Context *on_finish) override { + on_finish->complete(0); + } +}; + +} // namespace image_deleter +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_DELETER_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.cc b/src/tools/rbd_mirror/image_map/LoadRequest.cc new file mode 100644 index 00000000..7387b476 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/LoadRequest.cc @@ -0,0 +1,98 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "LoadRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::LoadRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +static const uint32_t MAX_RETURN = 1024; + +using librbd::util::create_rados_callback; + +template<typename I> +LoadRequest<I>::LoadRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish) + : m_ioctx(ioctx), + m_image_mapping(image_mapping), + m_on_finish(on_finish) { +} + +template<typename I> +void LoadRequest<I>::send() { + dout(20) << dendl; + + image_map_list(); +} + +template<typename I> +void LoadRequest<I>::image_map_list() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_map_list_start(&op, m_start_after, MAX_RETURN); + + librados::AioCompletion *aio_comp = create_rados_callback< + LoadRequest, &LoadRequest::handle_image_map_list>(this); + + m_out_bl.clear(); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template<typename I> +void LoadRequest<I>::handle_image_map_list(int r) { + dout(20) << ": r=" << r << dendl; + + std::map<std::string, cls::rbd::MirrorImageMap> image_mapping; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_map_list_finish(&it, &image_mapping); + } + + if (r < 0) { + derr << ": failed to get image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_image_mapping->insert(image_mapping.begin(), image_mapping.end()); + + if (image_mapping.size() == MAX_RETURN) { + m_start_after = image_mapping.rbegin()->first; + image_map_list(); + return; + } + + finish(0); +} + +template<typename I> +void LoadRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::LoadRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/LoadRequest.h b/src/tools/rbd_mirror/image_map/LoadRequest.h new file mode 100644 index 00000000..7657e110 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/LoadRequest.h @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = librbd::ImageCtx> +class LoadRequest { +public: + static LoadRequest *create(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish) { + return new LoadRequest(ioctx, image_mapping, on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . . . . + * v v . MAX_RETURN + * IMAGE_MAP_LIST. . . . . . . + * | + * v + * <finish> + * + * @endverbatim + */ + LoadRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> *image_mapping, + Context *on_finish); + + librados::IoCtx &m_ioctx; + std::map<std::string, cls::rbd::MirrorImageMap> *m_image_mapping; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_start_after; + + void image_map_list(); + void handle_image_map_list(int r); + + void finish(int r); +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_LOAD_REQUEST_H diff --git a/src/tools/rbd_mirror/image_map/Policy.cc b/src/tools/rbd_mirror/image_map/Policy.cc new file mode 100644 index 00000000..6fababdd --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.cc @@ -0,0 +1,406 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "Policy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::Policy: " << this \ + << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_map { + +namespace { + +bool is_instance_action(ActionType action_type) { + switch (action_type) { + case ACTION_TYPE_ACQUIRE: + case ACTION_TYPE_RELEASE: + return true; + case ACTION_TYPE_NONE: + case ACTION_TYPE_MAP_UPDATE: + case ACTION_TYPE_MAP_REMOVE: + break; + } + return false; +} + +} // anonymous namespace + +using ::operator<<; +using librbd::util::unique_lock_name; + +Policy::Policy(librados::IoCtx &ioctx) + : m_ioctx(ioctx), + m_map_lock(unique_lock_name("rbd::mirror::image_map::Policy::m_map_lock", + this)) { + + // map should at least have once instance + std::string instance_id = stringify(ioctx.get_instance_id()); + m_map.emplace(instance_id, std::set<std::string>{}); +} + +void Policy::init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping) { + dout(20) << dendl; + + RWLock::WLocker map_lock(m_map_lock); + for (auto& it : image_mapping) { + ceph_assert(!it.second.instance_id.empty()); + auto map_result = m_map[it.second.instance_id].emplace(it.first); + ceph_assert(map_result.second); + + auto image_state_result = m_image_states.emplace( + it.first, ImageState{it.second.instance_id, it.second.mapped_time}); + ceph_assert(image_state_result.second); + + // ensure we (re)send image acquire actions to the instance + auto& image_state = image_state_result.first->second; + auto start_action = set_state(&image_state, + StateTransition::STATE_INITIALIZING, false); + ceph_assert(start_action); + } +} + +LookupInfo Policy::lookup(const std::string &global_image_id) { + dout(20) << "global_image_id=" << global_image_id << dendl; + + RWLock::RLocker map_lock(m_map_lock); + LookupInfo info; + + auto it = m_image_states.find(global_image_id); + if (it != m_image_states.end()) { + info.instance_id = it->second.instance_id; + info.mapped_time = it->second.mapped_time; + } + return info; +} + +bool Policy::add_image(const std::string &global_image_id) { + dout(5) << "global_image_id=" << global_image_id << dendl; + + RWLock::WLocker map_lock(m_map_lock); + auto image_state_result = m_image_states.emplace(global_image_id, + ImageState{}); + auto& image_state = image_state_result.first->second; + if (image_state.state == StateTransition::STATE_INITIALIZING) { + // avoid duplicate acquire notifications upon leader startup + return false; + } + + return set_state(&image_state, StateTransition::STATE_ASSOCIATING, false); +} + +bool Policy::remove_image(const std::string &global_image_id) { + dout(5) << "global_image_id=" << global_image_id << dendl; + + RWLock::WLocker map_lock(m_map_lock); + auto it = m_image_states.find(global_image_id); + if (it == m_image_states.end()) { + return false; + } + + auto& image_state = it->second; + return set_state(&image_state, StateTransition::STATE_DISSOCIATING, false); +} + +void Policy::add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + dout(5) << "instance_ids=" << instance_ids << dendl; + + RWLock::WLocker map_lock(m_map_lock); + for (auto& instance : instance_ids) { + ceph_assert(!instance.empty()); + m_map.emplace(instance, std::set<std::string>{}); + } + + // post-failover, remove any dead instances and re-shuffle their images + if (m_initial_update) { + dout(5) << "initial instance update" << dendl; + m_initial_update = false; + + std::set<std::string> alive_instances(instance_ids.begin(), + instance_ids.end()); + InstanceIds dead_instances; + for (auto& map_pair : m_map) { + if (alive_instances.find(map_pair.first) == alive_instances.end()) { + dead_instances.push_back(map_pair.first); + } + } + + if (!dead_instances.empty()) { + remove_instances(m_map_lock, dead_instances, global_image_ids); + } + } + + GlobalImageIds shuffle_global_image_ids; + do_shuffle_add_instances(m_map, m_image_states.size(), &shuffle_global_image_ids); + dout(5) << "shuffling global_image_ids=[" << shuffle_global_image_ids + << "]" << dendl; + for (auto& global_image_id : shuffle_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, false)) { + global_image_ids->emplace(global_image_id); + } + } +} + +void Policy::remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + RWLock::WLocker map_lock(m_map_lock); + remove_instances(m_map_lock, instance_ids, global_image_ids); +} + +void Policy::remove_instances(const RWLock& lock, + const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids) { + ceph_assert(m_map_lock.is_wlocked()); + dout(5) << "instance_ids=" << instance_ids << dendl; + + for (auto& instance_id : instance_ids) { + auto map_it = m_map.find(instance_id); + if (map_it == m_map.end()) { + continue; + } + + auto& instance_global_image_ids = map_it->second; + if (instance_global_image_ids.empty()) { + m_map.erase(map_it); + continue; + } + + m_dead_instances.insert(instance_id); + dout(5) << "force shuffling: instance_id=" << instance_id << ", " + << "global_image_ids=[" << instance_global_image_ids << "]"<< dendl; + for (auto& global_image_id : instance_global_image_ids) { + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + if (is_state_scheduled(image_state, + StateTransition::STATE_DISSOCIATING)) { + // don't shuffle images that no longer exist + continue; + } + + if (set_state(&image_state, StateTransition::STATE_SHUFFLING, true)) { + global_image_ids->emplace(global_image_id); + } + } + } +} + +ActionType Policy::start_action(const std::string &global_image_id) { + RWLock::WLocker map_lock(m_map_lock); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + ceph_assert(transition.action_type != ACTION_TYPE_NONE); + + dout(5) << "global_image_id=" << global_image_id << ", " + << "state=" << image_state.state << ", " + << "action_type=" << transition.action_type << dendl; + if (transition.start_policy_action) { + execute_policy_action(global_image_id, &image_state, + *transition.start_policy_action); + transition.start_policy_action = boost::none; + } + return transition.action_type; +} + +bool Policy::finish_action(const std::string &global_image_id, int r) { + RWLock::WLocker map_lock(m_map_lock); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + + auto& image_state = it->second; + auto& transition = image_state.transition; + dout(5) << "global_image_id=" << global_image_id << ", " + << "state=" << image_state.state << ", " + << "action_type=" << transition.action_type << ", " + << "r=" << r << dendl; + + // retry on failure unless it's an RPC message to an instance that is dead + if (r < 0 && + (!is_instance_action(image_state.transition.action_type) || + image_state.instance_id == UNMAPPED_INSTANCE_ID || + m_dead_instances.find(image_state.instance_id) == + m_dead_instances.end())) { + return true; + } + + auto finish_policy_action = transition.finish_policy_action; + StateTransition::transit(image_state.state, &image_state.transition); + if (transition.finish_state) { + // in-progress state machine complete + ceph_assert(StateTransition::is_idle(*transition.finish_state)); + image_state.state = *transition.finish_state; + image_state.transition = {}; + } + + if (StateTransition::is_idle(image_state.state) && image_state.next_state) { + // advance to pending state machine + bool start_action = set_state(&image_state, *image_state.next_state, false); + ceph_assert(start_action); + } + + // image state may get purged in execute_policy_action() + bool pending_action = image_state.transition.action_type != ACTION_TYPE_NONE; + if (finish_policy_action) { + execute_policy_action(global_image_id, &image_state, *finish_policy_action); + } + + return pending_action; +} + +void Policy::execute_policy_action( + const std::string& global_image_id, ImageState* image_state, + StateTransition::PolicyAction policy_action) { + dout(5) << "global_image_id=" << global_image_id << ", " + << "policy_action=" << policy_action << dendl; + + switch (policy_action) { + case StateTransition::POLICY_ACTION_MAP: + map(global_image_id, image_state); + break; + case StateTransition::POLICY_ACTION_UNMAP: + unmap(global_image_id, image_state); + break; + case StateTransition::POLICY_ACTION_REMOVE: + if (image_state->state == StateTransition::STATE_UNASSOCIATED) { + ceph_assert(image_state->instance_id == UNMAPPED_INSTANCE_ID); + ceph_assert(!image_state->next_state); + m_image_states.erase(global_image_id); + } + break; + } +} + +void Policy::map(const std::string& global_image_id, ImageState* image_state) { + ceph_assert(m_map_lock.is_wlocked()); + + std::string instance_id = image_state->instance_id; + if (instance_id != UNMAPPED_INSTANCE_ID && !is_dead_instance(instance_id)) { + return; + } + if (is_dead_instance(instance_id)) { + unmap(global_image_id, image_state); + } + + instance_id = do_map(m_map, global_image_id); + ceph_assert(!instance_id.empty()); + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + image_state->instance_id = instance_id; + image_state->mapped_time = ceph_clock_now(); + + auto ins = m_map[instance_id].emplace(global_image_id); + ceph_assert(ins.second); +} + +void Policy::unmap(const std::string &global_image_id, + ImageState* image_state) { + ceph_assert(m_map_lock.is_wlocked()); + + std::string instance_id = image_state->instance_id; + if (instance_id == UNMAPPED_INSTANCE_ID) { + return; + } + + dout(5) << "global_image_id=" << global_image_id << ", " + << "instance_id=" << instance_id << dendl; + + ceph_assert(!instance_id.empty()); + m_map[instance_id].erase(global_image_id); + image_state->instance_id = UNMAPPED_INSTANCE_ID; + image_state->mapped_time = {}; + + if (is_dead_instance(instance_id) && m_map[instance_id].empty()) { + dout(5) << "removing dead instance_id=" << instance_id << dendl; + m_map.erase(instance_id); + m_dead_instances.erase(instance_id); + } +} + +bool Policy::is_image_shuffling(const std::string &global_image_id) { + ceph_assert(m_map_lock.is_locked()); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + // avoid attempting to re-shuffle a pending shuffle + auto result = is_state_scheduled(image_state, + StateTransition::STATE_SHUFFLING); + dout(20) << "global_image_id=" << global_image_id << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::can_shuffle_image(const std::string &global_image_id) { + ceph_assert(m_map_lock.is_locked()); + + CephContext *cct = reinterpret_cast<CephContext *>(m_ioctx.cct()); + int migration_throttle = cct->_conf.get_val<uint64_t>( + "rbd_mirror_image_policy_migration_throttle"); + + auto it = m_image_states.find(global_image_id); + ceph_assert(it != m_image_states.end()); + auto& image_state = it->second; + + utime_t last_shuffled_time = image_state.mapped_time; + + // idle images that haven't been recently remapped can shuffle + utime_t now = ceph_clock_now(); + auto result = (StateTransition::is_idle(image_state.state) && + ((migration_throttle <= 0) || + (now - last_shuffled_time >= migration_throttle))); + dout(10) << "global_image_id=" << global_image_id << ", " + << "migration_throttle=" << migration_throttle << ", " + << "last_shuffled_time=" << last_shuffled_time << ", " + << "result=" << result << dendl; + return result; +} + +bool Policy::set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state) { + if (!ignore_current_state && image_state->state == state) { + return false; + } else if (StateTransition::is_idle(image_state->state)) { + image_state->state = state; + image_state->next_state = boost::none; + + StateTransition::transit(image_state->state, &image_state->transition); + ceph_assert(image_state->transition.action_type != ACTION_TYPE_NONE); + ceph_assert(!image_state->transition.finish_state); + return true; + } + + image_state->next_state = state; + return false; +} + +bool Policy::is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const { + return (image_state.state == state || + (image_state.next_state && *image_state.next_state == state)); +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Policy.h b/src/tools/rbd_mirror/image_map/Policy.h new file mode 100644 index 00000000..590fdbfe --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Policy.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H + +#include <map> +#include <tuple> +#include <boost/optional.hpp> + +#include "common/RWLock.h" +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/image_map/StateTransition.h" +#include "tools/rbd_mirror/image_map/Types.h" + +class Context; + +namespace rbd { +namespace mirror { +namespace image_map { + +class Policy { +public: + Policy(librados::IoCtx &ioctx); + + virtual ~Policy() { + } + + // init -- called during initialization + void init( + const std::map<std::string, cls::rbd::MirrorImageMap> &image_mapping); + + // lookup an image from the map + LookupInfo lookup(const std::string &global_image_id); + + // add, remove + bool add_image(const std::string &global_image_id); + bool remove_image(const std::string &global_image_id); + + // shuffle images when instances are added/removed + void add_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + void remove_instances(const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + + ActionType start_action(const std::string &global_image_id); + bool finish_action(const std::string &global_image_id, int r); + +protected: + typedef std::map<std::string, std::set<std::string> > InstanceToImageMap; + + bool is_dead_instance(const std::string instance_id) { + ceph_assert(m_map_lock.is_locked()); + return m_dead_instances.find(instance_id) != m_dead_instances.end(); + } + + bool is_image_shuffling(const std::string &global_image_id); + bool can_shuffle_image(const std::string &global_image_id); + + // map an image (global image id) to an instance + virtual std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) = 0; + + // shuffle images when instances are added/removed + virtual void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) = 0; + +private: + struct ImageState { + std::string instance_id = UNMAPPED_INSTANCE_ID; + utime_t mapped_time; + + ImageState() {} + ImageState(const std::string& instance_id, const utime_t& mapped_time) + : instance_id(instance_id), mapped_time(mapped_time) { + } + + // active state and action + StateTransition::State state = StateTransition::STATE_UNASSOCIATED; + StateTransition::Transition transition; + + // next scheduled state + boost::optional<StateTransition::State> next_state = boost::none; + }; + + typedef std::map<std::string, ImageState> ImageStates; + + librados::IoCtx &m_ioctx; + + RWLock m_map_lock; // protects m_map + InstanceToImageMap m_map; // instance_id -> global_id map + + ImageStates m_image_states; + std::set<std::string> m_dead_instances; + + bool m_initial_update = true; + + void remove_instances(const RWLock& lock, const InstanceIds &instance_ids, + GlobalImageIds* global_image_ids); + + bool set_state(ImageState* image_state, StateTransition::State state, + bool ignore_current_state); + + void execute_policy_action(const std::string& global_image_id, + ImageState* image_state, + StateTransition::PolicyAction policy_action); + + void map(const std::string& global_image_id, ImageState* image_state); + void unmap(const std::string &global_image_id, ImageState* image_state); + + bool is_state_scheduled(const ImageState& image_state, + StateTransition::State state) const; + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.cc b/src/tools/rbd_mirror/image_map/SimplePolicy.cc new file mode 100644 index 00000000..f2680581 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "SimplePolicy.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::SimplePolicy: " << this \ + << " " << __func__ << ": " +namespace rbd { +namespace mirror { +namespace image_map { + +SimplePolicy::SimplePolicy(librados::IoCtx &ioctx) + : Policy(ioctx) { +} + +size_t SimplePolicy::calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count) { + size_t nr_instances = 0; + for (auto const &it : map) { + if (!Policy::is_dead_instance(it.first)) { + ++nr_instances; + } + } + ceph_assert(nr_instances > 0); + + size_t images_per_instance = image_count / nr_instances; + if (images_per_instance == 0) { + ++images_per_instance; + } + + return images_per_instance; +} + +void SimplePolicy::do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) { + uint64_t images_per_instance = calc_images_per_instance(map, image_count); + dout(5) << "images per instance=" << images_per_instance << dendl; + + for (auto const &instance : map) { + if (instance.second.size() <= images_per_instance) { + continue; + } + + auto it = instance.second.begin(); + uint64_t cut_off = instance.second.size() - images_per_instance; + + while (it != instance.second.end() && cut_off > 0) { + if (Policy::is_image_shuffling(*it)) { + --cut_off; + } else if (Policy::can_shuffle_image(*it)) { + --cut_off; + remap_global_image_ids->emplace(*it); + } + + ++it; + } + } +} + +std::string SimplePolicy::do_map(const InstanceToImageMap& map, + const std::string &global_image_id) { + auto min_it = map.end(); + for (auto it = map.begin(); it != map.end(); ++it) { + ceph_assert(it->second.find(global_image_id) == it->second.end()); + if (Policy::is_dead_instance(it->first)) { + continue; + } else if (min_it == map.end()) { + min_it = it; + } else if (it->second.size() < min_it->second.size()) { + min_it = it; + } + } + + ceph_assert(min_it != map.end()); + dout(20) << "global_image_id=" << global_image_id << " maps to instance_id=" + << min_it->first << dendl; + return min_it->first; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/SimplePolicy.h b/src/tools/rbd_mirror/image_map/SimplePolicy.h new file mode 100644 index 00000000..ad2071b2 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/SimplePolicy.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H + +#include "Policy.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +class SimplePolicy : public Policy { +public: + static SimplePolicy *create(librados::IoCtx &ioctx) { + return new SimplePolicy(ioctx); + } + +protected: + SimplePolicy(librados::IoCtx &ioctx); + + std::string do_map(const InstanceToImageMap& map, + const std::string &global_image_id) override; + + void do_shuffle_add_instances( + const InstanceToImageMap& map, size_t image_count, + std::set<std::string> *remap_global_image_ids) override; + +private: + size_t calc_images_per_instance(const InstanceToImageMap& map, + size_t image_count); + +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_SIMPLE_POLICY_H diff --git a/src/tools/rbd_mirror/image_map/StateTransition.cc b/src/tools/rbd_mirror/image_map/StateTransition.cc new file mode 100644 index 00000000..ec5f07ff --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <ostream> +#include "include/ceph_assert.h" +#include "StateTransition.h" + +namespace rbd { +namespace mirror { +namespace image_map { + +std::ostream &operator<<(std::ostream &os, + const StateTransition::State &state) { + switch(state) { + case StateTransition::STATE_INITIALIZING: + os << "INITIALIZING"; + break; + case StateTransition::STATE_ASSOCIATING: + os << "ASSOCIATING"; + break; + case StateTransition::STATE_ASSOCIATED: + os << "ASSOCIATED"; + break; + case StateTransition::STATE_SHUFFLING: + os << "SHUFFLING"; + break; + case StateTransition::STATE_DISSOCIATING: + os << "DISSOCIATING"; + break; + case StateTransition::STATE_UNASSOCIATED: + os << "UNASSOCIATED"; + break; + } + return os; +} + +std::ostream &operator<<(std::ostream &os, + const StateTransition::PolicyAction &policy_action) { + switch(policy_action) { + case StateTransition::POLICY_ACTION_MAP: + os << "MAP"; + break; + case StateTransition::POLICY_ACTION_UNMAP: + os << "UNMAP"; + break; + case StateTransition::POLICY_ACTION_REMOVE: + os << "REMOVE"; + break; + } + return os; +} + +const StateTransition::TransitionTable StateTransition::s_transition_table { + // state current_action Transition + // --------------------------------------------------------------------------- + {{STATE_INITIALIZING, ACTION_TYPE_NONE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_INITIALIZING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_ASSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_ASSOCIATING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}}, + + {{STATE_DISSOCIATING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_REMOVE, {}, + {POLICY_ACTION_REMOVE}, {}}}, + {{STATE_DISSOCIATING, ACTION_TYPE_MAP_REMOVE}, {ACTION_TYPE_NONE, {}, + {}, {STATE_UNASSOCIATED}}}, + + {{STATE_SHUFFLING, ACTION_TYPE_NONE}, {ACTION_TYPE_RELEASE, {}, + {POLICY_ACTION_UNMAP}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_RELEASE}, {ACTION_TYPE_MAP_UPDATE, + {POLICY_ACTION_MAP}, {}, {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_MAP_UPDATE}, {ACTION_TYPE_ACQUIRE, {}, {}, + {}}}, + {{STATE_SHUFFLING, ACTION_TYPE_ACQUIRE}, {ACTION_TYPE_NONE, {}, {}, + {STATE_ASSOCIATED}}} +}; + +void StateTransition::transit(State state, Transition* transition) { + auto it = s_transition_table.find({state, transition->action_type}); + ceph_assert(it != s_transition_table.end()); + + *transition = it->second; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/StateTransition.h b/src/tools/rbd_mirror/image_map/StateTransition.h new file mode 100644 index 00000000..02a5ce4e --- /dev/null +++ b/src/tools/rbd_mirror/image_map/StateTransition.h @@ -0,0 +1,76 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H + +#include "tools/rbd_mirror/image_map/Types.h" +#include <boost/optional.hpp> +#include <map> + +namespace rbd { +namespace mirror { +namespace image_map { + +class StateTransition { +public: + enum State { + STATE_UNASSOCIATED, + STATE_INITIALIZING, + STATE_ASSOCIATING, + STATE_ASSOCIATED, + STATE_SHUFFLING, + STATE_DISSOCIATING + }; + + enum PolicyAction { + POLICY_ACTION_MAP, + POLICY_ACTION_UNMAP, + POLICY_ACTION_REMOVE + }; + + struct Transition { + // image map action + ActionType action_type = ACTION_TYPE_NONE; + + // policy internal action + boost::optional<PolicyAction> start_policy_action; + boost::optional<PolicyAction> finish_policy_action; + + // state machine complete + boost::optional<State> finish_state; + + Transition() { + } + Transition(ActionType action_type, + const boost::optional<PolicyAction>& start_policy_action, + const boost::optional<PolicyAction>& finish_policy_action, + const boost::optional<State>& finish_state) + : action_type(action_type), start_policy_action(start_policy_action), + finish_policy_action(finish_policy_action), finish_state(finish_state) { + } + }; + + static bool is_idle(State state) { + return (state == STATE_UNASSOCIATED || state == STATE_ASSOCIATED); + } + + static void transit(State state, Transition* transition); + +private: + typedef std::pair<State, ActionType> TransitionKey; + typedef std::map<TransitionKey, Transition> TransitionTable; + + // image transition table + static const TransitionTable s_transition_table; +}; + +std::ostream &operator<<(std::ostream &os, const StateTransition::State &state); +std::ostream &operator<<(std::ostream &os, + const StateTransition::PolicyAction &policy_action); + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_STATE_TRANSITION_H diff --git a/src/tools/rbd_mirror/image_map/Types.cc b/src/tools/rbd_mirror/image_map/Types.cc new file mode 100644 index 00000000..47de9c3c --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.cc @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" +#include <iostream> + +namespace rbd { +namespace mirror { +namespace image_map { + +const std::string UNMAPPED_INSTANCE_ID(""); + +namespace { + +template <typename E> +class GetTypeVisitor : public boost::static_visitor<E> { +public: + template <typename T> + inline E operator()(const T&) const { + return T::TYPE; + } +}; + +class EncodeVisitor : public boost::static_visitor<void> { +public: + explicit EncodeVisitor(bufferlist &bl) : m_bl(bl) { + } + + template <typename T> + inline void operator()(const T& t) const { + using ceph::encode; + encode(static_cast<uint32_t>(T::TYPE), m_bl); + t.encode(m_bl); + } +private: + bufferlist &m_bl; +}; + +class DecodeVisitor : public boost::static_visitor<void> { +public: + DecodeVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) { + } + + template <typename T> + inline void operator()(T& t) const { + t.decode(m_version, m_iter); + } +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpVisitor : public boost::static_visitor<void> { +public: + explicit DumpVisitor(Formatter *formatter, const std::string &key) + : m_formatter(formatter), m_key(key) {} + + template <typename T> + inline void operator()(const T& t) const { + auto type = T::TYPE; + m_formatter->dump_string(m_key.c_str(), stringify(type)); + t.dump(m_formatter); + } +private: + ceph::Formatter *m_formatter; + std::string m_key; +}; + +} // anonymous namespace + +PolicyMetaType PolicyData::get_policy_meta_type() const { + return boost::apply_visitor(GetTypeVisitor<PolicyMetaType>(), policy_meta); +} + +void PolicyData::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + boost::apply_visitor(EncodeVisitor(bl), policy_meta); + ENCODE_FINISH(bl); +} + +void PolicyData::decode(bufferlist::const_iterator& it) { + DECODE_START(1, it); + + uint32_t policy_meta_type; + decode(policy_meta_type, it); + + switch (policy_meta_type) { + case POLICY_META_TYPE_NONE: + policy_meta = PolicyMetaNone(); + break; + default: + policy_meta = PolicyMetaUnknown(); + break; + } + + boost::apply_visitor(DecodeVisitor(struct_v, it), policy_meta); + DECODE_FINISH(it); +} + +void PolicyData::dump(Formatter *f) const { + boost::apply_visitor(DumpVisitor(f, "policy_meta_type"), policy_meta); +} + +void PolicyData::generate_test_instances(std::list<PolicyData *> &o) { + o.push_back(new PolicyData(PolicyMetaNone())); +} + +std::ostream &operator<<(std::ostream &os, const ActionType& action_type) { + switch (action_type) { + case ACTION_TYPE_NONE: + os << "NONE"; + break; + case ACTION_TYPE_MAP_UPDATE: + os << "MAP_UPDATE"; + break; + case ACTION_TYPE_MAP_REMOVE: + os << "MAP_REMOVE"; + break; + case ACTION_TYPE_ACQUIRE: + os << "ACQUIRE"; + break; + case ACTION_TYPE_RELEASE: + os << "RELEASE"; + break; + default: + os << "UNKNOWN (" << static_cast<uint32_t>(action_type) << ")"; + break; + } + return os; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/image_map/Types.h b/src/tools/rbd_mirror/image_map/Types.h new file mode 100644 index 00000000..5a97430f --- /dev/null +++ b/src/tools/rbd_mirror/image_map/Types.h @@ -0,0 +1,130 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H + +#include <iosfwd> +#include <map> +#include <set> +#include <string> +#include <boost/variant.hpp> + +#include "include/buffer.h" +#include "include/encoding.h" +#include "include/utime.h" +#include "tools/rbd_mirror/Types.h" + +struct Context; + +namespace ceph { +class Formatter; +} + +namespace rbd { +namespace mirror { +namespace image_map { + +extern const std::string UNMAPPED_INSTANCE_ID; + +struct Listener { + virtual ~Listener() { + } + + virtual void acquire_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) = 0; + virtual void release_image(const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) = 0; + virtual void remove_image(const std::string &mirror_uuid, + const std::string &global_image_id, + const std::string &instance_id, + Context* on_finish) = 0; +}; + +struct LookupInfo { + std::string instance_id = UNMAPPED_INSTANCE_ID; + utime_t mapped_time; +}; + +enum ActionType { + ACTION_TYPE_NONE, + ACTION_TYPE_MAP_UPDATE, + ACTION_TYPE_MAP_REMOVE, + ACTION_TYPE_ACQUIRE, + ACTION_TYPE_RELEASE +}; + +typedef std::vector<std::string> InstanceIds; +typedef std::set<std::string> GlobalImageIds; +typedef std::map<std::string, ActionType> ImageActionTypes; + +enum PolicyMetaType { + POLICY_META_TYPE_NONE = 0, +}; + +struct PolicyMetaNone { + static const PolicyMetaType TYPE = POLICY_META_TYPE_NONE; + + PolicyMetaNone() { + } + + void encode(bufferlist& bl) const { + } + + void decode(__u8 version, bufferlist::const_iterator& it) { + } + + void dump(Formatter *f) const { + } +}; + +struct PolicyMetaUnknown { + static const PolicyMetaType TYPE = static_cast<PolicyMetaType>(-1); + + PolicyMetaUnknown() { + } + + void encode(bufferlist& bl) const { + ceph_abort(); + } + + void decode(__u8 version, bufferlist::const_iterator& it) { + } + + void dump(Formatter *f) const { + } +}; + +typedef boost::variant<PolicyMetaNone, + PolicyMetaUnknown> PolicyMeta; + +struct PolicyData { + PolicyData() + : policy_meta(PolicyMetaUnknown()) { + } + PolicyData(const PolicyMeta &policy_meta) + : policy_meta(policy_meta) { + } + + PolicyMeta policy_meta; + + PolicyMetaType get_policy_meta_type() const; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<PolicyData *> &o); +}; + +WRITE_CLASS_ENCODER(PolicyData); + +std::ostream &operator<<(std::ostream &os, const ActionType &action_type); + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_TYPES_H diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.cc b/src/tools/rbd_mirror/image_map/UpdateRequest.cc new file mode 100644 index 00000000..799c5670 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.cc @@ -0,0 +1,100 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/debug.h" +#include "common/errno.h" + +#include "librbd/Utils.h" +#include "include/rbd_types.h" +#include "cls/rbd/cls_rbd_client.h" + +#include "UpdateRequest.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_map::UpdateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_map { + +using librbd::util::create_rados_callback; + +static const uint32_t MAX_UPDATE = 256; + +template <typename I> +UpdateRequest<I>::UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) + : m_ioctx(ioctx), + m_update_mapping(update_mapping), + m_remove_global_image_ids(remove_global_image_ids), + m_on_finish(on_finish) { +} + +template <typename I> +void UpdateRequest<I>::send() { + dout(20) << dendl; + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::update_image_map() { + dout(20) << dendl; + + if (m_update_mapping.empty() && m_remove_global_image_ids.empty()) { + finish(0); + return; + } + + uint32_t nr_updates = 0; + librados::ObjectWriteOperation op; + + auto it1 = m_update_mapping.begin(); + while (it1 != m_update_mapping.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_update(&op, it1->first, it1->second); + it1 = m_update_mapping.erase(it1); + } + + auto it2 = m_remove_global_image_ids.begin(); + while (it2 != m_remove_global_image_ids.end() && nr_updates++ < MAX_UPDATE) { + librbd::cls_client::mirror_image_map_remove(&op, *it2); + it2 = m_remove_global_image_ids.erase(it2); + } + + librados::AioCompletion *aio_comp = create_rados_callback< + UpdateRequest, &UpdateRequest::handle_update_image_map>(this); + int r = m_ioctx.aio_operate(RBD_MIRROR_LEADER, aio_comp, &op); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void UpdateRequest<I>::handle_update_image_map(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update image map: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + update_image_map(); +} + +template <typename I> +void UpdateRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_map::UpdateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_map/UpdateRequest.h b/src/tools/rbd_mirror/image_map/UpdateRequest.h new file mode 100644 index 00000000..841cc6f9 --- /dev/null +++ b/src/tools/rbd_mirror/image_map/UpdateRequest.h @@ -0,0 +1,65 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H +#define CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H + +#include "cls/rbd/cls_rbd_types.h" +#include "include/rados/librados.hpp" + +class Context; + +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_map { + +template<typename ImageCtxT = librbd::ImageCtx> +class UpdateRequest { +public: + // accepts an image map for updation and a collection of + // global image ids to purge. + static UpdateRequest *create(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish) { + return new UpdateRequest(ioctx, std::move(update_mapping), std::move(remove_global_image_ids), + on_finish); + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | . . . . . . . . + * v v . MAX_UPDATE + * UPDATE_IMAGE_MAP. . . . . . . + * | + * v + * <finish> + * + * @endverbatim + */ + UpdateRequest(librados::IoCtx &ioctx, + std::map<std::string, cls::rbd::MirrorImageMap> &&update_mapping, + std::set<std::string> &&remove_global_image_ids, Context *on_finish); + + librados::IoCtx &m_ioctx; + std::map<std::string, cls::rbd::MirrorImageMap> m_update_mapping; + std::set<std::string> m_remove_global_image_ids; + Context *m_on_finish; + + void update_image_map(); + void handle_update_image_map(int r); + + void finish(int r); +}; + +} // namespace image_map +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_MAP_UPDATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc new file mode 100644 index 00000000..7ce21b4b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -0,0 +1,785 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "BootstrapRequest.h" +#include "CloseImageRequest.h" +#include "CreateImageRequest.h" +#include "IsPrimaryRequest.h" +#include "OpenImageRequest.h" +#include "OpenLocalImageRequest.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/ProgressContext.h" +#include "tools/rbd_mirror/ImageSync.h" +#include "tools/rbd_mirror/Threads.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::BootstrapRequest: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; +using librbd::util::unique_lock_name; + +template <typename I> +BootstrapRequest<I>::BootstrapRequest( + Threads<I>* threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + InstanceWatcher<I> *instance_watcher, + I **local_image_ctx, + const std::string &local_image_id, + const std::string &remote_image_id, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, + Journaler *journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish, + bool *do_resync, + rbd::mirror::ProgressContext *progress_ctx) + : BaseRequest("rbd::mirror::image_replayer::BootstrapRequest", + reinterpret_cast<CephContext*>(local_io_ctx.cct()), on_finish), + m_threads(threads), m_local_io_ctx(local_io_ctx), + m_remote_io_ctx(remote_io_ctx), m_instance_watcher(instance_watcher), + m_local_image_ctx(local_image_ctx), m_local_image_id(local_image_id), + m_remote_image_id(remote_image_id), m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), + m_remote_mirror_uuid(remote_mirror_uuid), m_journaler(journaler), + m_client_state(client_state), m_client_meta(client_meta), + m_progress_ctx(progress_ctx), m_do_resync(do_resync), + m_lock(unique_lock_name("BootstrapRequest::m_lock", this)) { + dout(10) << dendl; +} + +template <typename I> +BootstrapRequest<I>::~BootstrapRequest() { + ceph_assert(m_remote_image_ctx == nullptr); +} + +template <typename I> +bool BootstrapRequest<I>::is_syncing() const { + Mutex::Locker locker(m_lock); + return (m_image_sync != nullptr); +} + +template <typename I> +void BootstrapRequest<I>::send() { + *m_do_resync = false; + + get_remote_tag_class(); +} + +template <typename I> +void BootstrapRequest<I>::cancel() { + dout(10) << dendl; + + Mutex::Locker locker(m_lock); + m_canceled = true; + + if (m_image_sync != nullptr) { + m_image_sync->cancel(); + } +} + +template <typename I> +void BootstrapRequest<I>::get_remote_tag_class() { + dout(15) << dendl; + + update_progress("GET_REMOTE_TAG_CLASS"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_get_remote_tag_class>( + this); + m_journaler->get_client(librbd::Journal<>::IMAGE_CLIENT_ID, &m_client, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_get_remote_tag_class(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve remote client: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + librbd::journal::ClientData client_data; + auto it = m_client.data.cbegin(); + try { + decode(client_data, it); + } catch (const buffer::error &err) { + derr << "failed to decode remote client meta data: " << err.what() + << dendl; + finish(-EBADMSG); + return; + } + + librbd::journal::ImageClientMeta *client_meta = + boost::get<librbd::journal::ImageClientMeta>(&client_data.client_meta); + if (client_meta == nullptr) { + derr << "unknown remote client registration" << dendl; + finish(-EINVAL); + return; + } + + m_remote_tag_class = client_meta->tag_class; + dout(10) << "remote tag class=" << m_remote_tag_class << dendl; + + open_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::open_remote_image() { + dout(15) << "remote_image_id=" << m_remote_image_id << dendl; + + update_progress("OPEN_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_remote_image>( + this); + OpenImageRequest<I> *request = OpenImageRequest<I>::create( + m_remote_io_ctx, &m_remote_image_ctx, m_remote_image_id, false, + ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_open_remote_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to open remote image: " << cpp_strerror(r) << dendl; + ceph_assert(m_remote_image_ctx == nullptr); + finish(r); + return; + } + + is_primary(); +} + +template <typename I> +void BootstrapRequest<I>::is_primary() { + dout(15) << dendl; + + update_progress("OPEN_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_is_primary>( + this); + IsPrimaryRequest<I> *request = IsPrimaryRequest<I>::create(m_remote_image_ctx, + &m_primary, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_is_primary(int r) { + dout(15) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << "remote image is not mirrored" << dendl; + m_ret_val = -EREMOTEIO; + close_remote_image(); + return; + } else if (r < 0) { + derr << "error querying remote image primary status: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + if (!m_primary) { + if (m_local_image_id.empty()) { + // no local image and remote isn't primary -- don't sync it + dout(5) << "remote image is not primary -- not syncing" + << dendl; + m_ret_val = -EREMOTEIO; + close_remote_image(); + return; + } else if (m_client_meta->state != + librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // ensure we attempt to re-sync to remote if it's re-promoted + dout(5) << "remote image is not primary -- sync interrupted" + << dendl; + m_ret_val = -EREMOTEIO; + update_client_state(); + return; + } + } + + if (!m_client_meta->image_id.empty()) { + // have an image id -- use that to open the image since a deletion (resync) + // will leave the old image id registered in the peer + m_local_image_id = m_client_meta->image_id; + } + + if (m_local_image_id.empty()) { + // prepare to create local image + update_client_image(); + return; + } + + open_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::update_client_state() { + dout(15) << dendl; + update_progress("UPDATE_CLIENT_STATE"); + + librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta); + client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_update_client_state>( + this); + m_journaler->update_client(data_bl, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_update_client_state(int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to update client: " << cpp_strerror(r) << dendl; + } else { + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::open_local_image() { + dout(15) << "local_image_id=" << m_local_image_id << dendl; + + update_progress("OPEN_LOCAL_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_open_local_image>( + this); + OpenLocalImageRequest<I> *request = OpenLocalImageRequest<I>::create( + m_local_io_ctx, m_local_image_ctx, m_local_image_id, m_threads->work_queue, + ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_open_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r == -ENOENT) { + ceph_assert(*m_local_image_ctx == nullptr); + dout(10) << "local image missing" << dendl; + unregister_client(); + return; + } else if (r == -EREMOTEIO) { + ceph_assert(*m_local_image_ctx == nullptr); + dout(10) << "local image is primary -- skipping image replay" << dendl; + m_ret_val = r; + close_remote_image(); + return; + } else if (r < 0) { + ceph_assert(*m_local_image_ctx == nullptr); + derr << "failed to open local image: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + I *local_image_ctx = (*m_local_image_ctx); + { + local_image_ctx->snap_lock.get_read(); + if (local_image_ctx->journal == nullptr) { + local_image_ctx->snap_lock.put_read(); + + derr << "local image does not support journaling" << dendl; + m_ret_val = -EINVAL; + close_local_image(); + return; + } + + r = (*m_local_image_ctx)->journal->is_resync_requested(m_do_resync); + if (r < 0) { + local_image_ctx->snap_lock.put_read(); + + derr << "failed to check if a resync was requested" << dendl; + m_ret_val = r; + close_local_image(); + return; + } + + m_local_tag_tid = local_image_ctx->journal->get_tag_tid(); + m_local_tag_data = local_image_ctx->journal->get_tag_data(); + dout(10) << "local tag=" << m_local_tag_tid << ", " + << "local tag data=" << m_local_tag_data << dendl; + local_image_ctx->snap_lock.put_read(); + } + + if (m_local_tag_data.mirror_uuid != m_remote_mirror_uuid && !m_primary) { + // if the local mirror is not linked to the (now) non-primary image, + // stop the replay. Otherwise, we ignore that the remote is non-primary + // so that we can replay the demotion + dout(5) << "remote image is not primary -- skipping image replay" + << dendl; + m_ret_val = -EREMOTEIO; + close_local_image(); + return; + } + + if (*m_do_resync) { + close_remote_image(); + return; + } + + if (*m_client_state == cls::journal::CLIENT_STATE_DISCONNECTED) { + dout(10) << "client flagged disconnected -- skipping bootstrap" << dendl; + // The caller is expected to detect disconnect initializing remote journal. + m_ret_val = 0; + close_remote_image(); + return; + } + + get_remote_tags(); +} + +template <typename I> +void BootstrapRequest<I>::unregister_client() { + dout(15) << dendl; + update_progress("UNREGISTER_CLIENT"); + + m_local_image_id = ""; + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_unregister_client>( + this); + m_journaler->unregister_client(ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_unregister_client(int r) { + dout(15) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to unregister with remote journal: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + *m_client_meta = librbd::journal::MirrorPeerClientMeta(""); + register_client(); +} + +template <typename I> +void BootstrapRequest<I>::register_client() { + dout(15) << dendl; + + update_progress("REGISTER_CLIENT"); + + ceph_assert(m_local_image_id.empty()); + librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta; + mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data{mirror_peer_client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_register_client>( + this); + m_journaler->register_client(client_data_bl, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_register_client(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + *m_client_state = cls::journal::CLIENT_STATE_CONNECTED; + *m_client_meta = librbd::journal::MirrorPeerClientMeta(); + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + is_primary(); +} + +template <typename I> +void BootstrapRequest<I>::update_client_image() { + ceph_assert(m_local_image_id.empty()); + assert(m_local_image_id.empty()); + m_local_image_id = librbd::util::generate_image_id<I>(m_local_io_ctx); + + dout(15) << "local_image_id=" << m_local_image_id << dendl; + update_progress("UPDATE_CLIENT_IMAGE"); + + librbd::journal::MirrorPeerClientMeta client_meta{m_local_image_id}; + client_meta.state = librbd::journal::MIRROR_PEER_STATE_SYNCING; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_update_client_image>( + this); + m_journaler->update_client(data_bl, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_update_client_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to update client: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_remote_image(); + return; + } + + if (m_canceled) { + dout(10) << "request canceled" << dendl; + m_ret_val = -ECANCELED; + close_remote_image(); + return; + } + + *m_client_meta = {m_local_image_id}; + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_SYNCING; + create_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::create_local_image() { + dout(15) << "local_image_id=" << m_local_image_id << dendl; + update_progress("CREATE_LOCAL_IMAGE"); + + m_remote_image_ctx->snap_lock.get_read(); + std::string image_name = m_remote_image_ctx->name; + m_remote_image_ctx->snap_lock.put_read(); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_create_local_image>( + this); + CreateImageRequest<I> *request = CreateImageRequest<I>::create( + m_threads, m_local_io_ctx, m_global_image_id, m_remote_mirror_uuid, + image_name, m_local_image_id, m_remote_image_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_create_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + m_local_image_id = ""; + update_client_image(); + return; + } else if (r < 0) { + if (r == -ENOENT) { + dout(10) << "parent image does not exist" << dendl; + } else { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + } + m_ret_val = r; + close_remote_image(); + return; + } + + open_local_image(); +} + +template <typename I> +void BootstrapRequest<I>::get_remote_tags() { + if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_SYNCING) { + // optimization -- no need to compare remote tags if we just created + // the image locally or sync was interrupted + image_sync(); + return; + } + + dout(15) << dendl; + update_progress("GET_REMOTE_TAGS"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_get_remote_tags>(this); + m_journaler->get_tags(m_remote_tag_class, &m_remote_tags, ctx); +} + +template <typename I> +void BootstrapRequest<I>::handle_get_remote_tags(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to retrieve remote tags: " << cpp_strerror(r) << dendl; + m_ret_val = r; + close_local_image(); + return; + } + + if (m_canceled) { + dout(10) << "request canceled" << dendl; + m_ret_val = -ECANCELED; + close_local_image(); + return; + } + + // At this point, the local image was existing, non-primary, and replaying; + // and the remote image is primary. Attempt to link the local image's most + // recent tag to the remote image's tag chain. + bool remote_tag_data_valid = false; + librbd::journal::TagData remote_tag_data; + boost::optional<uint64_t> remote_orphan_tag_tid = + boost::make_optional<uint64_t>(false, 0U); + bool reconnect_orphan = false; + + // decode the remote tags + for (auto &remote_tag : m_remote_tags) { + if (m_local_tag_data.predecessor.commit_valid && + m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid && + m_local_tag_data.predecessor.tag_tid > remote_tag.tid) { + dout(15) << "skipping processed predecessor remote tag " + << remote_tag.tid << dendl; + continue; + } + + try { + auto it = remote_tag.data.cbegin(); + decode(remote_tag_data, it); + remote_tag_data_valid = true; + } catch (const buffer::error &err) { + derr << "failed to decode remote tag " << remote_tag.tid << ": " + << err.what() << dendl; + m_ret_val = -EBADMSG; + close_local_image(); + return; + } + + dout(10) << "decoded remote tag " << remote_tag.tid << ": " + << remote_tag_data << dendl; + + if (!m_local_tag_data.predecessor.commit_valid) { + // newly synced local image (no predecessor) replays from the first tag + if (remote_tag_data.mirror_uuid != librbd::Journal<>::LOCAL_MIRROR_UUID) { + dout(15) << "skipping non-primary remote tag" << dendl; + continue; + } + + dout(10) << "using initial primary remote tag" << dendl; + break; + } + + if (m_local_tag_data.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID) { + // demotion last available local epoch + + if (remote_tag_data.mirror_uuid == m_local_tag_data.mirror_uuid && + remote_tag_data.predecessor.commit_valid && + remote_tag_data.predecessor.tag_tid == + m_local_tag_data.predecessor.tag_tid) { + // demotion matches remote epoch + + if (remote_tag_data.predecessor.mirror_uuid == m_local_mirror_uuid && + m_local_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID) { + // local demoted and remote has matching event + dout(15) << "found matching local demotion tag" << dendl; + remote_orphan_tag_tid = remote_tag.tid; + continue; + } + + if (m_local_tag_data.predecessor.mirror_uuid == m_remote_mirror_uuid && + remote_tag_data.predecessor.mirror_uuid == + librbd::Journal<>::LOCAL_MIRROR_UUID) { + // remote demoted and local has matching event + dout(15) << "found matching remote demotion tag" << dendl; + remote_orphan_tag_tid = remote_tag.tid; + continue; + } + } + + if (remote_tag_data.mirror_uuid == librbd::Journal<>::LOCAL_MIRROR_UUID && + remote_tag_data.predecessor.mirror_uuid == librbd::Journal<>::ORPHAN_MIRROR_UUID && + remote_tag_data.predecessor.commit_valid && remote_orphan_tag_tid && + remote_tag_data.predecessor.tag_tid == *remote_orphan_tag_tid) { + // remote promotion tag chained to remote/local demotion tag + dout(15) << "found chained remote promotion tag" << dendl; + reconnect_orphan = true; + break; + } + + // promotion must follow demotion + remote_orphan_tag_tid = boost::none; + } + } + + if (remote_tag_data_valid && + m_local_tag_data.mirror_uuid == m_remote_mirror_uuid) { + dout(10) << "local image is in clean replay state" << dendl; + } else if (reconnect_orphan) { + dout(10) << "remote image was demoted/promoted" << dendl; + } else { + derr << "split-brain detected -- skipping image replay" << dendl; + m_ret_val = -EEXIST; + close_local_image(); + return; + } + + image_sync(); +} + +template <typename I> +void BootstrapRequest<I>::image_sync() { + if (m_client_meta->state == librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // clean replay state -- no image sync required + close_remote_image(); + return; + } + + { + Mutex::Locker locker(m_lock); + if (m_canceled) { + m_ret_val = -ECANCELED; + } else { + dout(15) << dendl; + ceph_assert(m_image_sync == nullptr); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(this); + m_image_sync = ImageSync<I>::create( + *m_local_image_ctx, m_remote_image_ctx, m_threads->timer, + &m_threads->timer_lock, m_local_mirror_uuid, m_journaler, + m_client_meta, m_threads->work_queue, m_instance_watcher, ctx, + m_progress_ctx); + + m_image_sync->get(); + + m_lock.Unlock(); + update_progress("IMAGE_SYNC"); + m_lock.Lock(); + + m_image_sync->send(); + return; + } + } + + dout(10) << "request canceled" << dendl; + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::handle_image_sync(int r) { + dout(15) << "r=" << r << dendl; + + { + Mutex::Locker locker(m_lock); + m_image_sync->put(); + m_image_sync = nullptr; + + if (m_canceled) { + dout(10) << "request canceled" << dendl; + m_ret_val = -ECANCELED; + } + + if (r < 0) { + derr << "failed to sync remote image: " << cpp_strerror(r) << dendl; + m_ret_val = r; + } + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::close_local_image() { + dout(15) << dendl; + + update_progress("CLOSE_LOCAL_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_close_local_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + m_local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_close_local_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "error encountered closing local image: " << cpp_strerror(r) + << dendl; + } + + close_remote_image(); +} + +template <typename I> +void BootstrapRequest<I>::close_remote_image() { + dout(15) << dendl; + + update_progress("CLOSE_REMOTE_IMAGE"); + + Context *ctx = create_context_callback< + BootstrapRequest<I>, &BootstrapRequest<I>::handle_close_remote_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_remote_image_ctx, ctx); + request->send(); +} + +template <typename I> +void BootstrapRequest<I>::handle_close_remote_image(int r) { + dout(15) << "r=" << r << dendl; + + if (r < 0) { + derr << "error encountered closing remote image: " << cpp_strerror(r) + << dendl; + } + + finish(m_ret_val); +} + +template <typename I> +void BootstrapRequest<I>::update_progress(const std::string &description) { + dout(15) << description << dendl; + + if (m_progress_ctx) { + m_progress_ctx->update_progress(description); + } +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h new file mode 100644 index 00000000..ea9f8565 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h @@ -0,0 +1,230 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H + +#include "include/int_types.h" +#include "include/rados/librados.hpp" +#include "common/Mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include "tools/rbd_mirror/BaseRequest.h" +#include "tools/rbd_mirror/Types.h" +#include <list> +#include <string> + +class Context; +class ContextWQ; +class Mutex; +class SafeTimer; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { + +class ProgressContext; + +template <typename> class ImageSync; +template <typename> class InstanceWatcher; +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class BootstrapRequest : public BaseRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + typedef rbd::mirror::ProgressContext ProgressContext; + + static BootstrapRequest* create( + Threads<ImageCtxT>* threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + InstanceWatcher<ImageCtxT> *instance_watcher, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + const std::string &remote_image_id, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, + Journaler *journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish, + bool *do_resync, + ProgressContext *progress_ctx = nullptr) { + return new BootstrapRequest(threads, local_io_ctx, remote_io_ctx, + instance_watcher, local_image_ctx, + local_image_id, remote_image_id, + global_image_id, local_mirror_uuid, + remote_mirror_uuid, journaler, client_state, + client_meta, on_finish, do_resync, + progress_ctx); + } + + BootstrapRequest(Threads<ImageCtxT>* threads, + librados::IoCtx &local_io_ctx, + librados::IoCtx &remote_io_ctx, + InstanceWatcher<ImageCtxT> *instance_watcher, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + const std::string &remote_image_id, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &remote_mirror_uuid, Journaler *journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, Context *on_finish, + bool *do_resync, ProgressContext *progress_ctx = nullptr); + ~BootstrapRequest() override; + + bool is_syncing() const; + + void send() override; + void cancel() override; + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_REMOTE_TAG_CLASS * * * * * * * * * * * * * * * * * * + * | * (error) + * v * + * OPEN_REMOTE_IMAGE * * * * * * * * * * * * * * * * * * * + * | * + * |/--------------------------------------------------*---\ + * v * | + * IS_PRIMARY * * * * * * * * * * * * * * * * * * * * * * | + * | * * | + * | (remote image primary, no local image id) * * | + * \----> UPDATE_CLIENT_IMAGE * * * * * * * * * * * * | + * | | ^ * * | + * | | * (duplicate image id) * * | + * | v * * * | + * \----> CREATE_LOCAL_IMAGE * * * * * * * * * * * * * | + * | | * * | + * | v * * | + * | (remote image primary) * * | + * \----> OPEN_LOCAL_IMAGE * * * * * * * * * * * * * * | + * | | . * * | + * | | . (image doesn't exist) * * | + * | | . . > UNREGISTER_CLIENT * * * * * * * | + * | | | * * | + * | | v * * | + * | | REGISTER_CLIENT * * * * * * * * | + * | | | * * | + * | | \-----------------------*---*---/ + * | | * * + * | v (skip if not needed) * * + * | GET_REMOTE_TAGS * * * * * * * * * + * | | * * * + * | v (skip if not needed) v * * + * | IMAGE_SYNC * * * > CLOSE_LOCAL_IMAGE * * + * | | | * * + * | \-----------------\ /-----/ * * + * | | * * + * | | * * + * | (skip if not needed) | * * + * \----> UPDATE_CLIENT_STATE *|* * * * * * * * * * * + * | | * * + * /-----------/----------------/ * * + * | * * + * v * * + * CLOSE_REMOTE_IMAGE < * * * * * * * * * * * * * * * * * + * | * + * v * + * <finish> < * * * * * * * * * * * * * * * * * * * * * * * + * + * @endverbatim + */ + typedef std::list<cls::journal::Tag> Tags; + + Threads<ImageCtxT>* m_threads; + librados::IoCtx &m_local_io_ctx; + librados::IoCtx &m_remote_io_ctx; + InstanceWatcher<ImageCtxT> *m_instance_watcher; + ImageCtxT **m_local_image_ctx; + std::string m_local_image_id; + std::string m_remote_image_id; + std::string m_global_image_id; + std::string m_local_mirror_uuid; + std::string m_remote_mirror_uuid; + Journaler *m_journaler; + cls::journal::ClientState *m_client_state; + MirrorPeerClientMeta *m_client_meta; + ProgressContext *m_progress_ctx; + bool *m_do_resync; + + mutable Mutex m_lock; + bool m_canceled = false; + + Tags m_remote_tags; + cls::journal::Client m_client; + uint64_t m_remote_tag_class = 0; + ImageCtxT *m_remote_image_ctx = nullptr; + bool m_primary = false; + int m_ret_val = 0; + ImageSync<ImageCtxT> *m_image_sync = nullptr; + + uint64_t m_local_tag_tid = 0; + librbd::journal::TagData m_local_tag_data; + + bufferlist m_out_bl; + + void get_remote_tag_class(); + void handle_get_remote_tag_class(int r); + + void open_remote_image(); + void handle_open_remote_image(int r); + + void is_primary(); + void handle_is_primary(int r); + + void update_client_state(); + void handle_update_client_state(int r); + + void open_local_image(); + void handle_open_local_image(int r); + + void unregister_client(); + void handle_unregister_client(int r); + + void register_client(); + void handle_register_client(int r); + + void create_local_image(); + void handle_create_local_image(int r); + + void update_client_image(); + void handle_update_client_image(int r); + + void get_remote_tags(); + void handle_get_remote_tags(int r); + + void image_sync(); + void handle_image_sync(int r); + + void close_local_image(); + void handle_close_local_image(int r); + + void close_remote_image(); + void handle_close_remote_image(int r); + + void update_progress(const std::string &description); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::BootstrapRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_BOOTSTRAP_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc new file mode 100644 index 00000000..5b754823 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.cc @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CloseImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::CloseImageRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +CloseImageRequest<I>::CloseImageRequest(I **image_ctx, Context *on_finish) + : m_image_ctx(image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void CloseImageRequest<I>::send() { + close_image(); +} + +template <typename I> +void CloseImageRequest<I>::close_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + CloseImageRequest<I>, &CloseImageRequest<I>::handle_close_image>(this); + (*m_image_ctx)->state->close(ctx); +} + +template <typename I> +void CloseImageRequest<I>::handle_close_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": error encountered while closing image: " << cpp_strerror(r) + << dendl; + } + + delete *m_image_ctx; + *m_image_ctx = nullptr; + + m_on_finish->complete(0); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>; + diff --git a/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h new file mode 100644 index 00000000..02481369 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CloseImageRequest.h @@ -0,0 +1,56 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class CloseImageRequest { +public: + static CloseImageRequest* create(ImageCtxT **image_ctx, Context *on_finish) { + return new CloseImageRequest(image_ctx, on_finish); + } + + CloseImageRequest(ImageCtxT **image_ctx, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * CLOSE_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + ImageCtxT **m_image_ctx; + Context *m_on_finish; + + void close_image(); + void handle_close_image(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::CloseImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_CLOSE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc new file mode 100644 index 00000000..8d8236b2 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.cc @@ -0,0 +1,506 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "CreateImageRequest.h" +#include "CloseImageRequest.h" +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "journal/Journaler.h" +#include "journal/Settings.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/internal.h" +#include "librbd/Utils.h" +#include "librbd/image/CreateRequest.h" +#include "librbd/image/CloneRequest.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::CreateImageRequest: " \ + << this << " " << __func__ << ": " + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename I> +CreateImageRequest<I>::CreateImageRequest(Threads<I>* threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + I *remote_image_ctx, + Context *on_finish) + : m_threads(threads), m_local_io_ctx(local_io_ctx), + m_global_image_id(global_image_id), + m_remote_mirror_uuid(remote_mirror_uuid), + m_local_image_name(local_image_name), m_local_image_id(local_image_id), + m_remote_image_ctx(remote_image_ctx), m_on_finish(on_finish) { +} + +template <typename I> +void CreateImageRequest<I>::send() { + int r = validate_parent(); + if (r < 0) { + error(r); + return; + } + + if (m_remote_parent_spec.pool_id == -1) { + create_image(); + } else { + get_local_parent_mirror_uuid(); + } +} + +template <typename I> +void CreateImageRequest<I>::create_image() { + dout(10) << dendl; + + using klass = CreateImageRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_create_image>(this); + + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + + auto& config{ + reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf}; + + librbd::ImageOptions image_options; + populate_image_options(&image_options); + + auto req = librbd::image::CreateRequest<I>::create( + config, m_local_io_ctx, m_local_image_name, m_local_image_id, + m_remote_image_ctx->size, image_options, m_global_image_id, + m_remote_mirror_uuid, false, m_remote_image_ctx->op_work_queue, ctx); + req->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_create_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to create local image: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void CreateImageRequest<I>::get_local_parent_mirror_uuid() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_local_parent_mirror_uuid>(this); + m_out_bl.clear(); + int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_local_parent_mirror_uuid(int r) { + if (r >= 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish( + &it, &m_local_parent_mirror_uuid); + if (r >= 0 && m_local_parent_mirror_uuid.empty()) { + r = -ENOENT; + } + } + + dout(10) << "r=" << r << dendl; + if (r < 0) { + if (r == -ENOENT) { + dout(5) << "local parent mirror uuid missing" << dendl; + } else { + derr << "failed to retrieve local parent mirror uuid: " << cpp_strerror(r) + << dendl; + } + finish(r); + return; + } + + dout(15) << "local_parent_mirror_uuid=" << m_local_parent_mirror_uuid + << dendl; + get_remote_parent_client_state(); +} + +template <typename I> +void CreateImageRequest<I>::get_remote_parent_client_state() { + dout(10) << dendl; + + m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer, + &m_threads->timer_lock, + m_remote_parent_io_ctx, + m_remote_parent_spec.image_id, + m_local_parent_mirror_uuid, {}); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_remote_parent_client_state>(this)); + m_remote_journaler->get_client(m_local_parent_mirror_uuid, &m_client, ctx); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_remote_parent_client_state(int r) { + dout(10) << "r=" << r << dendl; + + delete m_remote_journaler; + m_remote_journaler = nullptr; + + librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta; + if (r == -ENOENT) { + dout(15) << "client not registered to parent image" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve parent client: " << cpp_strerror(r) << dendl; + finish(r); + return; + } else if (!util::decode_client_meta(m_client, &mirror_peer_client_meta)) { + // require operator intervention since the data is corrupt + derr << "failed to decode parent client: " << cpp_strerror(r) << dendl; + finish(-EBADMSG); + return; + } else if (mirror_peer_client_meta.state != + librbd::journal::MIRROR_PEER_STATE_REPLAYING) { + // avoid possible race w/ incomplete parent image since the parent snapshot + // might be deleted if the sync restarts + dout(15) << "parent image still syncing" << dendl; + finish(-ENOENT); + return; + } + + get_parent_global_image_id(); +} + + +template <typename I> +void CreateImageRequest<I>::get_parent_global_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, + m_remote_parent_spec.image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_parent_global_image_id>(this); + m_out_bl.clear(); + int r = m_remote_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_parent_global_image_id(int r) { + dout(10) << "r=" << r << dendl; + if (r == 0) { + cls::rbd::MirrorImage mirror_image; + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + if (r == 0) { + m_parent_global_image_id = mirror_image.global_image_id; + dout(15) << "parent_global_image_id=" << m_parent_global_image_id + << dendl; + } + } + + if (r == -ENOENT) { + dout(10) << "parent image " << m_remote_parent_spec.image_id + << " not mirrored" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve global image id for parent image " + << m_remote_parent_spec.image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + get_local_parent_image_id(); +} + +template <typename I> +void CreateImageRequest<I>::get_local_parent_image_id() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start( + &op, m_parent_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_get_local_parent_image_id>(this); + m_out_bl.clear(); + int r = m_local_parent_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void CreateImageRequest<I>::handle_get_local_parent_image_id(int r) { + dout(10) << "r=" << r << dendl; + + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, &m_local_parent_spec.image_id); + } + + if (r == -ENOENT) { + dout(10) << "parent image " << m_parent_global_image_id << " not " + << "registered locally" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to retrieve local image id for parent image " + << m_parent_global_image_id << ": " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + open_remote_parent_image(); +} + +template <typename I> +void CreateImageRequest<I>::open_remote_parent_image() { + dout(10) << dendl; + + Context *ctx = create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_open_remote_parent_image>(this); + OpenImageRequest<I> *request = OpenImageRequest<I>::create( + m_remote_parent_io_ctx, &m_remote_parent_image_ctx, + m_remote_parent_spec.image_id, true, ctx); + request->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_open_remote_parent_image(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "failed to open remote parent image " << m_parent_pool_name << "/" + << m_remote_parent_spec.image_id << dendl; + finish(r); + return; + } + + clone_image(); +} + +template <typename I> +void CreateImageRequest<I>::clone_image() { + dout(10) << dendl; + + std::string snap_name; + cls::rbd::SnapshotNamespace snap_namespace; + { + RWLock::RLocker remote_snap_locker(m_remote_parent_image_ctx->snap_lock); + auto it = m_remote_parent_image_ctx->snap_info.find( + m_remote_parent_spec.snap_id); + if (it != m_remote_parent_image_ctx->snap_info.end()) { + snap_name = it->second.name; + snap_namespace = it->second.snap_namespace; + } + } + + librbd::ImageOptions opts; + populate_image_options(&opts); + + auto& config{ + reinterpret_cast<CephContext*>(m_local_io_ctx.cct())->_conf}; + + using klass = CreateImageRequest<I>; + Context *ctx = create_context_callback< + klass, &klass::handle_clone_image>(this); + + librbd::image::CloneRequest<I> *req = librbd::image::CloneRequest<I>::create( + config, m_local_parent_io_ctx, m_local_parent_spec.image_id, snap_name, + CEPH_NOSNAP, m_local_io_ctx, m_local_image_name, m_local_image_id, opts, + m_global_image_id, m_remote_mirror_uuid, m_remote_image_ctx->op_work_queue, + ctx); + req->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_clone_image(int r) { + dout(10) << "r=" << r << dendl; + if (r == -EBADF) { + dout(5) << "image id " << m_local_image_id << " already in-use" << dendl; + finish(r); + return; + } else if (r < 0) { + derr << "failed to clone image " << m_parent_pool_name << "/" + << m_remote_parent_spec.image_id << " to " + << m_local_image_name << dendl; + m_ret_val = r; + } + + close_remote_parent_image(); +} + +template <typename I> +void CreateImageRequest<I>::close_remote_parent_image() { + dout(10) << dendl; + Context *ctx = create_context_callback< + CreateImageRequest<I>, + &CreateImageRequest<I>::handle_close_remote_parent_image>(this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + &m_remote_parent_image_ctx, ctx); + request->send(); +} + +template <typename I> +void CreateImageRequest<I>::handle_close_remote_parent_image(int r) { + dout(10) << "r=" << r << dendl; + if (r < 0) { + derr << "error encountered closing remote parent image: " + << cpp_strerror(r) << dendl; + } + + finish(m_ret_val); +} + +template <typename I> +void CreateImageRequest<I>::error(int r) { + dout(10) << "r=" << r << dendl; + + m_threads->work_queue->queue(create_context_callback< + CreateImageRequest<I>, &CreateImageRequest<I>::finish>(this), r); +} + +template <typename I> +void CreateImageRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + m_on_finish->complete(r); + delete this; +} + +template <typename I> +int CreateImageRequest<I>::validate_parent() { + RWLock::RLocker owner_locker(m_remote_image_ctx->owner_lock); + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + + m_remote_parent_spec = m_remote_image_ctx->parent_md.spec; + + // scan all remote snapshots for a linked parent + for (auto &snap_info_pair : m_remote_image_ctx->snap_info) { + auto &parent_spec = snap_info_pair.second.parent.spec; + if (parent_spec.pool_id == -1) { + continue; + } else if (m_remote_parent_spec.pool_id == -1) { + m_remote_parent_spec = parent_spec; + continue; + } + + if (m_remote_parent_spec != parent_spec) { + derr << "remote image parent spec mismatch" << dendl; + return -EINVAL; + } + } + + if (m_remote_parent_spec.pool_id == -1) { + return 0; + } + + // map remote parent pool to local parent pool + librados::Rados remote_rados(m_remote_image_ctx->md_ctx); + int r = remote_rados.ioctx_create2(m_remote_parent_spec.pool_id, + m_remote_parent_io_ctx); + if (r < 0) { + derr << "failed to open remote parent pool " << m_remote_parent_spec.pool_id + << ": " << cpp_strerror(r) << dendl; + return r; + } + + m_parent_pool_name = m_remote_parent_io_ctx.get_pool_name(); + + librados::Rados local_rados(m_local_io_ctx); + r = local_rados.ioctx_create(m_parent_pool_name.c_str(), + m_local_parent_io_ctx); + if (r < 0) { + derr << "failed to open local parent pool " << m_parent_pool_name << ": " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> +void CreateImageRequest<I>::populate_image_options( + librbd::ImageOptions* image_options) { + image_options->set(RBD_IMAGE_OPTION_FEATURES, + m_remote_image_ctx->features); + image_options->set(RBD_IMAGE_OPTION_ORDER, m_remote_image_ctx->order); + image_options->set(RBD_IMAGE_OPTION_STRIPE_UNIT, + m_remote_image_ctx->stripe_unit); + image_options->set(RBD_IMAGE_OPTION_STRIPE_COUNT, + m_remote_image_ctx->stripe_count); + + // Determine the data pool for the local image as follows: + // 1. If the local pool has a default data pool, use it. + // 2. If the remote image has a data pool different from its metadata pool and + // a pool with the same name exists locally, use it. + // 3. Don't set the data pool explicitly. + std::string data_pool; + librados::Rados local_rados(m_local_io_ctx); + auto default_data_pool = g_ceph_context->_conf.get_val<std::string>("rbd_default_data_pool"); + auto remote_md_pool = m_remote_image_ctx->md_ctx.get_pool_name(); + auto remote_data_pool = m_remote_image_ctx->data_ctx.get_pool_name(); + + if (default_data_pool != "") { + data_pool = default_data_pool; + } else if (remote_data_pool != remote_md_pool) { + if (local_rados.pool_lookup(remote_data_pool.c_str()) >= 0) { + data_pool = remote_data_pool; + } + } + + if (data_pool != "") { + image_options->set(RBD_IMAGE_OPTION_DATA_POOL, data_pool); + } + + if (m_remote_parent_spec.pool_id != -1) { + uint64_t clone_format = 1; + if (m_remote_image_ctx->test_op_features( + RBD_OPERATION_FEATURE_CLONE_CHILD)) { + clone_format = 2; + } + image_options->set(RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format); + } +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h new file mode 100644 index 00000000..0b20da52 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/CreateImageRequest.h @@ -0,0 +1,154 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "include/types.h" +#include "include/rados/librados.hpp" +#include "cls/journal/cls_journal_types.h" +#include "librbd/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <string> + +class Context; +class ContextWQ; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { class ImageOptions; } + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class CreateImageRequest { +public: + static CreateImageRequest *create(Threads<ImageCtxT> *threads, + librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + Context *on_finish) { + return new CreateImageRequest(threads, local_io_ctx, global_image_id, + remote_mirror_uuid, local_image_name, + local_image_id, remote_image_ctx, on_finish); + } + + CreateImageRequest(Threads<ImageCtxT> *threads, librados::IoCtx &local_io_ctx, + const std::string &global_image_id, + const std::string &remote_mirror_uuid, + const std::string &local_image_name, + const std::string &local_image_id, + ImageCtxT *remote_image_ctx, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * | * + * | (non-clone) * + * |\------------> CREATE_IMAGE ---------------------\ * (error) + * | | * + * | (clone) | * + * \-------------> GET_LOCAL_PARENT_MIRROR_UUID * * | * * * * + * | | * * + * v | * + * GET_REMOTE_PARENT_CLIENT_STATE * | * * * * + * | | * * + * v | * + * GET_PARENT_GLOBAL_IMAGE_ID * * * | * * * * + * | | * * + * v | * + * GET_LOCAL_PARENT_IMAGE_ID * * * * | * * * * + * | | * * + * v | * + * OPEN_REMOTE_PARENT * * * * * * * | * * * * + * | | * * + * v | * + * CLONE_IMAGE | * + * | | * + * v | * + * CLOSE_REMOTE_PARENT | * + * | v * + * \------------------------> <finish> < * * + * @endverbatim + */ + + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_local_io_ctx; + std::string m_global_image_id; + std::string m_remote_mirror_uuid; + std::string m_local_image_name; + std::string m_local_image_id; + ImageCtxT *m_remote_image_ctx; + Context *m_on_finish; + + librados::IoCtx m_remote_parent_io_ctx; + std::string m_local_parent_mirror_uuid; + Journaler *m_remote_journaler = nullptr; + ImageCtxT *m_remote_parent_image_ctx = nullptr; + cls::rbd::ParentImageSpec m_remote_parent_spec; + + librados::IoCtx m_local_parent_io_ctx; + cls::rbd::ParentImageSpec m_local_parent_spec; + + bufferlist m_out_bl; + std::string m_parent_global_image_id; + std::string m_parent_pool_name; + cls::journal::Client m_client; + int m_ret_val = 0; + + void create_image(); + void handle_create_image(int r); + + void get_local_parent_mirror_uuid(); + void handle_get_local_parent_mirror_uuid(int r); + + void get_remote_parent_client_state(); + void handle_get_remote_parent_client_state(int r); + + void get_parent_global_image_id(); + void handle_get_parent_global_image_id(int r); + + void get_local_parent_image_id(); + void handle_get_local_parent_image_id(int r); + + void open_remote_parent_image(); + void handle_open_remote_parent_image(int r); + + void clone_image(); + void handle_clone_image(int r); + + void close_remote_parent_image(); + void handle_close_remote_parent_image(int r); + + void error(int r); + void finish(int r); + + int validate_parent(); + + void populate_image_options(librbd::ImageOptions* image_options); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::CreateImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_CREATE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc new file mode 100644 index 00000000..6314eb7d --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc @@ -0,0 +1,204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "EventPreprocessor.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include <boost/variant.hpp> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror + +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::EventPreprocessor: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx, + Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue) + : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler), + m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta), + m_work_queue(work_queue) { +} + +template <typename I> +EventPreprocessor<I>::~EventPreprocessor() { + ceph_assert(!m_in_progress); +} + +template <typename I> +bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) { + SnapSeqs snap_seqs(m_client_meta->snap_seqs); + return (prune_snap_map(&snap_seqs) || + event_entry.get_event_type() == + librbd::journal::EVENT_TYPE_SNAP_RENAME); +} + +template <typename I> +void EventPreprocessor<I>::preprocess(EventEntry *event_entry, + Context *on_finish) { + ceph_assert(!m_in_progress); + m_in_progress = true; + m_event_entry = event_entry; + m_on_finish = on_finish; + + refresh_image(); +} + +template <typename I> +void EventPreprocessor<I>::refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this); + m_local_image_ctx.state->refresh(ctx); +} + +template <typename I> +void EventPreprocessor<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << "error encountered during image refresh: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + preprocess_event(); +} + +template <typename I> +void EventPreprocessor<I>::preprocess_event() { + dout(20) << dendl; + + m_snap_seqs = m_client_meta->snap_seqs; + m_snap_seqs_updated = prune_snap_map(&m_snap_seqs); + + int r = boost::apply_visitor(PreprocessEventVisitor(this), + m_event_entry->event); + if (r < 0) { + finish(r); + return; + } + + update_client(); +} + +template <typename I> +int EventPreprocessor<I>::preprocess_snap_rename( + librbd::journal::SnapRenameEvent &event) { + dout(20) << ": " + << "remote_snap_id=" << event.snap_id << ", " + << "src_snap_name=" << event.src_snap_name << ", " + << "dest_snap_name=" << event.dst_snap_name << dendl; + + auto snap_seq_it = m_snap_seqs.find(event.snap_id); + if (snap_seq_it != m_snap_seqs.end()) { + dout(20) << ": remapping remote snap id " << snap_seq_it->first << " " + << "to local snap id " << snap_seq_it->second << dendl; + event.snap_id = snap_seq_it->second; + return 0; + } + + auto snap_id_it = m_local_image_ctx.snap_ids.find({cls::rbd::UserSnapshotNamespace(), + event.src_snap_name}); + if (snap_id_it == m_local_image_ctx.snap_ids.end()) { + dout(20) << ": cannot map remote snapshot '" << event.src_snap_name << "' " + << "to local snapshot" << dendl; + event.snap_id = CEPH_NOSNAP; + return -ENOENT; + } + + dout(20) << ": mapping remote snap id " << event.snap_id << " " + << "to local snap id " << snap_id_it->second << dendl; + m_snap_seqs_updated = true; + m_snap_seqs[event.snap_id] = snap_id_it->second; + event.snap_id = snap_id_it->second; + return 0; +} + +template <typename I> +void EventPreprocessor<I>::update_client() { + if (!m_snap_seqs_updated) { + finish(0); + return; + } + + dout(20) << dendl; + librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta); + client_meta.snap_seqs = m_snap_seqs; + + librbd::journal::ClientData client_data(client_meta); + bufferlist data_bl; + encode(client_data, data_bl); + + Context *ctx = create_context_callback< + EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>( + this); + m_remote_journaler.update_client(data_bl, ctx); +} + +template <typename I> +void EventPreprocessor<I>::handle_update_client(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << "failed to update mirror peer journal client: " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + m_client_meta->snap_seqs = m_snap_seqs; + finish(0); +} + +template <typename I> +bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) { + bool pruned = false; + + RWLock::RLocker snap_locker(m_local_image_ctx.snap_lock); + for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) { + auto current_it(it++); + if (m_local_image_ctx.snap_info.count(current_it->second) == 0) { + snap_seqs->erase(current_it); + pruned = true; + } + } + return pruned; +} + +template <typename I> +void EventPreprocessor<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + Context *on_finish = m_on_finish; + m_on_finish = nullptr; + m_event_entry = nullptr; + m_in_progress = false; + m_snap_seqs_updated = false; + m_work_queue->queue(on_finish, r); +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h new file mode 100644 index 00000000..67aeea0b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H +#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H + +#include "include/int_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <map> +#include <string> +#include <boost/variant/static_visitor.hpp> + +struct Context; +struct ContextWQ; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class EventPreprocessor { +public: + using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler; + using EventEntry = librbd::journal::EventEntry; + using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta; + + static EventPreprocessor *create(ImageCtxT &local_image_ctx, + Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, + ContextWQ *work_queue) { + return new EventPreprocessor(local_image_ctx, remote_journaler, + local_mirror_uuid, client_meta, work_queue); + } + + static void destroy(EventPreprocessor* processor) { + delete processor; + } + + EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler, + const std::string &local_mirror_uuid, + MirrorPeerClientMeta *client_meta, ContextWQ *work_queue); + ~EventPreprocessor(); + + bool is_required(const EventEntry &event_entry); + void preprocess(EventEntry *event_entry, Context *on_finish); + +private: + /** + * @verbatim + * + * <start> + * | + * v (skip if not required) + * REFRESH_IMAGE + * | + * v (skip if not required) + * PREPROCESS_EVENT + * | + * v (skip if not required) + * UPDATE_CLIENT + * + * @endverbatim + */ + + typedef std::map<uint64_t, uint64_t> SnapSeqs; + + class PreprocessEventVisitor : public boost::static_visitor<int> { + public: + EventPreprocessor *event_preprocessor; + + PreprocessEventVisitor(EventPreprocessor *event_preprocessor) + : event_preprocessor(event_preprocessor) { + } + + template <typename T> + inline int operator()(T&) const { + return 0; + } + inline int operator()(librbd::journal::SnapRenameEvent &event) const { + return event_preprocessor->preprocess_snap_rename(event); + } + }; + + ImageCtxT &m_local_image_ctx; + Journaler &m_remote_journaler; + std::string m_local_mirror_uuid; + MirrorPeerClientMeta *m_client_meta; + ContextWQ *m_work_queue; + + bool m_in_progress = false; + EventEntry *m_event_entry = nullptr; + Context *m_on_finish = nullptr; + + SnapSeqs m_snap_seqs; + bool m_snap_seqs_updated = false; + + bool prune_snap_map(SnapSeqs *snap_seqs); + + void refresh_image(); + void handle_refresh_image(int r); + + void preprocess_event(); + int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event); + + void update_client(); + void handle_update_client(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc new file mode 100644 index 00000000..74e97537 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.cc @@ -0,0 +1,85 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "GetMirrorImageIdRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_rados_callback; + +template <typename I> +void GetMirrorImageIdRequest<I>::send() { + dout(20) << dendl; + get_image_id(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::get_image_id() { + dout(20) << dendl; + + // attempt to cross-reference a image id by the global image id + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_image_id_start(&op, m_global_image_id); + + librados::AioCompletion *aio_comp = create_rados_callback< + GetMirrorImageIdRequest<I>, + &GetMirrorImageIdRequest<I>::handle_get_image_id>( + this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::handle_get_image_id(int r) { + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_image_id_finish( + &iter, m_image_id); + } + + dout(20) << "r=" << r << ", " + << "image_id=" << *m_image_id << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << "global image " << m_global_image_id << " not registered" + << dendl; + } else { + derr << "failed to retrieve image id: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void GetMirrorImageIdRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h new file mode 100644 index 00000000..b2664513 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h @@ -0,0 +1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace librbd { struct ImageCtx; } + +struct Context; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class GetMirrorImageIdRequest { +public: + static GetMirrorImageIdRequest *create(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *image_id, + Context *on_finish) { + return new GetMirrorImageIdRequest(io_ctx, global_image_id, image_id, + on_finish); + } + + GetMirrorImageIdRequest(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *image_id, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_image_id(image_id), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_IMAGE_ID + * | + * v + * <finish> + + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_image_id; + Context *m_on_finish; + + bufferlist m_out_bl; + + void get_image_id(); + void handle_get_image_id(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::GetMirrorImageIdRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_GET_MIRROR_IMAGE_ID_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc new file mode 100644 index 00000000..54636fdb --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.cc @@ -0,0 +1,125 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "IsPrimaryRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::IsPrimaryRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +IsPrimaryRequest<I>::IsPrimaryRequest(I *image_ctx, bool *primary, + Context *on_finish) + : m_image_ctx(image_ctx), m_primary(primary), m_on_finish(on_finish) { +} + +template <typename I> +void IsPrimaryRequest<I>::send() { + send_get_mirror_state(); +} + +template <typename I> +void IsPrimaryRequest<I>::send_get_mirror_state() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, m_image_ctx->id); + + librados::AioCompletion *aio_comp = create_rados_callback< + IsPrimaryRequest<I>, &IsPrimaryRequest<I>::handle_get_mirror_state>(this); + int r = m_image_ctx->md_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, + &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void IsPrimaryRequest<I>::handle_get_mirror_state(int r) { + dout(20) << ": r=" << r << dendl; + + cls::rbd::MirrorImage mirror_image; + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + if (r == 0) { + if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) { + send_is_tag_owner(); + return; + } else if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_DISABLING) { + dout(5) << ": image mirroring is being disabled" << dendl; + r = -ENOENT; + } else { + derr << ": image mirroring is disabled" << dendl; + r = -EINVAL; + } + } else { + derr << ": failed to decode image mirror state: " << cpp_strerror(r) + << dendl; + } + } else if (r == -ENOENT) { + dout(5) << ": image is not mirrored" << dendl; + } else { + derr << ": failed to retrieve image mirror state: " << cpp_strerror(r) + << dendl; + } + + finish(r); +} + +template <typename I> +void IsPrimaryRequest<I>::send_is_tag_owner() { + // deduce the class type for the journal to support unit tests + using Journal = typename std::decay< + typename std::remove_pointer<decltype(std::declval<I>().journal)> + ::type>::type; + + dout(20) << dendl; + + Context *ctx = create_context_callback< + IsPrimaryRequest<I>, &IsPrimaryRequest<I>::handle_is_tag_owner>(this); + + Journal::is_tag_owner(m_image_ctx, m_primary, ctx); +} + +template <typename I> +void IsPrimaryRequest<I>::handle_is_tag_owner(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to query remote image tag owner: " << cpp_strerror(r) + << dendl; + } + + finish(r); +} + +template <typename I> +void IsPrimaryRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::IsPrimaryRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h new file mode 100644 index 00000000..ddb332cb --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/IsPrimaryRequest.h @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H + +#include "include/buffer.h" + +class Context; +class ContextWQ; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class IsPrimaryRequest { +public: + static IsPrimaryRequest* create(ImageCtxT *image_ctx, bool *primary, + Context *on_finish) { + return new IsPrimaryRequest(image_ctx, primary, on_finish); + } + + IsPrimaryRequest(ImageCtxT *image_ctx, bool *primary, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_MIRROR_STATE * * * * * + * | * + * v * + * IS_TAG_OWNER * * * * * * * (error) + * | * + * v * + * <finish> < * * * * * * * * + * + * @endverbatim + */ + ImageCtxT *m_image_ctx; + bool *m_primary; + Context *m_on_finish; + + bufferlist m_out_bl; + + void send_get_mirror_state(); + void handle_get_mirror_state(int r); + + void send_is_tag_owner(); + void handle_is_tag_owner(int r); + + void finish(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::IsPrimaryRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_IS_PRIMARY_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc new file mode 100644 index 00000000..7f55745e --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.cc @@ -0,0 +1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "OpenImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Utils.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +template <typename I> +OpenImageRequest<I>::OpenImageRequest(librados::IoCtx &io_ctx, I **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) + : m_io_ctx(io_ctx), m_image_ctx(image_ctx), m_image_id(image_id), + m_read_only(read_only), m_on_finish(on_finish) { +} + +template <typename I> +void OpenImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_image_ctx = I::create("", m_image_id, nullptr, m_io_ctx, m_read_only); + + Context *ctx = create_context_callback< + OpenImageRequest<I>, &OpenImageRequest<I>::handle_open_image>( + this); + (*m_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to open image '" << m_image_id << "': " + << cpp_strerror(r) << dendl; + (*m_image_ctx)->destroy(); + *m_image_ctx = nullptr; + } + + finish(r); +} + +template <typename I> +void OpenImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h new file mode 100644 index 00000000..01ab3117 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenImageRequest.h @@ -0,0 +1,71 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenImageRequest { +public: + static OpenImageRequest* create(librados::IoCtx &io_ctx, + ImageCtxT **image_ctx, + const std::string &image_id, + bool read_only, Context *on_finish) { + return new OpenImageRequest(io_ctx, image_ctx, image_id, read_only, + on_finish); + } + + OpenImageRequest(librados::IoCtx &io_ctx, ImageCtxT **image_ctx, + const std::string &image_id, bool read_only, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + librados::IoCtx &m_io_ctx; + ImageCtxT **m_image_ctx; + std::string m_image_id; + bool m_read_only; + Context *m_on_finish; + + void send_open_image(); + void handle_open_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc new file mode 100644 index 00000000..87b141ca --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc @@ -0,0 +1,271 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/compat.h" +#include "CloseImageRequest.h" +#include "IsPrimaryRequest.h" +#include "OpenLocalImageRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "librbd/ExclusiveLock.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "librbd/exclusive_lock/Policy.h" +#include "librbd/journal/Policy.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::OpenLocalImageRequest: " \ + << this << " " << __func__ << " " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; + +namespace { + +template <typename I> +struct MirrorExclusiveLockPolicy : public librbd::exclusive_lock::Policy { + I *image_ctx; + + MirrorExclusiveLockPolicy(I *image_ctx) : image_ctx(image_ctx) { + } + + bool may_auto_request_lock() override { + return false; + } + + int lock_requested(bool force) override { + int r = -EROFS; + { + RWLock::RLocker owner_locker(image_ctx->owner_lock); + RWLock::RLocker snap_locker(image_ctx->snap_lock); + if (image_ctx->journal == nullptr || image_ctx->journal->is_tag_owner()) { + r = 0; + } + } + + if (r == 0) { + // if the local image journal has been closed or if it was (force) + // promoted allow the lock to be released to another client + image_ctx->exclusive_lock->release_lock(nullptr); + } + return r; + } + + bool accept_blocked_request( + librbd::exclusive_lock::OperationRequestType request_type) override { + if (request_type == + librbd::exclusive_lock::OPERATION_REQUEST_TYPE_TRASH_SNAP_REMOVE) { + return true; + } + return false; + } +}; + +struct MirrorJournalPolicy : public librbd::journal::Policy { + ContextWQ *work_queue; + + MirrorJournalPolicy(ContextWQ *work_queue) : work_queue(work_queue) { + } + + bool append_disabled() const override { + // avoid recording any events to the local journal + return true; + } + bool journal_disabled() const override { + return false; + } + + void allocate_tag_on_lock(Context *on_finish) override { + // rbd-mirror will manually create tags by copying them from the peer + work_queue->queue(on_finish, 0); + } +}; + +} // anonymous namespace + +template <typename I> +OpenLocalImageRequest<I>::OpenLocalImageRequest(librados::IoCtx &local_io_ctx, + I **local_image_ctx, + const std::string &local_image_id, + ContextWQ *work_queue, + Context *on_finish) + : m_local_io_ctx(local_io_ctx), m_local_image_ctx(local_image_ctx), + m_local_image_id(local_image_id), m_work_queue(work_queue), + m_on_finish(on_finish) { +} + +template <typename I> +void OpenLocalImageRequest<I>::send() { + send_open_image(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_open_image() { + dout(20) << dendl; + + *m_local_image_ctx = I::create("", m_local_image_id, nullptr, + m_local_io_ctx, false); + { + RWLock::WLocker owner_locker((*m_local_image_ctx)->owner_lock); + RWLock::WLocker snap_locker((*m_local_image_ctx)->snap_lock); + (*m_local_image_ctx)->set_exclusive_lock_policy( + new MirrorExclusiveLockPolicy<I>(*m_local_image_ctx)); + (*m_local_image_ctx)->set_journal_policy( + new MirrorJournalPolicy(m_work_queue)); + } + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_open_image>( + this); + (*m_local_image_ctx)->state->open(0, ctx); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_open_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + if (r == -ENOENT) { + dout(10) << ": local image does not exist" << dendl; + } else { + derr << ": failed to open image '" << m_local_image_id << "': " + << cpp_strerror(r) << dendl; + } + (*m_local_image_ctx)->destroy(); + *m_local_image_ctx = nullptr; + finish(r); + return; + } + + send_is_primary(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_is_primary() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_is_primary>( + this); + IsPrimaryRequest<I> *request = IsPrimaryRequest<I>::create(*m_local_image_ctx, + &m_primary, ctx); + request->send(); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_is_primary(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -ENOENT) { + dout(5) << ": local image is not mirrored" << dendl; + send_close_image(r); + return; + } else if (r < 0) { + derr << ": error querying local image primary status: " << cpp_strerror(r) + << dendl; + send_close_image(r); + return; + } + + // if the local image owns the tag -- don't steal the lock since + // we aren't going to mirror peer data into this image anyway + if (m_primary) { + dout(10) << ": local image is primary -- skipping image replay" << dendl; + send_close_image(-EREMOTEIO); + return; + } + + send_lock_image(); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_lock_image() { + dout(20) << dendl; + + RWLock::RLocker owner_locker((*m_local_image_ctx)->owner_lock); + if ((*m_local_image_ctx)->exclusive_lock == nullptr) { + derr << ": image does not support exclusive lock" << dendl; + send_close_image(-EINVAL); + return; + } + + // disallow any proxied maintenance operations before grabbing lock + (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS); + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>( + this); + + (*m_local_image_ctx)->exclusive_lock->acquire_lock(ctx); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_lock_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to lock image '" << m_local_image_id << "': " + << cpp_strerror(r) << dendl; + send_close_image(r); + return; + } + + { + RWLock::RLocker owner_locker((*m_local_image_ctx)->owner_lock); + if ((*m_local_image_ctx)->exclusive_lock == nullptr || + !(*m_local_image_ctx)->exclusive_lock->is_lock_owner()) { + derr << ": image is not locked" << dendl; + send_close_image(-EBUSY); + return; + } + } + + finish(0); +} + +template <typename I> +void OpenLocalImageRequest<I>::send_close_image(int r) { + dout(20) << dendl; + + if (m_ret_val == 0 && r < 0) { + m_ret_val = r; + } + + Context *ctx = create_context_callback< + OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_close_image>( + this); + CloseImageRequest<I> *request = CloseImageRequest<I>::create( + m_local_image_ctx, ctx); + request->send(); +} + +template <typename I> +void OpenLocalImageRequest<I>::handle_close_image(int r) { + dout(20) << dendl; + + ceph_assert(r == 0); + finish(m_ret_val); +} + +template <typename I> +void OpenLocalImageRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h new file mode 100644 index 00000000..58de545f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h @@ -0,0 +1,90 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H + +#include "include/int_types.h" +#include "librbd/ImageCtx.h" +#include <string> + +class Context; +class ContextWQ; +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class OpenLocalImageRequest { +public: + static OpenLocalImageRequest* create(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + ContextWQ *work_queue, + Context *on_finish) { + return new OpenLocalImageRequest(local_io_ctx, local_image_ctx, + local_image_id, work_queue, on_finish); + } + + OpenLocalImageRequest(librados::IoCtx &local_io_ctx, + ImageCtxT **local_image_ctx, + const std::string &local_image_id, + ContextWQ *m_work_queue, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * OPEN_IMAGE * * * * * * * * + * | * + * v * + * IS_PRIMARY * * * * * * * * + * | * + * v (skip if primary) v + * LOCK_IMAGE * * * > CLOSE_IMAGE + * | | + * v | + * <finish> <---------------/ + * + * @endverbatim + */ + librados::IoCtx &m_local_io_ctx; + ImageCtxT **m_local_image_ctx; + std::string m_local_image_id; + ContextWQ *m_work_queue; + Context *m_on_finish; + + bool m_primary = false; + int m_ret_val = 0; + + void send_open_image(); + void handle_open_image(int r); + + void send_is_primary(); + void handle_is_primary(int r); + + void send_lock_image(); + void handle_lock_image(int r); + + void send_close_image(int r); + void handle_close_image(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::OpenLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_OPEN_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc new file mode 100644 index 00000000..8e0ea837 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc @@ -0,0 +1,180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include <type_traits> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareLocalImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareLocalImageRequest<I>::send() { + dout(20) << dendl; + get_local_image_id(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_id() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_io_ctx, m_global_image_id, + m_local_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_id(int r) { + dout(20) << "r=" << r << ", " + << "local_image_id=" << *m_local_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_local_image_name(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_local_image_name() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::dir_get_name_start(&op, *m_local_image_id); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_local_image_name>(this); + int r = m_io_ctx.aio_operate(RBD_DIRECTORY, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_local_image_name(int r) { + dout(20) << "r=" << r << dendl; + + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::dir_get_name_finish(&it, m_local_image_name); + } + + if (r < 0) { + if (r != -ENOENT) { + derr << "failed to retrieve image name: " << cpp_strerror(r) << dendl; + } + finish(r); + return; + } + + get_mirror_state(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_mirror_state() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_get_start(&op, *m_local_image_id); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_mirror_state>(this); + int r = m_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_mirror_state(int r) { + dout(20) << ": r=" << r << dendl; + + cls::rbd::MirrorImage mirror_image; + if (r == 0) { + auto iter = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_get_finish(&iter, &mirror_image); + } + + if (r < 0) { + derr << "failed to retrieve image mirror state: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // TODO save current mirror state to determine if we should + // delete a partially formed image + // (e.g. MIRROR_IMAGE_STATE_CREATING/DELETING) + + get_tag_owner(); +} + +template <typename I> +void PrepareLocalImageRequest<I>::get_tag_owner() { + // deduce the class type for the journal to support unit tests + using Journal = typename std::decay< + typename std::remove_pointer<decltype(std::declval<I>().journal)> + ::type>::type; + + dout(20) << dendl; + + Context *ctx = create_context_callback< + PrepareLocalImageRequest<I>, + &PrepareLocalImageRequest<I>::handle_get_tag_owner>(this); + Journal::get_tag_owner(m_io_ctx, *m_local_image_id, m_tag_owner, + m_work_queue, ctx); +} + +template <typename I> +void PrepareLocalImageRequest<I>::handle_get_tag_owner(int r) { + dout(20) << "r=" << r << ", " + << "tag_owner=" << *m_tag_owner << dendl; + + if (r < 0) { + derr << "failed to retrieve journal tag owner: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void PrepareLocalImageRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h new file mode 100644 index 00000000..3417dd96 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.h @@ -0,0 +1,102 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados_fwd.hpp" +#include <string> + +namespace librbd { struct ImageCtx; } + +struct Context; +struct ContextWQ; + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareLocalImageRequest { +public: + static PrepareLocalImageRequest *create(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *local_image_id, + std::string *local_image_name, + std::string *tag_owner, + ContextWQ *work_queue, + Context *on_finish) { + return new PrepareLocalImageRequest(io_ctx, global_image_id, local_image_id, + local_image_name, tag_owner, work_queue, + on_finish); + } + + PrepareLocalImageRequest(librados::IoCtx &io_ctx, + const std::string &global_image_id, + std::string *local_image_id, + std::string *local_image_name, + std::string *tag_owner, + ContextWQ *work_queue, + Context *on_finish) + : m_io_ctx(io_ctx), m_global_image_id(global_image_id), + m_local_image_id(local_image_id), m_local_image_name(local_image_name), + m_tag_owner(tag_owner), m_work_queue(work_queue), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_LOCAL_IMAGE_ID + * | + * v + * GET_LOCAL_IMAGE_NAME + * | + * v + * GET_MIRROR_STATE + * | + * v + * <finish> + + * @endverbatim + */ + + librados::IoCtx &m_io_ctx; + std::string m_global_image_id; + std::string *m_local_image_id; + std::string *m_local_image_name; + std::string *m_tag_owner; + ContextWQ *m_work_queue; + Context *m_on_finish; + + bufferlist m_out_bl; + + void get_local_image_id(); + void handle_get_local_image_id(int r); + + void get_local_image_name(); + void handle_get_local_image_name(int r); + + void get_mirror_state(); + void handle_get_mirror_state(int r); + + void get_tag_owner(); + void handle_get_tag_owner(int r); + + void finish(int r); + +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareLocalImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_LOCAL_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc new file mode 100644 index 00000000..00c141e0 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.cc @@ -0,0 +1,195 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h" +#include "include/rados/librados.hpp" +#include "cls/rbd/cls_rbd_client.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/WorkQueue.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/journal/Types.h" +#include "tools/rbd_mirror/Threads.h" +#include "tools/rbd_mirror/image_replayer/GetMirrorImageIdRequest.h" +#include "tools/rbd_mirror/image_replayer/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::" \ + << "PrepareRemoteImageRequest: " << this << " " \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::create_async_context_callback; +using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; + +template <typename I> +void PrepareRemoteImageRequest<I>::send() { + get_remote_mirror_uuid(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_remote_mirror_uuid() { + dout(20) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_uuid_get_start(&op); + + librados::AioCompletion *aio_comp = create_rados_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_remote_mirror_uuid(int r) { + if (r >= 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_uuid_get_finish(&it, m_remote_mirror_uuid); + if (r >= 0 && m_remote_mirror_uuid->empty()) { + r = -ENOENT; + } + } + + dout(20) << "r=" << r << dendl; + if (r < 0) { + if (r == -ENOENT) { + dout(5) << "remote mirror uuid missing" << dendl; + } else { + derr << "failed to retrieve remote mirror uuid: " << cpp_strerror(r) + << dendl; + } + finish(r); + return; + } + + get_remote_image_id(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_remote_image_id() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_remote_image_id>(this); + auto req = GetMirrorImageIdRequest<I>::create(m_remote_io_ctx, + m_global_image_id, + m_remote_image_id, ctx); + req->send(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_remote_image_id(int r) { + dout(20) << "r=" << r << ", " + << "remote_image_id=" << *m_remote_image_id << dendl; + + if (r < 0) { + finish(r); + return; + } + + get_client(); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::get_client() { + dout(20) << dendl; + + ceph_assert(*m_remote_journaler == nullptr); + *m_remote_journaler = new Journaler(m_threads->work_queue, m_threads->timer, + &m_threads->timer_lock, m_remote_io_ctx, + *m_remote_image_id, m_local_mirror_uuid, + m_journal_settings); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_get_client>(this)); + (*m_remote_journaler)->get_client(m_local_mirror_uuid, &m_client, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_get_client(int r) { + dout(20) << "r=" << r << dendl; + + if (r == -ENOENT) { + dout(10) << "client not registered" << dendl; + register_client(); + } else if (r < 0) { + derr << "failed to retrieve client: " << cpp_strerror(r) << dendl; + finish(r); + } else if (!util::decode_client_meta(m_client, m_client_meta)) { + // require operator intervention since the data is corrupt + finish(-EBADMSG); + } else { + // skip registration if it already exists + *m_client_state = m_client.state; + finish(0); + } +} + +template <typename I> +void PrepareRemoteImageRequest<I>::register_client() { + dout(20) << dendl; + + librbd::journal::MirrorPeerClientMeta mirror_peer_client_meta{ + m_local_image_id}; + mirror_peer_client_meta.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + librbd::journal::ClientData client_data{mirror_peer_client_meta}; + bufferlist client_data_bl; + encode(client_data, client_data_bl); + + Context *ctx = create_async_context_callback( + m_threads->work_queue, create_context_callback< + PrepareRemoteImageRequest<I>, + &PrepareRemoteImageRequest<I>::handle_register_client>(this)); + (*m_remote_journaler)->register_client(client_data_bl, ctx); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::handle_register_client(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + derr << "failed to register with remote journal: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + *m_client_state = cls::journal::CLIENT_STATE_CONNECTED; + *m_client_meta = librbd::journal::MirrorPeerClientMeta(m_local_image_id); + m_client_meta->state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + + finish(0); +} + +template <typename I> +void PrepareRemoteImageRequest<I>::finish(int r) { + dout(20) << "r=" << r << dendl; + + if (r < 0) { + delete *m_remote_journaler; + *m_remote_journaler = nullptr; + } + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h new file mode 100644 index 00000000..100a066b --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/PrepareRemoteImageRequest.h @@ -0,0 +1,141 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H +#define RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H + +#include "include/buffer_fwd.h" +#include "include/rados/librados_fwd.hpp" +#include "cls/journal/cls_journal_types.h" +#include "journal/Settings.h" +#include "librbd/journal/TypeTraits.h" +#include <string> + +namespace journal { class Journaler; } +namespace journal { class Settings; } +namespace librbd { struct ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +struct Context; +struct ContextWQ; + +namespace rbd { +namespace mirror { + +template <typename> struct Threads; + +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class PrepareRemoteImageRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + + static PrepareRemoteImageRequest *create(Threads<ImageCtxT> *threads, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &local_image_id, + const journal::Settings &settings, + std::string *remote_mirror_uuid, + std::string *remote_image_id, + Journaler **remote_journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish) { + return new PrepareRemoteImageRequest(threads, remote_io_ctx, + global_image_id, local_mirror_uuid, + local_image_id, settings, + remote_mirror_uuid, remote_image_id, + remote_journaler, client_state, + client_meta, on_finish); + } + + PrepareRemoteImageRequest(Threads<ImageCtxT> *threads, + librados::IoCtx &remote_io_ctx, + const std::string &global_image_id, + const std::string &local_mirror_uuid, + const std::string &local_image_id, + const journal::Settings &journal_settings, + std::string *remote_mirror_uuid, + std::string *remote_image_id, + Journaler **remote_journaler, + cls::journal::ClientState *client_state, + MirrorPeerClientMeta *client_meta, + Context *on_finish) + : m_threads(threads), m_remote_io_ctx(remote_io_ctx), + m_global_image_id(global_image_id), + m_local_mirror_uuid(local_mirror_uuid), m_local_image_id(local_image_id), + m_journal_settings(journal_settings), + m_remote_mirror_uuid(remote_mirror_uuid), + m_remote_image_id(remote_image_id), + m_remote_journaler(remote_journaler), m_client_state(client_state), + m_client_meta(client_meta), m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * GET_REMOTE_MIRROR_UUID + * | + * v + * GET_REMOTE_IMAGE_ID + * | + * v + * GET_CLIENT + * | + * v (skip if not needed) + * REGISTER_CLIENT + * | + * v + * <finish> + + * @endverbatim + */ + + Threads<ImageCtxT> *m_threads; + librados::IoCtx &m_remote_io_ctx; + std::string m_global_image_id; + std::string m_local_mirror_uuid; + std::string m_local_image_id; + journal::Settings m_journal_settings; + std::string *m_remote_mirror_uuid; + std::string *m_remote_image_id; + Journaler **m_remote_journaler; + cls::journal::ClientState *m_client_state; + MirrorPeerClientMeta *m_client_meta; + Context *m_on_finish; + + bufferlist m_out_bl; + cls::journal::Client m_client; + + void get_remote_mirror_uuid(); + void handle_get_remote_mirror_uuid(int r); + + void get_remote_image_id(); + void handle_get_remote_image_id(int r); + + void get_client(); + void handle_get_client(int r); + + void register_client(); + void handle_register_client(int r); + + void finish(int r); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_replayer::PrepareRemoteImageRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_REPLAYER_PREPARE_REMOTE_IMAGE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc new file mode 100644 index 00000000..f514d749 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc @@ -0,0 +1,246 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "ReplayStatusFormatter.h" +#include "common/debug.h" +#include "common/dout.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/Journal.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::ReplayStatusFormatter: " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { + +using librbd::util::unique_lock_name; + +template <typename I> +ReplayStatusFormatter<I>::ReplayStatusFormatter(Journaler *journaler, + const std::string &mirror_uuid) + : m_journaler(journaler), + m_mirror_uuid(mirror_uuid), + m_lock(unique_lock_name("ReplayStatusFormatter::m_lock", this)) { +} + +template <typename I> +bool ReplayStatusFormatter<I>::get_or_send_update(std::string *description, + Context *on_finish) { + dout(20) << dendl; + + bool in_progress = false; + { + Mutex::Locker locker(m_lock); + if (m_on_finish) { + in_progress = true; + } else { + m_on_finish = on_finish; + } + } + + if (in_progress) { + dout(10) << "previous request is still in progress, ignoring" << dendl; + on_finish->complete(-EAGAIN); + return false; + } + + m_master_position = cls::journal::ObjectPosition(); + m_mirror_position = cls::journal::ObjectPosition(); + + cls::journal::Client master_client, mirror_client; + int r; + + r = m_journaler->get_cached_client(librbd::Journal<>::IMAGE_CLIENT_ID, + &master_client); + if (r < 0) { + derr << "error retrieving registered master client: " + << cpp_strerror(r) << dendl; + } else { + r = m_journaler->get_cached_client(m_mirror_uuid, &mirror_client); + if (r < 0) { + derr << "error retrieving registered mirror client: " + << cpp_strerror(r) << dendl; + } + } + + if (!master_client.commit_position.object_positions.empty()) { + m_master_position = + *(master_client.commit_position.object_positions.begin()); + } + + if (!mirror_client.commit_position.object_positions.empty()) { + m_mirror_position = + *(mirror_client.commit_position.object_positions.begin()); + } + + if (!calculate_behind_master_or_send_update()) { + dout(20) << "need to update tag cache" << dendl; + return false; + } + + format(description); + + { + Mutex::Locker locker(m_lock); + ceph_assert(m_on_finish == on_finish); + m_on_finish = nullptr; + } + + on_finish->complete(-EEXIST); + return true; +} + +template <typename I> +bool ReplayStatusFormatter<I>::calculate_behind_master_or_send_update() { + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position << dendl; + + m_entries_behind_master = 0; + + if (m_master_position == cls::journal::ObjectPosition() || + m_master_position.tag_tid < m_mirror_position.tag_tid) { + return true; + } + + cls::journal::ObjectPosition master = m_master_position; + uint64_t mirror_tag_tid = m_mirror_position.tag_tid; + + while (master.tag_tid > mirror_tag_tid) { + auto tag_it = m_tag_cache.find(master.tag_tid); + if (tag_it == m_tag_cache.end()) { + send_update_tag_cache(master.tag_tid, mirror_tag_tid); + return false; + } + librbd::journal::TagData &tag_data = tag_it->second; + m_entries_behind_master += master.entry_tid; + master = {0, tag_data.predecessor.tag_tid, tag_data.predecessor.entry_tid}; + } + if (master.tag_tid == mirror_tag_tid && + master.entry_tid > m_mirror_position.entry_tid) { + m_entries_behind_master += master.entry_tid - m_mirror_position.entry_tid; + } + + dout(20) << "clearing tags not needed any more (below mirror position)" + << dendl; + + uint64_t tag_tid = mirror_tag_tid; + size_t old_size = m_tag_cache.size(); + while (tag_tid != 0) { + auto tag_it = m_tag_cache.find(tag_tid); + if (tag_it == m_tag_cache.end()) { + break; + } + librbd::journal::TagData &tag_data = tag_it->second; + + dout(20) << "erasing tag " << tag_data << "for tag_tid " << tag_tid + << dendl; + + tag_tid = tag_data.predecessor.tag_tid; + m_tag_cache.erase(tag_it); + } + + dout(20) << old_size - m_tag_cache.size() << " entries cleared" << dendl; + + return true; +} + +template <typename I> +void ReplayStatusFormatter<I>::send_update_tag_cache(uint64_t master_tag_tid, + uint64_t mirror_tag_tid) { + if (master_tag_tid <= mirror_tag_tid || + m_tag_cache.find(master_tag_tid) != m_tag_cache.end()) { + Context *on_finish = nullptr; + { + Mutex::Locker locker(m_lock); + std::swap(m_on_finish, on_finish); + } + + ceph_assert(on_finish); + on_finish->complete(0); + return; + } + + dout(20) << "master_tag_tid=" << master_tag_tid << ", mirror_tag_tid=" + << mirror_tag_tid << dendl; + + FunctionContext *ctx = new FunctionContext( + [this, master_tag_tid, mirror_tag_tid](int r) { + handle_update_tag_cache(master_tag_tid, mirror_tag_tid, r); + }); + m_journaler->get_tag(master_tag_tid, &m_tag, ctx); +} + +template <typename I> +void ReplayStatusFormatter<I>::handle_update_tag_cache(uint64_t master_tag_tid, + uint64_t mirror_tag_tid, + int r) { + librbd::journal::TagData tag_data; + + if (r < 0) { + derr << "error retrieving tag " << master_tag_tid << ": " << cpp_strerror(r) + << dendl; + } else { + dout(20) << "retrieved tag " << master_tag_tid << ": " << m_tag << dendl; + + auto it = m_tag.data.cbegin(); + try { + decode(tag_data, it); + } catch (const buffer::error &err) { + derr << "error decoding tag " << master_tag_tid << ": " << err.what() + << dendl; + } + } + + if (tag_data.predecessor.mirror_uuid != + librbd::Journal<>::LOCAL_MIRROR_UUID && + tag_data.predecessor.mirror_uuid != + librbd::Journal<>::ORPHAN_MIRROR_UUID) { + dout(20) << "hit remote image non-primary epoch" << dendl; + tag_data.predecessor = {}; + } + + dout(20) << "decoded tag " << master_tag_tid << ": " << tag_data << dendl; + + m_tag_cache[master_tag_tid] = tag_data; + send_update_tag_cache(tag_data.predecessor.tag_tid, mirror_tag_tid); +} + +template <typename I> +void ReplayStatusFormatter<I>::format(std::string *description) { + + dout(20) << "m_master_position=" << m_master_position + << ", m_mirror_position=" << m_mirror_position + << ", m_entries_behind_master=" << m_entries_behind_master << dendl; + + std::stringstream ss; + ss << "master_position="; + if (m_master_position == cls::journal::ObjectPosition()) { + ss << "[]"; + } else { + ss << m_master_position; + } + ss << ", mirror_position="; + if (m_mirror_position == cls::journal::ObjectPosition()) { + ss << "[]"; + } else { + ss << m_mirror_position; + } + ss << ", entries_behind_master=" + << (m_entries_behind_master > 0 ? m_entries_behind_master : 0); + + *description = ss.str(); +} + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +template class +rbd::mirror::image_replayer::ReplayStatusFormatter<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h new file mode 100644 index 00000000..59940a65 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H +#define RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H + +#include "include/Context.h" +#include "common/Mutex.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" + +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } + +namespace rbd { +namespace mirror { +namespace image_replayer { + +template <typename ImageCtxT = librbd::ImageCtx> +class ReplayStatusFormatter { +public: + typedef typename librbd::journal::TypeTraits<ImageCtxT>::Journaler Journaler; + + static ReplayStatusFormatter* create(Journaler *journaler, + const std::string &mirror_uuid) { + return new ReplayStatusFormatter(journaler, mirror_uuid); + } + + static void destroy(ReplayStatusFormatter* formatter) { + delete formatter; + } + + ReplayStatusFormatter(Journaler *journaler, const std::string &mirror_uuid); + + bool get_or_send_update(std::string *description, Context *on_finish); + +private: + Journaler *m_journaler; + std::string m_mirror_uuid; + Mutex m_lock; + Context *m_on_finish = nullptr; + cls::journal::ObjectPosition m_master_position; + cls::journal::ObjectPosition m_mirror_position; + int m_entries_behind_master = 0; + cls::journal::Tag m_tag; + std::map<uint64_t, librbd::journal::TagData> m_tag_cache; + + bool calculate_behind_master_or_send_update(); + void send_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid); + void handle_update_tag_cache(uint64_t master_tag_tid, uint64_t mirror_tag_tid, + int r); + void format(std::string *description); +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_REPLAY_STATUS_FORMATTER_H diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h new file mode 100644 index 00000000..6ab988a7 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Types.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +enum HealthState { + HEALTH_STATE_OK, + HEALTH_STATE_WARNING, + HEALTH_STATE_ERROR +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H diff --git a/src/tools/rbd_mirror/image_replayer/Utils.cc b/src/tools/rbd_mirror/image_replayer/Utils.cc new file mode 100644 index 00000000..eda0179f --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.cc @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/image_replayer/Utils.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/journal/cls_journal_types.h" +#include "librbd/journal/Types.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_replayer::util::" \ + << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace util { + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta) { + dout(15) << dendl; + + librbd::journal::ClientData client_data; + auto it = client.data.cbegin(); + try { + decode(client_data, it); + } catch (const buffer::error &err) { + derr << "failed to decode client meta data: " << err.what() << dendl; + return false; + } + + auto local_client_meta = boost::get<librbd::journal::MirrorPeerClientMeta>( + &client_data.client_meta); + if (local_client_meta == nullptr) { + derr << "unknown peer registration" << dendl; + return false; + } + + *client_meta = *local_client_meta; + dout(15) << "client found: client_meta=" << *client_meta << dendl; + return true; +} + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/image_replayer/Utils.h b/src/tools/rbd_mirror/image_replayer/Utils.h new file mode 100644 index 00000000..d42146d1 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Utils.h @@ -0,0 +1,23 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_REPLAYER_UTILS_H +#define RBD_MIRROR_IMAGE_REPLAYER_UTILS_H + +namespace cls { namespace journal { struct Client; } } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_replayer { +namespace util { + +bool decode_client_meta(const cls::journal::Client& client, + librbd::journal::MirrorPeerClientMeta* client_meta); + +} // namespace util +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // RBD_MIRROR_IMAGE_REPLAYER_UTILS_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc new file mode 100644 index 00000000..ffe2eca9 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc @@ -0,0 +1,182 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointCreateRequest.h" +#include "include/uuid.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointCreateRequest: " \ + << this << " " << __func__ + +namespace rbd { +namespace mirror { +namespace image_sync { + +namespace { + +static const std::string SNAP_NAME_PREFIX(".rbd-mirror"); + +} // anonymous namespace + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointCreateRequest<I>::SyncPointCreateRequest(I *remote_image_ctx, + const std::string &mirror_uuid, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), m_mirror_uuid(mirror_uuid), + m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish), + m_client_meta_copy(*client_meta) { + ceph_assert(m_client_meta->sync_points.size() < 2); + + // initialize the updated client meta with the new sync point + m_client_meta_copy.sync_points.emplace_back(); + if (m_client_meta_copy.sync_points.size() > 1) { + m_client_meta_copy.sync_points.back().from_snap_name = + m_client_meta_copy.sync_points.front().snap_name; + } +} + +template <typename I> +void SyncPointCreateRequest<I>::send() { + send_update_client(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_update_client() { + uuid_d uuid_gen; + uuid_gen.generate_random(); + + MirrorPeerSyncPoint &sync_point = m_client_meta_copy.sync_points.back(); + sync_point.snap_name = SNAP_NAME_PREFIX + "." + m_mirror_uuid + "." + + uuid_gen.to_string(); + + dout(20) << ": sync_point=" << sync_point << dendl; + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(m_client_meta_copy); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_update_client>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_update_client(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // update provided meta structure to reflect reality + *m_client_meta = m_client_meta_copy; + + send_refresh_image(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_create_snap(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_create_snap() { + dout(20) << dendl; + + MirrorPeerSyncPoint &sync_point = m_client_meta_copy.sync_points.back(); + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, &SyncPointCreateRequest<I>::handle_create_snap>( + this); + m_remote_image_ctx->operations->snap_create( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name.c_str(), ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_create_snap(int r) { + dout(20) << ": r=" << r << dendl; + + if (r == -EEXIST) { + send_update_client(); + return; + } else if (r < 0) { + derr << ": failed to create snapshot: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_final_refresh_image(); +} + +template <typename I> +void SyncPointCreateRequest<I>::send_final_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointCreateRequest<I>, + &SyncPointCreateRequest<I>::handle_final_refresh_image>(this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointCreateRequest<I>::handle_final_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to refresh image for snapshot: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + finish(0); +} + +template <typename I> +void SyncPointCreateRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h new file mode 100644 index 00000000..45275ec4 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointCreateRequest.h @@ -0,0 +1,96 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H + +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointCreateRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + typedef librbd::journal::MirrorPeerSyncPoint MirrorPeerSyncPoint; + + static SyncPointCreateRequest* create(ImageCtxT *remote_image_ctx, + const std::string &mirror_uuid, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) { + return new SyncPointCreateRequest(remote_image_ctx, mirror_uuid, journaler, + client_meta, on_finish); + } + + SyncPointCreateRequest(ImageCtxT *remote_image_ctx, + const std::string &mirror_uuid, Journaler *journaler, + MirrorPeerClientMeta *client_meta, Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * v + * UPDATE_CLIENT < . . + * | . + * v . + * REFRESH_IMAGE . + * | . (repeat on EEXIST) + * v . + * CREATE_SNAP . . . . + * | + * v + * REFRESH_IMAGE + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + std::string m_mirror_uuid; + Journaler *m_journaler; + MirrorPeerClientMeta *m_client_meta; + Context *m_on_finish; + + MirrorPeerClientMeta m_client_meta_copy; + + void send_update_client(); + void handle_update_client(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_create_snap(); + void handle_create_snap(int r); + + void send_final_refresh_image(); + void handle_final_refresh_image(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_CREATE_REQUEST_H diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc new file mode 100644 index 00000000..2cfed5e6 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc @@ -0,0 +1,220 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "SyncPointPruneRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "journal/Journaler.h" +#include "librbd/ImageCtx.h" +#include "librbd/ImageState.h" +#include "librbd/Operations.h" +#include "librbd/Utils.h" +#include <set> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::image_sync::SyncPointPruneRequest: " \ + << this << " " << __func__ +namespace rbd { +namespace mirror { +namespace image_sync { + +using librbd::util::create_context_callback; + +template <typename I> +SyncPointPruneRequest<I>::SyncPointPruneRequest(I *remote_image_ctx, + bool sync_complete, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) + : m_remote_image_ctx(remote_image_ctx), m_sync_complete(sync_complete), + m_journaler(journaler), m_client_meta(client_meta), m_on_finish(on_finish), + m_client_meta_copy(*client_meta) { +} + +template <typename I> +void SyncPointPruneRequest<I>::send() { + if (m_client_meta->sync_points.empty()) { + send_remove_snap(); + return; + } + + if (m_sync_complete) { + // if sync is complete, we can remove the master sync point + auto it = m_client_meta_copy.sync_points.begin(); + MirrorPeerSyncPoint &sync_point = *it; + + ++it; + if (it == m_client_meta_copy.sync_points.end() || + it->from_snap_name != sync_point.snap_name) { + m_snap_names.push_back(sync_point.snap_name); + } + + if (!sync_point.from_snap_name.empty()) { + m_snap_names.push_back(sync_point.from_snap_name); + } + } else { + // if we have more than one sync point or invalid sync points, + // trim them off + RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock); + std::set<std::string> snap_names; + for (auto it = m_client_meta_copy.sync_points.rbegin(); + it != m_client_meta_copy.sync_points.rend(); ++it) { + MirrorPeerSyncPoint &sync_point = *it; + if (&sync_point == &m_client_meta_copy.sync_points.front()) { + if (m_remote_image_ctx->get_snap_id( + cls::rbd::UserSnapshotNamespace(), sync_point.snap_name) == + CEPH_NOSNAP) { + derr << ": failed to locate sync point snapshot: " + << sync_point.snap_name << dendl; + } else if (!sync_point.from_snap_name.empty()) { + derr << ": unexpected from_snap_name in primary sync point: " + << sync_point.from_snap_name << dendl; + } else { + // first sync point is OK -- keep it + break; + } + m_invalid_master_sync_point = true; + } + + if (snap_names.count(sync_point.snap_name) == 0) { + snap_names.insert(sync_point.snap_name); + m_snap_names.push_back(sync_point.snap_name); + } + + MirrorPeerSyncPoint &front_sync_point = + m_client_meta_copy.sync_points.front(); + if (!sync_point.from_snap_name.empty() && + snap_names.count(sync_point.from_snap_name) == 0 && + sync_point.from_snap_name != front_sync_point.snap_name) { + snap_names.insert(sync_point.from_snap_name); + m_snap_names.push_back(sync_point.from_snap_name); + } + } + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_remove_snap() { + if (m_snap_names.empty()) { + send_refresh_image(); + return; + } + + const std::string &snap_name = m_snap_names.front(); + + dout(20) << ": snap_name=" << snap_name << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_remove_snap>( + this); + m_remote_image_ctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), + snap_name.c_str(), + ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_remove_snap(int r) { + dout(20) << ": r=" << r << dendl; + + ceph_assert(!m_snap_names.empty()); + std::string snap_name = m_snap_names.front(); + m_snap_names.pop_front(); + + if (r == -ENOENT) { + r = 0; + } + if (r < 0) { + derr << ": failed to remove snapshot '" << snap_name << "': " + << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_remove_snap(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_refresh_image() { + dout(20) << dendl; + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_refresh_image>( + this); + m_remote_image_ctx->state->refresh(ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_refresh_image(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": remote image refresh failed: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + send_update_client(); +} + +template <typename I> +void SyncPointPruneRequest<I>::send_update_client() { + dout(20) << dendl; + + if (m_sync_complete) { + m_client_meta_copy.sync_points.pop_front(); + if (m_client_meta_copy.sync_points.empty()) { + m_client_meta_copy.state = librbd::journal::MIRROR_PEER_STATE_REPLAYING; + } + } else { + while (m_client_meta_copy.sync_points.size() > 1) { + m_client_meta_copy.sync_points.pop_back(); + } + if (m_invalid_master_sync_point) { + // all subsequent sync points would have been pruned + m_client_meta_copy.sync_points.clear(); + } + } + + bufferlist client_data_bl; + librbd::journal::ClientData client_data(m_client_meta_copy); + encode(client_data, client_data_bl); + + Context *ctx = create_context_callback< + SyncPointPruneRequest<I>, &SyncPointPruneRequest<I>::handle_update_client>( + this); + m_journaler->update_client(client_data_bl, ctx); +} + +template <typename I> +void SyncPointPruneRequest<I>::handle_update_client(int r) { + dout(20) << ": r=" << r << dendl; + + if (r < 0) { + derr << ": failed to update client data: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + // update provided meta structure to reflect reality + *m_client_meta = m_client_meta_copy; + finish(0); +} + +template <typename I> +void SyncPointPruneRequest<I>::finish(int r) { + dout(20) << ": r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h new file mode 100644 index 00000000..65e13ef5 --- /dev/null +++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h @@ -0,0 +1,96 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H +#define RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H + +#include "librbd/journal/Types.h" +#include "librbd/journal/TypeTraits.h" +#include <list> +#include <string> + +class Context; +namespace journal { class Journaler; } +namespace librbd { class ImageCtx; } +namespace librbd { namespace journal { struct MirrorPeerClientMeta; } } + +namespace rbd { +namespace mirror { +namespace image_sync { + +template <typename ImageCtxT = librbd::ImageCtx> +class SyncPointPruneRequest { +public: + typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits; + typedef typename TypeTraits::Journaler Journaler; + typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta; + typedef librbd::journal::MirrorPeerSyncPoint MirrorPeerSyncPoint; + + static SyncPointPruneRequest* create(ImageCtxT *remote_image_ctx, + bool sync_complete, + Journaler *journaler, + MirrorPeerClientMeta *client_meta, + Context *on_finish) { + return new SyncPointPruneRequest(remote_image_ctx, sync_complete, journaler, + client_meta, on_finish); + } + + SyncPointPruneRequest(ImageCtxT *remote_image_ctx, bool sync_complete, + Journaler *journaler, MirrorPeerClientMeta *client_meta, + Context *on_finish); + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | . . . . . + * | . . + * v v . (repeat if from snap + * REMOVE_SNAP . . . unused by other sync) + * | + * v + * REFRESH_IMAGE + * | + * v + * UPDATE_CLIENT + * | + * v + * <finish> + * + * @endverbatim + */ + + ImageCtxT *m_remote_image_ctx; + bool m_sync_complete; + Journaler *m_journaler; + MirrorPeerClientMeta *m_client_meta; + Context *m_on_finish; + + MirrorPeerClientMeta m_client_meta_copy; + std::list<std::string> m_snap_names; + + bool m_invalid_master_sync_point = false; + + void send_remove_snap(); + void handle_remove_snap(int r); + + void send_refresh_image(); + void handle_refresh_image(int r); + + void send_update_client(); + void handle_update_client(int r); + + void finish(int r); +}; + +} // namespace image_sync +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::ImageCtx>; + +#endif // RBD_MIRROR_IMAGE_SYNC_SYNC_POINT_PRUNE_REQUEST_H diff --git a/src/tools/rbd_mirror/instance_watcher/Types.cc b/src/tools/rbd_mirror/instance_watcher/Types.cc new file mode 100644 index 00000000..0e992273 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.cc @@ -0,0 +1,245 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void PayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + encode(request_id, bl); +} + +void PayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + decode(request_id, iter); +} + +void PayloadBase::dump(Formatter *f) const { + f->dump_unsigned("request_id", request_id); +} + +void ImagePayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); +} + +void ImagePayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); +} + +void ImagePayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); +} + +void PeerImageRemovedPayload::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(global_image_id, bl); + encode(peer_mirror_uuid, bl); +} + +void PeerImageRemovedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(global_image_id, iter); + decode(peer_mirror_uuid, iter); +} + +void PeerImageRemovedPayload::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("global_image_id", global_image_id); + f->dump_string("peer_mirror_uuid", peer_mirror_uuid); +} + +void SyncPayloadBase::encode(bufferlist &bl) const { + using ceph::encode; + PayloadBase::encode(bl); + encode(sync_id, bl); +} + +void SyncPayloadBase::decode(__u8 version, bufferlist::const_iterator &iter) { + using ceph::decode; + PayloadBase::decode(version, iter); + decode(sync_id, iter); +} + +void SyncPayloadBase::dump(Formatter *f) const { + PayloadBase::dump(f); + f->dump_string("sync_id", sync_id); +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void UnknownPayload::dump(Formatter *f) const { +} + +void NotifyMessage::encode(bufferlist& bl) const { + ENCODE_START(2, 2, bl); + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + ENCODE_FINISH(bl); +} + +void NotifyMessage::decode(bufferlist::const_iterator& iter) { + DECODE_START(2, iter); + + uint32_t notify_op; + decode(notify_op, iter); + + // select the correct payload variant based upon the encoded op + switch (notify_op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + payload = ImageAcquirePayload(); + break; + case NOTIFY_OP_IMAGE_RELEASE: + payload = ImageReleasePayload(); + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + payload = PeerImageRemovedPayload(); + break; + case NOTIFY_OP_SYNC_REQUEST: + payload = SyncRequestPayload(); + break; + case NOTIFY_OP_SYNC_START: + payload = SyncStartPayload(); + break; + default: + payload = UnknownPayload(); + break; + } + + apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); + DECODE_FINISH(iter); +} + +void NotifyMessage::dump(Formatter *f) const { + apply_visitor(DumpPayloadVisitor(f), payload); +} + +void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { + o.push_back(new NotifyMessage(ImageAcquirePayload())); + o.push_back(new NotifyMessage(ImageAcquirePayload(1, "gid"))); + + o.push_back(new NotifyMessage(ImageReleasePayload())); + o.push_back(new NotifyMessage(ImageReleasePayload(1, "gid"))); + + o.push_back(new NotifyMessage(PeerImageRemovedPayload())); + o.push_back(new NotifyMessage(PeerImageRemovedPayload(1, "gid", "uuid"))); + + o.push_back(new NotifyMessage(SyncRequestPayload())); + o.push_back(new NotifyMessage(SyncRequestPayload(1, "sync_id"))); + + o.push_back(new NotifyMessage(SyncStartPayload())); + o.push_back(new NotifyMessage(SyncStartPayload(1, "sync_id"))); +} + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { + switch (op) { + case NOTIFY_OP_IMAGE_ACQUIRE: + out << "ImageAcquire"; + break; + case NOTIFY_OP_IMAGE_RELEASE: + out << "ImageRelease"; + break; + case NOTIFY_OP_PEER_IMAGE_REMOVED: + out << "PeerImageRemoved"; + break; + case NOTIFY_OP_SYNC_REQUEST: + out << "SyncRequest"; + break; + case NOTIFY_OP_SYNC_START: + out << "SyncStart"; + break; + default: + out << "Unknown (" << static_cast<uint32_t>(op) << ")"; + break; + } + return out; +} + +void NotifyAckPayload::encode(bufferlist &bl) const { + using ceph::encode; + encode(instance_id, bl); + encode(request_id, bl); + encode(ret_val, bl); +} + +void NotifyAckPayload::decode(bufferlist::const_iterator &iter) { + using ceph::decode; + decode(instance_id, iter); + decode(request_id, iter); + decode(ret_val, iter); +} + +void NotifyAckPayload::dump(Formatter *f) const { + f->dump_string("instance_id", instance_id); + f->dump_unsigned("request_id", request_id); + f->dump_int("request_id", ret_val); +} + +} // namespace instance_watcher +} // namespace mirror +} // namespace rbd diff --git a/src/tools/rbd_mirror/instance_watcher/Types.h b/src/tools/rbd_mirror/instance_watcher/Types.h new file mode 100644 index 00000000..b0b7b779 --- /dev/null +++ b/src/tools/rbd_mirror/instance_watcher/Types.h @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_INSTANCE_WATCHER_TYPES_H +#define RBD_MIRROR_INSTANCE_WATCHER_TYPES_H + +#include <string> +#include <set> +#include <boost/variant.hpp> + +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include "include/int_types.h" + +namespace ceph { class Formatter; } + +namespace rbd { +namespace mirror { +namespace instance_watcher { + +enum NotifyOp { + NOTIFY_OP_IMAGE_ACQUIRE = 0, + NOTIFY_OP_IMAGE_RELEASE = 1, + NOTIFY_OP_PEER_IMAGE_REMOVED = 2, + NOTIFY_OP_SYNC_REQUEST = 3, + NOTIFY_OP_SYNC_START = 4 +}; + +struct PayloadBase { + uint64_t request_id; + + PayloadBase() : request_id(0) { + } + + PayloadBase(uint64_t request_id) : request_id(request_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct ImagePayloadBase : public PayloadBase { + std::string global_image_id; + + ImagePayloadBase() : PayloadBase() { + } + + ImagePayloadBase(uint64_t request_id, const std::string &global_image_id) + : PayloadBase(request_id), global_image_id(global_image_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct ImageAcquirePayload : public ImagePayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_ACQUIRE; + + ImageAcquirePayload() { + } + ImageAcquirePayload(uint64_t request_id, const std::string &global_image_id) + : ImagePayloadBase(request_id, global_image_id) { + } +}; + +struct ImageReleasePayload : public ImagePayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_IMAGE_RELEASE; + + ImageReleasePayload() { + } + ImageReleasePayload(uint64_t request_id, const std::string &global_image_id) + : ImagePayloadBase(request_id, global_image_id) { + } +}; + +struct PeerImageRemovedPayload : public PayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_PEER_IMAGE_REMOVED; + + std::string global_image_id; + std::string peer_mirror_uuid; + + PeerImageRemovedPayload() { + } + PeerImageRemovedPayload(uint64_t request_id, + const std::string& global_image_id, + const std::string& peer_mirror_uuid) + : PayloadBase(request_id), + global_image_id(global_image_id), peer_mirror_uuid(peer_mirror_uuid) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct SyncPayloadBase : public PayloadBase { + std::string sync_id; + + SyncPayloadBase() : PayloadBase() { + } + + SyncPayloadBase(uint64_t request_id, const std::string &sync_id) + : PayloadBase(request_id), sync_id(sync_id) { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct SyncRequestPayload : public SyncPayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_REQUEST; + + SyncRequestPayload() : SyncPayloadBase() { + } + + SyncRequestPayload(uint64_t request_id, const std::string &sync_id) + : SyncPayloadBase(request_id, sync_id) { + } +}; + +struct SyncStartPayload : public SyncPayloadBase { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_SYNC_START; + + SyncStartPayload() : SyncPayloadBase() { + } + + SyncStartPayload(uint64_t request_id, const std::string &sync_id) + : SyncPayloadBase(request_id, sync_id) { + } +}; + +struct UnknownPayload { + static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); + + UnknownPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +typedef boost::variant<ImageAcquirePayload, + ImageReleasePayload, + PeerImageRemovedPayload, + SyncRequestPayload, + SyncStartPayload, + UnknownPayload> Payload; + +struct NotifyMessage { + NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { + } + + Payload payload; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<NotifyMessage *> &o); +}; + +WRITE_CLASS_ENCODER(NotifyMessage); + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op); + +struct NotifyAckPayload { + std::string instance_id; + uint64_t request_id; + int ret_val; + + NotifyAckPayload() : request_id(0), ret_val(0) { + } + + NotifyAckPayload(const std::string &instance_id, uint64_t request_id, + int ret_val) + : instance_id(instance_id), request_id(request_id), ret_val(ret_val) { + } + + void encode(bufferlist &bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; +}; + +WRITE_CLASS_ENCODER(NotifyAckPayload); + +} // namespace instance_watcher +} // namespace mirror +} // namespace librbd + +using rbd::mirror::instance_watcher::encode; +using rbd::mirror::instance_watcher::decode; + +#endif // RBD_MIRROR_INSTANCE_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/instances/Types.h b/src/tools/rbd_mirror/instances/Types.h new file mode 100644 index 00000000..8b0a68fc --- /dev/null +++ b/src/tools/rbd_mirror/instances/Types.h @@ -0,0 +1,28 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_INSTANCES_TYPES_H +#define CEPH_RBD_MIRROR_INSTANCES_TYPES_H + +#include <string> +#include <vector> + +namespace rbd { +namespace mirror { +namespace instances { + +struct Listener { + typedef std::vector<std::string> InstanceIds; + + virtual ~Listener() { + } + + virtual void handle_added(const InstanceIds& instance_ids) = 0; + virtual void handle_removed(const InstanceIds& instance_ids) = 0; +}; + +} // namespace instances +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_INSTANCES_TYPES_H diff --git a/src/tools/rbd_mirror/leader_watcher/Types.cc b/src/tools/rbd_mirror/leader_watcher/Types.cc new file mode 100644 index 00000000..d2fb7908 --- /dev/null +++ b/src/tools/rbd_mirror/leader_watcher/Types.cc @@ -0,0 +1,161 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Types.h" +#include "include/ceph_assert.h" +#include "include/stringify.h" +#include "common/Formatter.h" + +namespace rbd { +namespace mirror { +namespace leader_watcher { + +namespace { + +class EncodePayloadVisitor : public boost::static_visitor<void> { +public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(Payload::NOTIFY_OP), m_bl); + payload.encode(m_bl); + } + +private: + bufferlist &m_bl; +}; + +class DecodePayloadVisitor : public boost::static_visitor<void> { +public: + DecodePayloadVisitor(__u8 version, bufferlist::const_iterator &iter) + : m_version(version), m_iter(iter) {} + + template <typename Payload> + inline void operator()(Payload &payload) const { + payload.decode(m_version, m_iter); + } + +private: + __u8 m_version; + bufferlist::const_iterator &m_iter; +}; + +class DumpPayloadVisitor : public boost::static_visitor<void> { +public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {} + + template <typename Payload> + inline void operator()(const Payload &payload) const { + NotifyOp notify_op = Payload::NOTIFY_OP; + m_formatter->dump_string("notify_op", stringify(notify_op)); + payload.dump(m_formatter); + } + +private: + ceph::Formatter *m_formatter; +}; + +} // anonymous namespace + +void HeartbeatPayload::encode(bufferlist &bl) const { +} + +void HeartbeatPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void HeartbeatPayload::dump(Formatter *f) const { +} + +void LockAcquiredPayload::encode(bufferlist &bl) const { +} + +void LockAcquiredPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void LockAcquiredPayload::dump(Formatter *f) const { +} + +void LockReleasedPayload::encode(bufferlist &bl) const { +} + +void LockReleasedPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void LockReleasedPayload::dump(Formatter *f) const { +} + +void UnknownPayload::encode(bufferlist &bl) const { + ceph_abort(); +} + +void UnknownPayload::decode(__u8 version, bufferlist::const_iterator &iter) { +} + +void UnknownPayload::dump(Formatter *f) const { +} + +void NotifyMessage::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + ENCODE_FINISH(bl); +} + +void NotifyMessage::decode(bufferlist::const_iterator& iter) { + DECODE_START(1, iter); + + uint32_t notify_op; + decode(notify_op, iter); + + // select the correct payload variant based upon the encoded op + switch (notify_op) { + case NOTIFY_OP_HEARTBEAT: + payload = HeartbeatPayload(); + break; + case NOTIFY_OP_LOCK_ACQUIRED: + payload = LockAcquiredPayload(); + break; + case NOTIFY_OP_LOCK_RELEASED: + payload = LockReleasedPayload(); + break; + default: + payload = UnknownPayload(); + break; + } + + apply_visitor(DecodePayloadVisitor(struct_v, iter), payload); + DECODE_FINISH(iter); +} + +void NotifyMessage::dump(Formatter *f) const { + apply_visitor(DumpPayloadVisitor(f), payload); +} + +void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) { + o.push_back(new NotifyMessage(HeartbeatPayload())); + o.push_back(new NotifyMessage(LockAcquiredPayload())); + o.push_back(new NotifyMessage(LockReleasedPayload())); +} + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op) { + switch (op) { + case NOTIFY_OP_HEARTBEAT: + out << "Heartbeat"; + break; + case NOTIFY_OP_LOCK_ACQUIRED: + out << "LockAcquired"; + break; + case NOTIFY_OP_LOCK_RELEASED: + out << "LockReleased"; + break; + default: + out << "Unknown (" << static_cast<uint32_t>(op) << ")"; + break; + } + return out; +} + +} // namespace leader_watcher +} // namespace mirror +} // namespace librbd diff --git a/src/tools/rbd_mirror/leader_watcher/Types.h b/src/tools/rbd_mirror/leader_watcher/Types.h new file mode 100644 index 00000000..1278e54b --- /dev/null +++ b/src/tools/rbd_mirror/leader_watcher/Types.h @@ -0,0 +1,117 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef RBD_MIRROR_LEADER_WATCHER_TYPES_H +#define RBD_MIRROR_LEADER_WATCHER_TYPES_H + +#include "include/int_types.h" +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include <string> +#include <vector> +#include <boost/variant.hpp> + +struct Context; + +namespace ceph { class Formatter; } + +namespace rbd { +namespace mirror { +namespace leader_watcher { + +struct Listener { + typedef std::vector<std::string> InstanceIds; + + virtual ~Listener() { + } + + virtual void post_acquire_handler(Context *on_finish) = 0; + virtual void pre_release_handler(Context *on_finish) = 0; + + virtual void update_leader_handler( + const std::string &leader_instance_id) = 0; + + virtual void handle_instances_added(const InstanceIds& instance_ids) = 0; + virtual void handle_instances_removed(const InstanceIds& instance_ids) = 0; +}; + +enum NotifyOp { + NOTIFY_OP_HEARTBEAT = 0, + NOTIFY_OP_LOCK_ACQUIRED = 1, + NOTIFY_OP_LOCK_RELEASED = 2, +}; + +struct HeartbeatPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_HEARTBEAT; + + HeartbeatPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct LockAcquiredPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_ACQUIRED; + + LockAcquiredPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct LockReleasedPayload { + static const NotifyOp NOTIFY_OP = NOTIFY_OP_LOCK_RELEASED; + + LockReleasedPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +struct UnknownPayload { + static const NotifyOp NOTIFY_OP = static_cast<NotifyOp>(-1); + + UnknownPayload() { + } + + void encode(bufferlist &bl) const; + void decode(__u8 version, bufferlist::const_iterator &iter); + void dump(Formatter *f) const; +}; + +typedef boost::variant<HeartbeatPayload, + LockAcquiredPayload, + LockReleasedPayload, + UnknownPayload> Payload; + +struct NotifyMessage { + NotifyMessage(const Payload &payload = UnknownPayload()) : payload(payload) { + } + + Payload payload; + + void encode(bufferlist& bl) const; + void decode(bufferlist::const_iterator& it); + void dump(Formatter *f) const; + + static void generate_test_instances(std::list<NotifyMessage *> &o); +}; + +WRITE_CLASS_ENCODER(NotifyMessage); + +std::ostream &operator<<(std::ostream &out, const NotifyOp &op); + +} // namespace leader_watcher +} // namespace mirror +} // namespace librbd + +using rbd::mirror::leader_watcher::encode; +using rbd::mirror::leader_watcher::decode; + +#endif // RBD_MIRROR_LEADER_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/main.cc b/src/tools/rbd_mirror/main.cc new file mode 100644 index 00000000..ab350a01 --- /dev/null +++ b/src/tools/rbd_mirror/main.cc @@ -0,0 +1,104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/perf_counters.h" +#include "global/global_init.h" +#include "global/signal_handler.h" +#include "Mirror.h" +#include "Types.h" + +#include <vector> + +rbd::mirror::Mirror *mirror = nullptr; +PerfCounters *g_perf_counters = nullptr; + +void usage() { + std::cout << "usage: rbd-mirror [options...]" << std::endl; + std::cout << "options:\n"; + std::cout << " -m monaddress[:port] connect to specified monitor\n"; + std::cout << " --keyring=<path> path to keyring for local cluster\n"; + std::cout << " --log-file=<logfile> file to log debug output\n"; + std::cout << " --debug-rbd-mirror=<log-level>/<memory-level> set rbd-mirror debug level\n"; + generic_server_usage(); +} + +static void handle_signal(int signum) +{ + if (mirror) + mirror->handle_signal(signum); +} + +int main(int argc, const char **argv) +{ + std::vector<const char*> args; + argv_to_vec(argc, argv, args); + if (args.empty()) { + cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + + if (g_conf()->daemonize) { + global_init_daemonize(g_ceph_context); + } + + common_init_finish(g_ceph_context); + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, handle_signal); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + std::vector<const char*> cmd_args; + argv_to_vec(argc, argv, cmd_args); + + // disable unnecessary librbd cache + g_ceph_context->_conf.set_val_or_die("rbd_cache", "false"); + + auto prio = + g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio"); + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror", + rbd::mirror::l_rbd_mirror_first, + rbd::mirror::l_rbd_mirror_last); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays", + "r", prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes", + "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency", + "Replay latency", "rl", prio); + g_perf_counters = plb.create_perf_counters(); + g_ceph_context->get_perfcounters_collection()->add(g_perf_counters); + + mirror = new rbd::mirror::Mirror(g_ceph_context, cmd_args); + int r = mirror->init(); + if (r < 0) { + std::cerr << "failed to initialize: " << cpp_strerror(r) << std::endl; + goto cleanup; + } + + mirror->run(); + + cleanup: + unregister_async_signal_handler(SIGHUP, handle_signal); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); + + g_ceph_context->get_perfcounters_collection()->remove(g_perf_counters); + + delete mirror; + delete g_perf_counters; + + return r < 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc new file mode 100644 index 00000000..a1d9c1b5 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h" +#include "common/debug.h" +#include "common/errno.h" +#include "cls/rbd/cls_rbd_client.h" +#include "librbd/Utils.h" +#include <map> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd_mirror +#undef dout_prefix +#define dout_prefix *_dout << "rbd::mirror::pool_watcher::RefreshImagesRequest " \ + << this << " " << __func__ << ": " + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +static const uint32_t MAX_RETURN = 1024; + +using librbd::util::create_rados_callback; + +template <typename I> +void RefreshImagesRequest<I>::send() { + m_image_ids->clear(); + mirror_image_list(); +} + +template <typename I> +void RefreshImagesRequest<I>::mirror_image_list() { + dout(10) << dendl; + + librados::ObjectReadOperation op; + librbd::cls_client::mirror_image_list_start(&op, m_start_after, MAX_RETURN); + + m_out_bl.clear(); + librados::AioCompletion *aio_comp = create_rados_callback< + RefreshImagesRequest<I>, + &RefreshImagesRequest<I>::handle_mirror_image_list>(this); + int r = m_remote_io_ctx.aio_operate(RBD_MIRRORING, aio_comp, &op, &m_out_bl); + ceph_assert(r == 0); + aio_comp->release(); +} + +template <typename I> +void RefreshImagesRequest<I>::handle_mirror_image_list(int r) { + dout(10) << "r=" << r << dendl; + + std::map<std::string, std::string> ids; + if (r == 0) { + auto it = m_out_bl.cbegin(); + r = librbd::cls_client::mirror_image_list_finish(&it, &ids); + } + + if (r < 0 && r != -ENOENT) { + derr << "failed to list mirrored images: " << cpp_strerror(r) << dendl; + finish(r); + return; + } + + // store as global -> local image ids + for (auto &id : ids) { + m_image_ids->emplace(id.second, id.first); + } + + if (ids.size() == MAX_RETURN) { + m_start_after = ids.rbegin()->first; + mirror_image_list(); + return; + } + + finish(0); +} + +template <typename I> +void RefreshImagesRequest<I>::finish(int r) { + dout(10) << "r=" << r << dendl; + + m_on_finish->complete(r); + delete this; +} + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>; diff --git a/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h new file mode 100644 index 00000000..8bfeabe2 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/RefreshImagesRequest.h @@ -0,0 +1,73 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H + +#include "include/buffer.h" +#include "include/rados/librados.hpp" +#include "tools/rbd_mirror/Types.h" +#include <string> + +struct Context; + +namespace librbd { struct ImageCtx; } + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +template <typename ImageCtxT = librbd::ImageCtx> +class RefreshImagesRequest { +public: + static RefreshImagesRequest *create(librados::IoCtx &remote_io_ctx, + ImageIds *image_ids, Context *on_finish) { + return new RefreshImagesRequest(remote_io_ctx, image_ids, on_finish); + } + + RefreshImagesRequest(librados::IoCtx &remote_io_ctx, ImageIds *image_ids, + Context *on_finish) + : m_remote_io_ctx(remote_io_ctx), m_image_ids(image_ids), + m_on_finish(on_finish) { + } + + void send(); + +private: + /** + * @verbatim + * + * <start> + * | + * | /-------------\ + * | | | + * v v | (more images) + * MIRROR_IMAGE_LIST ---/ + * | + * v + * <finish> + * + * @endverbatim + */ + + librados::IoCtx &m_remote_io_ctx; + ImageIds *m_image_ids; + Context *m_on_finish; + + bufferlist m_out_bl; + std::string m_start_after; + + void mirror_image_list(); + void handle_mirror_image_list(int r); + + void finish(int r); + +}; + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +extern template class rbd::mirror::pool_watcher::RefreshImagesRequest<librbd::ImageCtx>; + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_REFRESH_IMAGES_REQUEST_H diff --git a/src/tools/rbd_mirror/pool_watcher/Types.h b/src/tools/rbd_mirror/pool_watcher/Types.h new file mode 100644 index 00000000..52dfc342 --- /dev/null +++ b/src/tools/rbd_mirror/pool_watcher/Types.h @@ -0,0 +1,27 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H +#define CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H + +#include "tools/rbd_mirror/Types.h" +#include <string> + +namespace rbd { +namespace mirror { +namespace pool_watcher { + +struct Listener { + virtual ~Listener() { + } + + virtual void handle_update(const std::string &mirror_uuid, + ImageIds &&added_image_ids, + ImageIds &&removed_image_ids) = 0; +}; + +} // namespace pool_watcher +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_POOL_WATCHER_TYPES_H diff --git a/src/tools/rbd_mirror/service_daemon/Types.cc b/src/tools/rbd_mirror/service_daemon/Types.cc new file mode 100644 index 00000000..7dc6537c --- /dev/null +++ b/src/tools/rbd_mirror/service_daemon/Types.cc @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "tools/rbd_mirror/service_daemon/Types.h" +#include <iostream> + +namespace rbd { +namespace mirror { +namespace service_daemon { + +std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level) { + switch (callout_level) { + case CALLOUT_LEVEL_INFO: + os << "info"; + break; + case CALLOUT_LEVEL_WARNING: + os << "warning"; + break; + case CALLOUT_LEVEL_ERROR: + os << "error"; + break; + } + return os; +} + +} // namespace service_daemon +} // namespace mirror +} // namespace rbd + diff --git a/src/tools/rbd_mirror/service_daemon/Types.h b/src/tools/rbd_mirror/service_daemon/Types.h new file mode 100644 index 00000000..3aab7201 --- /dev/null +++ b/src/tools/rbd_mirror/service_daemon/Types.h @@ -0,0 +1,33 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H +#define CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H + +#include "include/int_types.h" +#include <iosfwd> +#include <string> +#include <boost/variant.hpp> + +namespace rbd { +namespace mirror { +namespace service_daemon { + +typedef uint64_t CalloutId; +const uint64_t CALLOUT_ID_NONE {0}; + +enum CalloutLevel { + CALLOUT_LEVEL_INFO, + CALLOUT_LEVEL_WARNING, + CALLOUT_LEVEL_ERROR +}; + +std::ostream& operator<<(std::ostream& os, const CalloutLevel& callout_level); + +typedef boost::variant<bool, uint64_t, std::string> AttributeValue; + +} // namespace service_daemon +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_SERVICE_DAEMON_TYPES_H diff --git a/src/tools/rbd_nbd/CMakeLists.txt b/src/tools/rbd_nbd/CMakeLists.txt new file mode 100644 index 00000000..5356fae4 --- /dev/null +++ b/src/tools/rbd_nbd/CMakeLists.txt @@ -0,0 +1,4 @@ +add_executable(rbd-nbd rbd-nbd.cc) +target_include_directories(rbd-nbd PUBLIC ${GENL_INCLUDE_DIR}) +target_link_libraries(rbd-nbd librbd librados global ${GENL_LIBRARIES}) +install(TARGETS rbd-nbd DESTINATION bin) diff --git a/src/tools/rbd_nbd/nbd-netlink.h b/src/tools/rbd_nbd/nbd-netlink.h new file mode 100644 index 00000000..f932f96a --- /dev/null +++ b/src/tools/rbd_nbd/nbd-netlink.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2017 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef _UAPILINUX_NBD_NETLINK_H +#define _UAPILINUX_NBD_NETLINK_H + +#define NBD_GENL_FAMILY_NAME "nbd" +#define NBD_GENL_VERSION 0x1 + +/* Configuration policy attributes, used for CONNECT */ +enum { + NBD_ATTR_UNSPEC, + NBD_ATTR_INDEX, + NBD_ATTR_SIZE_BYTES, + NBD_ATTR_BLOCK_SIZE_BYTES, + NBD_ATTR_TIMEOUT, + NBD_ATTR_SERVER_FLAGS, + NBD_ATTR_CLIENT_FLAGS, + NBD_ATTR_SOCKETS, + __NBD_ATTR_MAX, +}; +#define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) + +/* + * This is the format for multiple sockets with NBD_ATTR_SOCKETS + * + * [NBD_ATTR_SOCKETS] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + */ +enum { + NBD_SOCK_ITEM_UNSPEC, + NBD_SOCK_ITEM, + __NBD_SOCK_ITEM_MAX, +}; +#define NBD_SOCK_ITEM_MAX (__NBD_SOCK_ITEM_MAX - 1) + +enum { + NBD_SOCK_UNSPEC, + NBD_SOCK_FD, + __NBD_SOCK_MAX, +}; +#define NBD_SOCK_MAX (__NBD_SOCK_MAX - 1) + +enum { + NBD_CMD_UNSPEC, + NBD_CMD_CONNECT, + NBD_CMD_DISCONNECT, + NBD_CMD_RECONFIGURE, + __NBD_CMD_MAX, +}; +#define NBD_CMD_MAX (__NBD_CMD_MAX - 1) + +#endif /* _UAPILINUX_NBD_NETLINK_H */ diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc new file mode 100644 index 00000000..42dc92ad --- /dev/null +++ b/src/tools/rbd_nbd/rbd-nbd.cc @@ -0,0 +1,1615 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * rbd-nbd - RBD in userspace + * + * Copyright (C) 2015 - 2016 Kylin Corporation + * + * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com> + * Li Wang <li.wang@kylin-cloud.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * +*/ + +#include "include/int_types.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/nbd.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "nbd-netlink.h" +#include <libnl3/netlink/genl/genl.h> +#include <libnl3/netlink/genl/ctrl.h> +#include <libnl3/netlink/genl/mngt.h> + +#include <fstream> +#include <iostream> +#include <memory> +#include <regex> +#include <boost/algorithm/string/predicate.hpp> + +#include "common/Formatter.h" +#include "common/Preforker.h" +#include "common/TextTable.h" +#include "common/ceph_argparse.h" +#include "common/config.h" +#include "common/dout.h" +#include "common/errno.h" +#include "common/module.h" +#include "common/safe_io.h" +#include "common/version.h" + +#include "global/global_init.h" +#include "global/signal_handler.h" + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" +#include "include/stringify.h" +#include "include/xlist.h" + +#include "mon/MonClient.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "rbd-nbd: " + +struct Config { + int nbds_max = 0; + int max_part = 255; + int timeout = -1; + + bool exclusive = false; + bool readonly = false; + bool set_max_part = false; + bool try_netlink = false; + + std::string poolname; + std::string nsname; + std::string imgname; + std::string snapname; + std::string devpath; + + std::string format; + bool pretty_format = false; +}; + +static void usage() +{ + std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map an image to nbd device\n" + << " unmap <device|image-or-snap-spec> Unmap nbd device\n" + << " [options] list-mapped List mapped nbd devices\n" + << "Map options:\n" + << " --device <device path> Specify nbd device path (/dev/nbd{num})\n" + << " --read-only Map read-only\n" + << " --nbds_max <limit> Override for module param nbds_max\n" + << " --max_part <limit> Override for module param max_part\n" + << " --exclusive Forbid writes by other clients\n" + << " --timeout <seconds> Set nbd request timeout\n" + << " --try-netlink Use the nbd netlink interface\n" + << "\n" + << "List options:\n" + << " --format plain|json|xml Output format (default: plain)\n" + << " --pretty-format Pretty formatting (json and xml)\n" + << std::endl; + generic_server_usage(); +} + +static int nbd = -1; +static int nbd_index = -1; + +enum Command { + None, + Connect, + Disconnect, + List +}; + +static Command cmd = None; + +#define RBD_NBD_BLKSIZE 512UL + +#define HELP_INFO 1 +#define VERSION_INFO 2 + +#ifdef CEPH_BIG_ENDIAN +#define ntohll(a) (a) +#elif defined(CEPH_LITTLE_ENDIAN) +#define ntohll(a) swab(a) +#else +#error "Could not determine endianess" +#endif +#define htonll(a) ntohll(a) + +static int parse_args(vector<const char*>& args, std::ostream *err_msg, + Command *command, Config *cfg); +static int netlink_resize(int nbd_index, uint64_t size); + +class NBDServer +{ +private: + int fd; + librbd::Image ℑ + +public: + NBDServer(int _fd, librbd::Image& _image) + : fd(_fd) + , image(_image) + , disconnect_lock("NBDServer::DisconnectLocker") + , lock("NBDServer::Locker") + , reader_thread(*this, &NBDServer::reader_entry) + , writer_thread(*this, &NBDServer::writer_entry) + , started(false) + {} + +private: + Mutex disconnect_lock; + Cond disconnect_cond; + std::atomic<bool> terminated = { false }; + + void shutdown() + { + bool expected = false; + if (terminated.compare_exchange_strong(expected, true)) { + ::shutdown(fd, SHUT_RDWR); + + Mutex::Locker l(lock); + cond.Signal(); + } + } + + struct IOContext + { + xlist<IOContext*>::item item; + NBDServer *server = nullptr; + struct nbd_request request; + struct nbd_reply reply; + bufferlist data; + int command = 0; + + IOContext() + : item(this) + {} + }; + + friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); + + Mutex lock; + Cond cond; + xlist<IOContext*> io_pending; + xlist<IOContext*> io_finished; + + void io_start(IOContext *ctx) + { + Mutex::Locker l(lock); + io_pending.push_back(&ctx->item); + } + + void io_finish(IOContext *ctx) + { + Mutex::Locker l(lock); + ceph_assert(ctx->item.is_on_list()); + ctx->item.remove_myself(); + io_finished.push_back(&ctx->item); + cond.Signal(); + } + + IOContext *wait_io_finish() + { + Mutex::Locker l(lock); + while(io_finished.empty() && !terminated) + cond.Wait(lock); + + if (io_finished.empty()) + return NULL; + + IOContext *ret = io_finished.front(); + io_finished.pop_front(); + + return ret; + } + + void wait_clean() + { + ceph_assert(!reader_thread.is_started()); + Mutex::Locker l(lock); + while(!io_pending.empty()) + cond.Wait(lock); + + while(!io_finished.empty()) { + std::unique_ptr<IOContext> free_ctx(io_finished.front()); + io_finished.pop_front(); + } + } + + static void aio_callback(librbd::completion_t cb, void *arg) + { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(cb); + + IOContext *ctx = reinterpret_cast<IOContext *>(arg); + int ret = aio_completion->get_return_value(); + + dout(20) << __func__ << ": " << *ctx << dendl; + + if (ret == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of the range of the new image extents + dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl; + ctx->data.clear(); + ret = 0; + } + + if (ret < 0) { + ctx->reply.error = htonl(-ret); + } else if ((ctx->command == NBD_CMD_READ) && + ret < static_cast<int>(ctx->request.len)) { + int pad_byte_count = static_cast<int> (ctx->request.len) - ret; + ctx->data.append_zero(pad_byte_count); + dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " + << pad_byte_count << dendl; + ctx->reply.error = htonl(0); + } else { + ctx->reply.error = htonl(0); + } + ctx->server->io_finish(ctx); + + aio_completion->release(); + } + + void reader_entry() + { + while (!terminated) { + std::unique_ptr<IOContext> ctx(new IOContext()); + ctx->server = this; + + dout(20) << __func__ << ": waiting for nbd request" << dendl; + + int r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); + if (r < 0) { + derr << "failed to read nbd request header: " << cpp_strerror(r) + << dendl; + goto signal; + } + + if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) { + derr << "invalid nbd request header" << dendl; + goto signal; + } + + ctx->request.from = ntohll(ctx->request.from); + ctx->request.type = ntohl(ctx->request.type); + ctx->request.len = ntohl(ctx->request.len); + + ctx->reply.magic = htonl(NBD_REPLY_MAGIC); + memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); + + ctx->command = ctx->request.type & 0x0000ffff; + + dout(20) << *ctx << ": start" << dendl; + + switch (ctx->command) + { + case NBD_CMD_DISC: + // NBD_DO_IT will return when pipe is closed + dout(0) << "disconnect request received" << dendl; + goto signal; + case NBD_CMD_WRITE: + bufferptr ptr(ctx->request.len); + r = safe_read_exact(fd, ptr.c_str(), ctx->request.len); + if (r < 0) { + derr << *ctx << ": failed to read nbd request data: " + << cpp_strerror(r) << dendl; + goto signal; + } + ctx->data.push_back(ptr); + break; + } + + IOContext *pctx = ctx.release(); + io_start(pctx); + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); + switch (pctx->command) + { + case NBD_CMD_WRITE: + image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); + break; + case NBD_CMD_READ: + image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); + break; + case NBD_CMD_FLUSH: + image.aio_flush(c); + break; + case NBD_CMD_TRIM: + image.aio_discard(pctx->request.from, pctx->request.len, c); + break; + default: + derr << *pctx << ": invalid request command" << dendl; + c->release(); + goto signal; + } + } + dout(20) << __func__ << ": terminated" << dendl; + +signal: + Mutex::Locker l(disconnect_lock); + disconnect_cond.Signal(); + } + + void writer_entry() + { + while (!terminated) { + dout(20) << __func__ << ": waiting for io request" << dendl; + std::unique_ptr<IOContext> ctx(wait_io_finish()); + if (!ctx) { + dout(20) << __func__ << ": no io requests, terminating" << dendl; + return; + } + + dout(20) << __func__ << ": got: " << *ctx << dendl; + + int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)); + if (r < 0) { + derr << *ctx << ": failed to write reply header: " << cpp_strerror(r) + << dendl; + return; + } + if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { + r = ctx->data.write_fd(fd); + if (r < 0) { + derr << *ctx << ": failed to write replay data: " << cpp_strerror(r) + << dendl; + return; + } + } + dout(20) << *ctx << ": finish" << dendl; + } + dout(20) << __func__ << ": terminated" << dendl; + } + + class ThreadHelper : public Thread + { + public: + typedef void (NBDServer::*entry_func)(); + private: + NBDServer &server; + entry_func func; + public: + ThreadHelper(NBDServer &_server, entry_func _func) + :server(_server) + ,func(_func) + {} + protected: + void* entry() override + { + (server.*func)(); + server.shutdown(); + return NULL; + } + } reader_thread, writer_thread; + + bool started; +public: + void start() + { + if (!started) { + dout(10) << __func__ << ": starting" << dendl; + + started = true; + + reader_thread.create("rbd_reader"); + writer_thread.create("rbd_writer"); + } + } + + void wait_for_disconnect() + { + if (!started) + return; + + Mutex::Locker l(disconnect_lock); + disconnect_cond.Wait(disconnect_lock); + } + + ~NBDServer() + { + if (started) { + dout(10) << __func__ << ": terminating" << dendl; + + shutdown(); + + reader_thread.join(); + writer_thread.join(); + + wait_clean(); + + started = false; + } + } +}; + +std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { + + os << "[" << std::hex << ntohll(*((uint64_t *)ctx.request.handle)); + + switch (ctx.command) + { + case NBD_CMD_WRITE: + os << " WRITE "; + break; + case NBD_CMD_READ: + os << " READ "; + break; + case NBD_CMD_FLUSH: + os << " FLUSH "; + break; + case NBD_CMD_TRIM: + os << " TRIM "; + break; + default: + os << " UNKNOWN(" << ctx.command << ") "; + break; + } + + os << ctx.request.from << "~" << ctx.request.len << " " + << std::dec << ntohl(ctx.reply.error) << "]"; + + return os; +} + +class NBDWatchCtx : public librbd::UpdateWatchCtx +{ +private: + int fd; + int nbd_index; + bool use_netlink; + librados::IoCtx &io_ctx; + librbd::Image ℑ + unsigned long size; +public: + NBDWatchCtx(int _fd, + int _nbd_index, + bool _use_netlink, + librados::IoCtx &_io_ctx, + librbd::Image &_image, + unsigned long _size) + : fd(_fd) + , nbd_index(_nbd_index) + , use_netlink(_use_netlink) + , io_ctx(_io_ctx) + , image(_image) + , size(_size) + { } + + ~NBDWatchCtx() override {} + + void handle_notify() override + { + librbd::image_info_t info; + if (image.stat(info, sizeof(info)) == 0) { + unsigned long new_size = info.size; + int ret; + + if (new_size != size) { + dout(5) << "resize detected" << dendl; + if (ioctl(fd, BLKFLSBUF, NULL) < 0) + derr << "invalidate page cache failed: " << cpp_strerror(errno) + << dendl; + if (use_netlink) { + ret = netlink_resize(nbd_index, new_size); + } else { + ret = ioctl(fd, NBD_SET_SIZE, new_size); + if (ret < 0) + derr << "resize failed: " << cpp_strerror(errno) << dendl; + } + + if (!ret) + size = new_size; + + if (ioctl(fd, BLKRRPART, NULL) < 0) { + derr << "rescan of partition table failed: " << cpp_strerror(errno) + << dendl; + } + if (image.invalidate_cache() < 0) + derr << "invalidate rbd cache failed" << dendl; + } + } + } +}; + +class NBDListIterator { +public: + bool get(int *pid, Config *cfg) { + while (true) { + std::string nbd_path = "/sys/block/nbd" + stringify(m_index); + if(access(nbd_path.c_str(), F_OK) != 0) { + return false; + } + + *cfg = Config(); + cfg->devpath = "/dev/nbd" + stringify(m_index++); + + std::ifstream ifs; + ifs.open(nbd_path + "/pid", std::ifstream::in); + if (!ifs.is_open()) { + continue; + } + ifs >> *pid; + + int r = get_mapped_info(*pid, cfg); + if (r < 0) { + continue; + } + + return true; + } + } + +private: + int m_index = 0; + + int get_mapped_info(int pid, Config *cfg) { + int r; + std::string path = "/proc/" + stringify(pid) + "/cmdline"; + std::ifstream ifs; + std::string cmdline; + std::vector<const char*> args; + + ifs.open(path.c_str(), std::ifstream::in); + if (!ifs.is_open()) + return -1; + ifs >> cmdline; + + for (unsigned i = 0; i < cmdline.size(); i++) { + const char *arg = &cmdline[i]; + if (i == 0) { + if (strcmp(basename(arg) , "rbd-nbd") != 0) { + return -EINVAL; + } + } else { + args.push_back(arg); + } + + while (cmdline[i] != '\0') { + i++; + } + } + + std::ostringstream err_msg; + Command command; + r = parse_args(args, &err_msg, &command, cfg); + if (r < 0) { + return r; + } + + if (command != Connect) { + return -ENOENT; + } + + return 0; + } +}; + +static int load_module(Config *cfg) +{ + ostringstream param; + int ret; + + if (cfg->nbds_max) + param << "nbds_max=" << cfg->nbds_max; + + if (cfg->max_part) + param << " max_part=" << cfg->max_part; + + if (!access("/sys/module/nbd", F_OK)) { + if (cfg->nbds_max || cfg->set_max_part) + cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded" + << std::endl; + return 0; + } + + ret = module_load("nbd", param.str().c_str()); + if (ret < 0) + cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-ret) + << std::endl; + + return ret; +} + +static int check_device_size(int nbd_index, unsigned long expected_size) +{ + // There are bugs with some older kernel versions that result in an + // overflow for large image sizes. This check is to ensure we are + // not affected. + + unsigned long size = 0; + std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size"; + std::ifstream ifs; + ifs.open(path.c_str(), std::ifstream::in); + if (!ifs.is_open()) { + cerr << "rbd-nbd: failed to open " << path << std::endl; + return -EINVAL; + } + ifs >> size; + size *= RBD_NBD_BLKSIZE; + + if (size == 0) { + // Newer kernel versions will report real size only after nbd + // connect. Assume this is the case and return success. + return 0; + } + + if (size != expected_size) { + cerr << "rbd-nbd: kernel reported invalid device size (" << size + << ", expected " << expected_size << ")" << std::endl; + return -EINVAL; + } + + return 0; +} + +static int parse_nbd_index(const std::string& devpath) +{ + int index, ret; + + ret = sscanf(devpath.c_str(), "/dev/nbd%d", &index); + if (ret <= 0) { + // mean an early matching failure. But some cases need a negative value. + if (ret == 0) + ret = -EINVAL; + cerr << "rbd-nbd: invalid device path: " << devpath + << " (expected /dev/nbd{num})" << std::endl; + return ret; + } + + return index; +} + +static int try_ioctl_setup(Config *cfg, int fd, uint64_t size, uint64_t flags) +{ + int index = 0, r; + + if (cfg->devpath.empty()) { + char dev[64]; + const char *path = "/sys/module/nbd/parameters/nbds_max"; + int nbds_max = -1; + if (access(path, F_OK) == 0) { + std::ifstream ifs; + ifs.open(path, std::ifstream::in); + if (ifs.is_open()) { + ifs >> nbds_max; + ifs.close(); + } + } + + while (true) { + snprintf(dev, sizeof(dev), "/dev/nbd%d", index); + + nbd = open(dev, O_RDWR); + if (nbd < 0) { + if (nbd == -EPERM && nbds_max != -1 && index < (nbds_max-1)) { + ++index; + continue; + } + r = nbd; + cerr << "rbd-nbd: failed to find unused device" << std::endl; + goto done; + } + + r = ioctl(nbd, NBD_SET_SOCK, fd); + if (r < 0) { + close(nbd); + ++index; + continue; + } + + cfg->devpath = dev; + break; + } + } else { + r = parse_nbd_index(cfg->devpath); + if (r < 0) + goto done; + index = r; + + nbd = open(cfg->devpath.c_str(), O_RDWR); + if (nbd < 0) { + r = nbd; + cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; + goto done; + } + + r = ioctl(nbd, NBD_SET_SOCK, fd); + if (r < 0) { + r = -errno; + cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl; + close(nbd); + goto done; + } + } + + r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + r = ioctl(nbd, NBD_SET_SIZE, size); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + ioctl(nbd, NBD_SET_FLAGS, flags); + + if (cfg->timeout >= 0) { + r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)cfg->timeout); + if (r < 0) { + r = -errno; + cerr << "rbd-nbd: failed to set timeout: " << cpp_strerror(r) + << std::endl; + goto close_nbd; + } + } + + dout(10) << "ioctl setup complete for " << cfg->devpath << dendl; + nbd_index = index; + return 0; + +close_nbd: + if (r < 0) { + ioctl(nbd, NBD_CLEAR_SOCK); + cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; + } + close(nbd); +done: + return r; +} + +static void netlink_cleanup(struct nl_sock *sock) +{ + if (!sock) + return; + + nl_close(sock); + nl_socket_free(sock); +} + +static struct nl_sock *netlink_init(int *id) +{ + struct nl_sock *sock; + int ret; + + sock = nl_socket_alloc(); + if (!sock) { + cerr << "rbd-nbd: Could not allocate netlink socket." << std::endl; + return NULL; + } + + ret = genl_connect(sock); + if (ret < 0) { + cerr << "rbd-nbd: Could not connect netlink socket. Error " << ret + << std::endl; + goto free_sock; + } + + *id = genl_ctrl_resolve(sock, "nbd"); + if (*id < 0) + // nbd netlink interface not supported. + goto close_sock; + + return sock; + +close_sock: + nl_close(sock); +free_sock: + nl_socket_free(sock); + return NULL; +} + +static int netlink_disconnect(int index) +{ + struct nl_sock *sock; + struct nl_msg *msg; + int ret, nl_id; + + sock = netlink_init(&nl_id); + if (!sock) + // Try ioctl + return 1; + + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL); + + msg = nlmsg_alloc(); + if (!msg) { + cerr << "rbd-nbd: Could not allocate netlink message." << std::endl; + goto free_sock; + } + + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, + NBD_CMD_DISCONNECT, 0)) { + cerr << "rbd-nbd: Could not setup message." << std::endl; + goto nla_put_failure; + } + + NLA_PUT_U32(msg, NBD_ATTR_INDEX, index); + + ret = nl_send_sync(sock, msg); + netlink_cleanup(sock); + if (ret < 0) { + cerr << "rbd-nbd: netlink disconnect failed: " << nl_geterror(-ret) + << std::endl; + return -EIO; + } + + return 0; + +nla_put_failure: + nlmsg_free(msg); +free_sock: + netlink_cleanup(sock); + return -EIO; +} + +static int netlink_disconnect_by_path(const std::string& devpath) +{ + int index; + + index = parse_nbd_index(devpath); + if (index < 0) + return index; + + return netlink_disconnect(index); +} + +static int netlink_resize(int nbd_index, uint64_t size) +{ + struct nl_sock *sock; + struct nl_msg *msg; + int nl_id, ret; + + sock = netlink_init(&nl_id); + if (!sock) { + cerr << "rbd-nbd: Netlink interface not supported." << std::endl; + return 1; + } + + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL); + + msg = nlmsg_alloc(); + if (!msg) { + cerr << "rbd-nbd: Could not allocate netlink message." << std::endl; + goto free_sock; + } + + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, + NBD_CMD_RECONFIGURE, 0)) { + cerr << "rbd-nbd: Could not setup message." << std::endl; + goto free_msg; + } + + NLA_PUT_U32(msg, NBD_ATTR_INDEX, nbd_index); + NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size); + + ret = nl_send_sync(sock, msg); + if (ret < 0) { + cerr << "rbd-nbd: netlink resize failed: " << nl_geterror(ret) << std::endl; + goto free_sock; + } + + netlink_cleanup(sock); + dout(10) << "netlink resize complete for nbd" << nbd_index << dendl; + return 0; + +nla_put_failure: +free_msg: + nlmsg_free(msg); +free_sock: + netlink_cleanup(sock); + return -EIO; +} + +static int netlink_connect_cb(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlmsg_hdr(msg)); + Config *cfg = (Config *)arg; + struct nlattr *msg_attr[NBD_ATTR_MAX + 1]; + uint32_t index; + int ret; + + ret = nla_parse(msg_attr, NBD_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + if (ret) { + cerr << "rbd-nbd: Unsupported netlink reply" << std::endl; + return -NLE_MSGTYPE_NOSUPPORT; + } + + if (!msg_attr[NBD_ATTR_INDEX]) { + cerr << "rbd-nbd: netlink connect reply missing device index." << std::endl; + return -NLE_MSGTYPE_NOSUPPORT; + } + + index = nla_get_u32(msg_attr[NBD_ATTR_INDEX]); + cfg->devpath = "/dev/nbd" + stringify(index); + nbd_index = index; + + return NL_OK; +} + +static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd, + uint64_t size, uint64_t flags) +{ + struct nlattr *sock_attr; + struct nlattr *sock_opt; + struct nl_msg *msg; + int ret; + + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb, cfg); + + msg = nlmsg_alloc(); + if (!msg) { + cerr << "rbd-nbd: Could not allocate netlink message." << std::endl; + return -ENOMEM; + } + + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, NBD_CMD_CONNECT, + 0)) { + cerr << "rbd-nbd: Could not setup message." << std::endl; + goto free_msg; + } + + if (!cfg->devpath.empty()) { + ret = parse_nbd_index(cfg->devpath); + if (ret < 0) + goto free_msg; + + NLA_PUT_U32(msg, NBD_ATTR_INDEX, ret); + } + + if (cfg->timeout >= 0) + NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, cfg->timeout); + + NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size); + NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, RBD_NBD_BLKSIZE); + NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags); + + sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS); + if (!sock_attr) { + cerr << "rbd-nbd: Could not init sockets in netlink message." << std::endl; + goto free_msg; + } + + sock_opt = nla_nest_start(msg, NBD_SOCK_ITEM); + if (!sock_opt) { + cerr << "rbd-nbd: Could not init sock in netlink message." << std::endl; + goto free_msg; + } + + NLA_PUT_U32(msg, NBD_SOCK_FD, fd); + nla_nest_end(msg, sock_opt); + nla_nest_end(msg, sock_attr); + + ret = nl_send_sync(sock, msg); + if (ret < 0) { + cerr << "rbd-nbd: netlink connect failed: " << nl_geterror(ret) + << std::endl; + return -EIO; + } + + dout(10) << "netlink connect complete for " << cfg->devpath << dendl; + return 0; + +nla_put_failure: +free_msg: + nlmsg_free(msg); + return -EIO; +} + +static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags) +{ + struct nl_sock *sock; + int nl_id, ret; + + sock = netlink_init(&nl_id); + if (!sock) { + cerr << "rbd-nbd: Netlink interface not supported. Using ioctl interface." + << std::endl; + return 1; + } + + dout(10) << "netlink interface supported." << dendl; + + ret = netlink_connect(cfg, sock, nl_id, fd, size, flags); + netlink_cleanup(sock); + + if (ret != 0) + return ret; + + nbd = open(cfg->devpath.c_str(), O_RDWR); + if (nbd < 0) { + cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; + return nbd; + } + + return 0; +} + +static void handle_signal(int signum) +{ + int ret; + + ceph_assert(signum == SIGINT || signum == SIGTERM); + derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; + + if (nbd < 0 || nbd_index < 0) { + dout(20) << __func__ << ": " << "disconnect not needed." << dendl; + return; + } + + dout(20) << __func__ << ": " << "sending NBD_DISCONNECT" << dendl; + ret = netlink_disconnect(nbd_index); + if (ret == 1) + ret = ioctl(nbd, NBD_DISCONNECT); + + if (ret != 0) { + derr << "rbd-nbd: disconnect failed. Error: " << ret << dendl; + } else { + dout(20) << __func__ << ": " << "disconnected" << dendl; + } +} + +static NBDServer *start_server(int fd, librbd::Image& image) +{ + NBDServer *server; + + server = new NBDServer(fd, image); + server->start(); + + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + register_async_signal_handler_oneshot(SIGINT, handle_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_signal); + + return server; +} + +static void run_server(Preforker& forker, NBDServer *server, bool netlink_used) +{ + if (g_conf()->daemonize) { + global_init_postfork_finish(g_ceph_context); + forker.daemonize(); + } + + if (netlink_used) + server->wait_for_disconnect(); + else + ioctl(nbd, NBD_DO_IT); + + unregister_async_signal_handler(SIGHUP, sighup_handler); + unregister_async_signal_handler(SIGINT, handle_signal); + unregister_async_signal_handler(SIGTERM, handle_signal); + shutdown_async_signal_handler(); +} + +static int do_map(int argc, const char *argv[], Config *cfg) +{ + int r; + + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx io_ctx; + librbd::Image image; + + int read_only = 0; + unsigned long flags; + unsigned long size; + bool use_netlink; + + int fd[2]; + + librbd::image_info_t info; + + Preforker forker; + NBDServer *server; + + vector<const char*> args; + argv_to_vec(argc, argv, args); + if (args.empty()) { + cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + g_ceph_context->_conf.set_val_or_die("pid_file", ""); + + if (global_init_prefork(g_ceph_context) >= 0) { + std::string err; + r = forker.prefork(err); + if (r < 0) { + cerr << err << std::endl; + return r; + } + if (forker.is_parent()) { + if (forker.parent_wait(err) != 0) { + return -ENXIO; + } + return 0; + } + global_init_postfork_start(g_ceph_context); + } + + common_init_finish(g_ceph_context); + global_init_chdir(g_ceph_context); + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { + r = -errno; + goto close_ret; + } + + r = rados.init_with_context(g_ceph_context); + if (r < 0) + goto close_fd; + + r = rados.connect(); + if (r < 0) + goto close_fd; + + r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx); + if (r < 0) + goto close_fd; + + io_ctx.set_namespace(cfg->nsname); + + r = rbd.open(io_ctx, image, cfg->imgname.c_str()); + if (r < 0) + goto close_fd; + + if (cfg->exclusive) { + r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); + if (r < 0) { + cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r) + << std::endl; + goto close_fd; + } + } + + if (!cfg->snapname.empty()) { + r = image.snap_set(cfg->snapname.c_str()); + if (r < 0) + goto close_fd; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) + goto close_fd; + + flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS; + if (!cfg->snapname.empty() || cfg->readonly) { + flags |= NBD_FLAG_READ_ONLY; + read_only = 1; + } + + if (info.size > ULONG_MAX) { + r = -EFBIG; + cerr << "rbd-nbd: image is too large (" << byte_u_t(info.size) + << ", max is " << byte_u_t(ULONG_MAX) << ")" << std::endl; + goto close_fd; + } + + size = info.size; + + r = load_module(cfg); + if (r < 0) + goto close_fd; + + server = start_server(fd[1], image); + + use_netlink = cfg->try_netlink; + if (use_netlink) { + r = try_netlink_setup(cfg, fd[0], size, flags); + if (r < 0) { + goto free_server; + } else if (r == 1) { + use_netlink = false; + } + } + + if (!use_netlink) { + r = try_ioctl_setup(cfg, fd[0], size, flags); + if (r < 0) + goto free_server; + } + + r = check_device_size(nbd_index, size); + if (r < 0) + goto close_nbd; + + r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + { + uint64_t handle; + + NBDWatchCtx watch_ctx(nbd, nbd_index, use_netlink, io_ctx, image, + info.size); + r = image.update_watch(&watch_ctx, &handle); + if (r < 0) + goto close_nbd; + + cout << cfg->devpath << std::endl; + + run_server(forker, server, use_netlink); + + r = image.update_unwatch(handle); + ceph_assert(r == 0); + } + +close_nbd: + if (r < 0) { + if (use_netlink) { + netlink_disconnect(nbd_index); + } else { + ioctl(nbd, NBD_CLEAR_SOCK); + cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) + << std::endl; + } + } + close(nbd); +free_server: + delete server; +close_fd: + close(fd[0]); + close(fd[1]); +close_ret: + image.close(); + io_ctx.close(); + rados.shutdown(); + + forker.exit(r < 0 ? EXIT_FAILURE : 0); + // Unreachable; + return r; +} + +static int do_unmap(Config *cfg) +{ + int r, nbd; + + /* + * The netlink disconnect call supports devices setup with netlink or ioctl, + * so we always try that first. + */ + r = netlink_disconnect_by_path(cfg->devpath); + if (r != 1) + return r; + + nbd = open(cfg->devpath.c_str(), O_RDWR); + if (nbd < 0) { + cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; + return nbd; + } + + r = ioctl(nbd, NBD_DISCONNECT); + if (r < 0) { + cerr << "rbd-nbd: the device is not used" << std::endl; + } + + close(nbd); + return r; +} + +static int parse_imgpath(const std::string &imgpath, Config *cfg, + std::ostream *err_msg) { + std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$"); + std::smatch match; + if (!std::regex_match(imgpath, match, pattern)) { + std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) { + cfg->poolname = match[1]; + } + + if (match[2].matched) { + cfg->nsname = match[2]; + } + + cfg->imgname = match[3]; + + if (match[4].matched) + cfg->snapname = match[4]; + + return 0; +} + +static int do_list_mapped_devices(const std::string &format, bool pretty_format) +{ + bool should_print = false; + std::unique_ptr<ceph::Formatter> f; + TextTable tbl; + + if (format == "json") { + f.reset(new JSONFormatter(pretty_format)); + } else if (format == "xml") { + f.reset(new XMLFormatter(pretty_format)); + } else if (!format.empty() && format != "plain") { + std::cerr << "rbd-nbd: invalid output format: " << format << std::endl; + return -EINVAL; + } + + if (f) { + f->open_array_section("devices"); + } else { + tbl.define_column("id", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); + } + + int pid; + Config cfg; + NBDListIterator it; + while (it.get(&pid, &cfg)) { + if (f) { + f->open_object_section("device"); + f->dump_int("id", pid); + f->dump_string("pool", cfg.poolname); + f->dump_string("namespace", cfg.nsname); + f->dump_string("image", cfg.imgname); + f->dump_string("snap", cfg.snapname); + f->dump_string("device", cfg.devpath); + f->close_section(); + } else { + should_print = true; + if (cfg.snapname.empty()) { + cfg.snapname = "-"; + } + tbl << pid << cfg.poolname << cfg.nsname << cfg.imgname << cfg.snapname + << cfg.devpath << TextTable::endrow; + } + } + + if (f) { + f->close_section(); // devices + f->flush(std::cout); + } + if (should_print) { + std::cout << tbl; + } + return 0; +} + +static bool find_mapped_dev_by_spec(Config *cfg) { + int pid; + Config c; + NBDListIterator it; + while (it.get(&pid, &c)) { + if (c.poolname == cfg->poolname && c.nsname == cfg->nsname && + c.imgname == cfg->imgname && c.snapname == cfg->snapname) { + *cfg = c; + return true; + } + } + return false; +} + + +static int parse_args(vector<const char*>& args, std::ostream *err_msg, + Command *command, Config *cfg) { + std::string conf_file_list; + std::string cluster; + CephInitParameters iparams = ceph_argparse_early_args( + args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list); + + ConfigProxy config{false}; + config->name = iparams.name; + config->cluster = cluster; + + if (!conf_file_list.empty()) { + config.parse_config_files(conf_file_list.c_str(), nullptr, 0); + } else { + config.parse_config_files(nullptr, nullptr, 0); + } + config.parse_env(CEPH_ENTITY_TYPE_CLIENT); + config.parse_argv(args); + cfg->poolname = config.get_val<std::string>("rbd_default_pool"); + + std::vector<const char*>::iterator i; + std::ostringstream err; + + for (i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { + return HELP_INFO; + } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) { + return VERSION_INFO; + } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) { + } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (cfg->nbds_max < 0) { + *err_msg << "rbd-nbd: Invalid argument for nbds_max!"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if ((cfg->max_part < 0) || (cfg->max_part > 255)) { + *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!"; + return -EINVAL; + } + cfg->set_max_part = true; + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + cfg->readonly = true; + } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { + cfg->exclusive = true; + } else if (ceph_argparse_witharg(args, i, &cfg->timeout, err, "--timeout", + (char *)NULL)) { + if (!err.str().empty()) { + *err_msg << "rbd-nbd: " << err.str(); + return -EINVAL; + } + if (cfg->timeout < 0) { + *err_msg << "rbd-nbd: Invalid argument for timeout!"; + return -EINVAL; + } + } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format", + (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) { + cfg->pretty_format = true; + } else if (ceph_argparse_flag(args, i, "--try-netlink", (char *)NULL)) { + cfg->try_netlink = true; + } else { + ++i; + } + } + + Command cmd = None; + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), "map") == 0) { + cmd = Connect; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Disconnect; + } else if (strcmp(*args.begin(), "list-mapped") == 0) { + cmd = List; + } else { + *err_msg << "rbd-nbd: unknown command: " << *args.begin(); + return -EINVAL; + } + args.erase(args.begin()); + } + + if (cmd == None) { + *err_msg << "rbd-nbd: must specify command"; + return -EINVAL; + } + + switch (cmd) { + case Connect: + if (args.begin() == args.end()) { + *err_msg << "rbd-nbd: must specify image-or-snap-spec"; + return -EINVAL; + } + if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) { + return -EINVAL; + } + args.erase(args.begin()); + break; + case Disconnect: + if (args.begin() == args.end()) { + *err_msg << "rbd-nbd: must specify nbd device or image-or-snap-spec"; + return -EINVAL; + } + if (boost::starts_with(*args.begin(), "/dev/")) { + cfg->devpath = *args.begin(); + } else { + if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) { + return -EINVAL; + } + if (!find_mapped_dev_by_spec(cfg)) { + *err_msg << "rbd-nbd: " << *args.begin() << " is not mapped"; + return -ENOENT; + } + } + args.erase(args.begin()); + break; + default: + //shut up gcc; + break; + } + + if (args.begin() != args.end()) { + *err_msg << "rbd-nbd: unknown args: " << *args.begin(); + return -EINVAL; + } + + *command = cmd; + return 0; +} + +static int rbd_nbd(int argc, const char *argv[]) +{ + int r; + Config cfg; + vector<const char*> args; + argv_to_vec(argc, argv, args); + + std::ostringstream err_msg; + r = parse_args(args, &err_msg, &cmd, &cfg); + if (r == HELP_INFO) { + usage(); + return 0; + } else if (r == VERSION_INFO) { + std::cout << pretty_version_to_str() << std::endl; + return 0; + } else if (r < 0) { + cerr << err_msg.str() << std::endl; + return r; + } + + switch (cmd) { + case Connect: + if (cfg.imgname.empty()) { + cerr << "rbd-nbd: image name was not specified" << std::endl; + return -EINVAL; + } + + r = do_map(argc, argv, &cfg); + if (r < 0) + return -EINVAL; + break; + case Disconnect: + r = do_unmap(&cfg); + if (r < 0) + return -EINVAL; + break; + case List: + r = do_list_mapped_devices(cfg.format, cfg.pretty_format); + if (r < 0) + return -EINVAL; + break; + default: + usage(); + break; + } + + return 0; +} + +int main(int argc, const char *argv[]) +{ + int r = rbd_nbd(argc, argv); + if (r < 0) { + return EXIT_FAILURE; + } + return 0; +} diff --git a/src/tools/rbd_recover_tool/FAQ b/src/tools/rbd_recover_tool/FAQ new file mode 100644 index 00000000..1655e853 --- /dev/null +++ b/src/tools/rbd_recover_tool/FAQ @@ -0,0 +1,16 @@ +# author: min chen(minchen@ubuntukylin.com) 2014 2015 + +1. error "get_image_metadata_v2: no meta_header_seq input" +cause: + database is old, refresh database +solution: + ./rbd-recover-tool database + +2. Error initializing leveldb: IO error: lock /var/lib/ceph/osd/ceph-0/current/omap/LOCK: Resource temporarily unavailable + ERROR: error flushing journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0: (1) Operation not permitted +cause: + when ./rbd-recover-tool database is interrupted , but command has been sent to each osd node, and there is a process reading leveldb and it is LOCKED + if run ./rbd-recover-tool database again, all command are sent to osd nodes again, while previous process is locking leveldb, so all new command + are failed. +solution: + wait until all previous command finished. diff --git a/src/tools/rbd_recover_tool/README b/src/tools/rbd_recover_tool/README new file mode 100644 index 00000000..d289c11c --- /dev/null +++ b/src/tools/rbd_recover_tool/README @@ -0,0 +1,97 @@ +# author: Min chen(minchen@ubuntukylin.com) 2014 2015 + +------------- ceph rbd recover tool ------------- + + ceph rbd recover tool is used for recovering ceph rbd image, when all ceph services are killed. +it is based on ceph-0.80.x (Firefly and newer) + currently, ceph service(ceph-mon, ceph-osd) evently are not available caused by bugs or sth else +, especially on large scale ceph cluster, so that the ceph cluster can not supply service +and rbd images can not be accessed. In this case, a tool to recover rbd image is necessary. + ceph rbd recover tool is just used for this, it can collect all objects of an image from distributed +osd nodes with the latest pg epoch, and splice objects by offset to a complete image. To make sure +object data is complete, this tool does flush osd journal on each osd node before recovering. + but, there are some limitions: +-need ssh service and unobstructed network +-osd data must be accessed on local disk +-clone image is not supported, while snapshot is supported +-only support relicated pool + +before you run this tool, you should make sure that: +1). all processes (ceph-osd, ceph-mon, ceph-mds) are shutdown +2). ssh daemon is running & network is ok (ssh to each node without password) +3). ceph-kvstore-tool is installed(for ubuntu: apt-get install ceph-test) +4). osd disk is not crashed and data can be accessed on local filesystem + +-architecture: + + +---- osd.0 + | +admin_node -----------+---- osd.1 + | + +---- osd.2 + | + ...... + +-files: +admin_node: {rbd-recover-tool common_h epoch_h metadata_h database_h} +osd: {osd_job common_h epoch_h metadata_h} #/var/rbd_tool/osd_job +in this architecture, admin_node acts as client, osds act as server. +so, they run different files: +on admin_node run: rbd-recover-tool <action> [<parameters>] +on osd node run: ./osd_job <function> <parameters> +admin_node will copy files: osd_job, common_h, epoch_h, metadata_h to remote osd node + + +-config file +before you run this tool, make sure write config files first +osd_host_path: osd hostnames and osd data path #user input + osdhost0 /var/lib/ceph/osd/ceph-0 + osdhost1 /var/lib/ceph/osd/ceph-1 + ...... +mon_host: all mon node hostname #user input + monhost0 + monhost1 + ...... +mds_host: all mds node hostname #user input + mdshost0 + mdshost1 + ...... +then, init_env_admin function will create file: osd_host +osd_host: all osd node hostname #generated by admin_job, user ignore it + osdhost0 + osdhost1 + ...... + + +-usage: +rbd-recovert-tool <operation> +<operation> : +database #generating offline database: hobject path, node hostname, pg_epoch and image metadata +list #list all images from offline database +lookup <pool_id>/<image_name>[@[<snap_name>]] #lookup image metadata in offline database +recover <pool_id><image_name>[@[<snap_name>]] [/path/to/store/image] #recover image data according to image metadata + +-steps: +1. stop all ceph services: ceph-mon, ceph-osd, ceph-mds +2. setup config files: osd_host_path, mon_host, mds_host +3. rbd-recover-tool database # wait a long time +4. rbd-recover-tool list +4. rbd-recover-tool recover <pool_id>/<image_name>[@[<image_name>]] [/path/to/store/image] + + +-debug & error check +if admin_node operation is failed, you can check it on osd node +cd /var/rbd_tool/osd_job +./osd_job <operation> +<operation> : +do_image_id <image_id_hobject> #get image id of image format v2 +do_image_id <image_header_hobject> #get image id of image format v1 +do_image_metadata_v1 <image_header_hobject> #get image metadata of image format v1, maybe pg epoch is not latest +do_image_metadata_v2 <image_header_hobject> #get image metadata of image format v2, maybe pg epoch is not latest +do_image_list #get all images on this osd(image head hobject) +do_pg_epoch #get all pg epoch and store it in /var/rbd_tool/single_node/node_pg_epoch +do_omap_list #list all omap headers and omap entries on this osd + + +-FAQ +file FAQ lists some common confusing cases while testing diff --git a/src/tools/rbd_recover_tool/TODO b/src/tools/rbd_recover_tool/TODO new file mode 100644 index 00000000..c36d4c94 --- /dev/null +++ b/src/tools/rbd_recover_tool/TODO @@ -0,0 +1,2 @@ + +1.support clone imag diff --git a/src/tools/rbd_recover_tool/common_h b/src/tools/rbd_recover_tool/common_h new file mode 100644 index 00000000..f2df662a --- /dev/null +++ b/src/tools/rbd_recover_tool/common_h @@ -0,0 +1,412 @@ +#!/usr/bin/env bash +# file: common_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") + +# admin node init path +rbd_image=/var/rbd_tool/rbd_image +database=$rbd_image/database +image_coll_v1=$rbd_image/image_coll_v1 +image_coll_v2=$rbd_image/image_coll_v2 +pg_coll=$rbd_image/pg_coll +images=$rbd_image/images +images_meta=$rbd_image/images_meta +default_backup_dir=/var/rbd_tool/default_backup_dir + +# admin node: image snap & nosnap +nosnap= #$rbd_image/<image_name>/nosnap +snap= #rbd_image/<image_name>/<snap_name> + +# osd node init path +job_path=/var/rbd_tool/osd_job +single_node=/var/rbd_tool/single_node + +# osd node vars +osd_env= #single_node/$cluster$id/osd_env +osd_data= #/var/lib/ceph/osd/$cluster-$id +omap_path= #$osd_data/current/omap +image_list_v1= #single_node/$cluster-$id/image_list_v1 +image_list_v2= #single_node/$cluster-$id/image_list_v2 +image_v1= #$single_node/$cluster-$id/image_v1 +image_v2= #$single_node/$cluster-$id/image_v2 +pgid_list= #$single_node/$cluster-$id/pgid_list +node_pg_epoch= #$single_node/$cluster-$id/node_pg_epoch +omap_list= #$single_node/$cluster-$id/omap_list + +# admin node config file +osd_host_path=$my_dir/config/osd_host_path +osd_host_mapping= #$pwd_path/config/osd_host_mapping # host --> host_remote: by init_env_admin() +osd_host=$my_dir/config/osd_host #generated by function init_env_admin() +mon_host=$my_dir/config/mon_host +mds_host=$my_dir/config/mds_host + +# ssh option +ssh_option="-o ConnectTimeout=1" + +# gen md5sum +function gen_md5() +{ + echo $1|md5sum|awk '{print $1}' +} + +# on each osd node +# check ceph environment: ssh, ceph-kvstore-tool, osd_data_path +function check_ceph_env() +{ + local func="check_ceph_env" + if [ $# -lt 2 ];then + echo "$func: parameters: <node> <data_path>" + exit + fi + local node=$1 + local data_path=$2 + local res= + local cmd= + + trap 'echo [$node]: ssh failed; exit' INT HUP + ssh -o ConnectTimeout=1 $node "echo -n" </dev/null + res=$? + if [ $res -ne 0 ];then + echo "[$node]: ssh failed" + exit + fi + + cmd=ceph-kvstore-tool + trap 'echo [$node]: $cmd failed; exit' INT HUP + ssh -o ConnectTimeout=1 $node "$cmd &>/dev/null;" </dev/null + res=$? + # ceph-kvstore-tool will return 1 with no parameters input + if [ $res -ne 1 ];then + echo "[$node]: $cmd not installed" + exit + fi + + trap 'echo [$node]: stat $data_path failed; exit' INT HUP + ssh -o ConnectTimeout=1 $node "stat $data_path &>/dev/null;" </dev/null + res=$? + if [ $res -ne 0 ];then + echo "[$node]: $data_path not exists" + exit + fi +} + +# osd node context : osd_data_path +function init_env_osd() +{ + local func="init_env_osd" + if [ "$1"x = ""x ];then + echo "$func: no osd_data_path input" + exit + fi + osd_data=$1 + omap_path=$osd_data/current/omap + + if [ ! -e $single_node ];then + mkdir -p $single_node + fi + + local osd_id=`gen_md5 $osd_data` + local osd_dir=$single_node/$osd_id + + if [ ! -e $osd_dir ];then + mkdir -p $osd_dir + fi + + image_list_v1=$osd_dir/image_list_v1 + image_list_v2=$osd_dir/image_list_v2 + image_v1=$osd_dir/image_v1 + image_v2=$osd_dir/image_v2 + pgid_list=$osd_dir/pgid_list + node_pg_epoch=$osd_dir/node_pg_epoch + omap_list=$osd_dir/omap_list +} + +# admin node process file: osd_host_path +function init_env_admin() +{ + local func="init_env_admin" + local pwd_path=`pwd` + osd_host_mapping=$pwd_path/config/osd_host_mapping + if [ ! -s $osd_host_path ];then + echo "$func: config/osd_host_path not exists or empty" + exit + fi + if [ ! -e $rbd_image ];then + mkdir -p $rbd_image + fi + if [ ! -e $images ];then + mkdir -p $images + fi + + if [ ! -s $mon_host ];then + echo "$func: config/mon_host not exists or empty" + exit + fi + if [ ! -e $mds_host ];then + echo "$func: config/mds_host not exists" + exit + fi + + # we just judge if osd_host is needed to be updated + if [ -s $osd_host ] && [ $osd_host -nt $osd_host_path ];then + return + fi + echo "$func: create osd_host ..." + # create file: osd_host and osd_host_mapping + >$osd_host + >$osd_host_mapping + local lines=0 + local lineno=0 + while read line + do + lineno=$(($lineno + 1)) + if [ "$line"x = ""x ];then + continue; + fi + local node=`echo $line|awk '{print $1}'` + if [ "$node"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd hostname not input" + rm -rf $osd_host $osd_host_mapping + exit + fi + local data_path=`echo $line|awk '{print $2}'` + if [ "$data_path"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd data_path not input" + rm -rf $osd_host $osd_host_mapping + exit + fi + lines=$(($lines + 1)) + # in case : there are servral hostnames on the same node + # just need output of `hostname` + local hostname_alias= + hostname_alias=`ssh $ssh_option $node "hostname" 2>/dev/null </dev/null` + if [ "$hostname_alias"x = ""x ];then + echo "$func: osd_host_path: line $lineno: $node: get remote hostname alias failed" + rm -rf $osd_host $osd_host_mapping + exit + fi + echo "$node $hostname_alias" >>$osd_host_mapping + echo $node >> $osd_host + # check ceph env on remote osd + check_ceph_env $node $data_path + done < $osd_host_path + + if [ $lines = 0 ];then + echo "$func: no osd host path valid" + exit + fi +} + +function admin_parse_osd() +{ + local func="admin_parse_osd" + if [ -s $osd_host ];then + return + fi + # create file: osd_host + >$osd_host + local lines=0 + local lineno=0 + while read line + do + lineno=$(($lineno + 1)) + if [ "$line"x = ""x ];then + continue; + fi + local node=`echo $line|awk '{print $1}'` + if [ "$node"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd_host not input" + exit + fi + local data_path=`echo $line|awk '{print $2}'` + if [ "$data_path"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd_data not input" + exit + fi + lines=$(($lines + 1)) + echo $node >> $osd_host + done < $osd_host_path +} + +# for osd node +function get_omap_list() +{ + ceph-kvstore-tool $omap_path list > $omap_list +} + +function convert_underline() +{ + if [ "$1"x = ""x ];then + return + fi + + echo $1|sed -e 's/_/\\u/gp'|head -n 1 +} + +function dump_backslash() +{ + echo $*|sed -e 's/\\/\\\\/gp'|head -n 1 +} + +function dump_dump_backslash() +{ + echo $*|sed -e 's/\\/\\\\\\\\/gp'|head -n 1 +} + +function char_convert() +{ + if [ "$1"x = ""x ];then + return + fi + + echo $1|sed -e 's/_/\\u/gp' -e 's/\./%e/gp' -e 's/%/%p/gp'|head -n 1 +} + +function check_osd_process() +{ + local func="check_osd_process" + local host=$1 + if [ "$1"x = ""x ];then + exit + fi + local cmds="ps aux|grep ceph-osd|grep -v grep" + local ret=/tmp/ret.$$$$ + ssh $ssh_option $host $cmds |tee $ret + if [ -s $ret ];then + echo "$func: [$host] ceph-osd process is not killed" + exit + fi + rm -f $ret +} + +function get_map_header_prefix() +{ + echo "_HOBJTOSEQ_" +} + +function get_map_header_key() +{ + local func="get_map_header_key" + if [ "$1"x = ""x ];then + #echo $func': no keyword input' + exit + fi + local keyword=$1 + local res=`cat $omap_list| grep $keyword` + if [ "$res"x = ""x ];then + #echo "$func: map_header_key = $keyword not exists" + exit + fi + echo $res|awk -F ":" '{print $2}' +} + +function get_header_seq() +{ + local func="get_header_seq" + if [ "$1"x == ""x ];then + #echo "$func: no prefix input" + exit; + elif [ "$2"x == ""x ];then + #echo "$func: no key input" + exit; + fi + local prefix=$1; + local key=$2; + local res=/tmp/header_seq.$$$$ + + ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res + if [ $? != 0 ]; then + #echo "$func: <$prefix , $key> not exists" ; + exit; + fi + + # ceph-kvstore-tool get result like this: + # 02 01 7e 00 00 00 12 44 00 00 00 00 00 00 00 00 + # get header seq bytes: + # 12 44 00 00 00 00 00 00 + # -> 00 00 00 00 00 00 44 12 + # echo $((16#0000000000004412)) -> 17426 == header_seq + local seq=`cat $res |head -n 2|tail -n 1| \ + awk ' + BEGIN { + FS=":" + seq=""; + i=7; + } { + split($2, arr, " ") + # header_seq uint64 : 8 bytes + for (x=7; x>=0; --x) { + seq=seq""arr[i+x]; + } + } + END { + print seq + }'` + if [ "$seq"x = ""x ];then + #echo "$func: get <$prefix , $key> failed" + exit; + fi + rm -f $res + echo $((16#$seq)) +} + +# get header info key/value +function get_header_kv() +{ + local func="get_header_kv" + if [ "$1"x = ""x ];then + #echo "$func: no prefix input" + exit + elif [ "$2"x = ""x ];then + #echo "$func: no key input" + exit + elif [ "$3"x != "string"x ] && [ "$3"x != "int"x ];then + #echo "$func: no valid type input, use type (string|int)" + exit + fi + + local prefix=$1 + local key=$2 + local types=$3 + local res=/tmp/kv.$$$$ + + ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res + if [ $? != 0 ];then + #echo "$func: <$prefix , $key> not exists" + exit + fi + + if [ "$types"x = "string"x ];then + local value=`cat $res |tail -n +2|head -n -1|awk -F ": " '{printf $3}'|sed -n 's/^\.\{4\}//p'` + echo $value + elif [ "$types"x = "int"x ];then + local value=`cat $res |tail -n +2|head -n -1| \ + awk ' + BEGIN{ + FS=":" + } { + split($2, arr, " "); + len=length(arr) + for (i=len; i>0; --i) { + printf arr[i]; + } + }'` + echo $((16#$value)) + fi + rm -f $res +} diff --git a/src/tools/rbd_recover_tool/config/mds_host b/src/tools/rbd_recover_tool/config/mds_host new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/tools/rbd_recover_tool/config/mds_host diff --git a/src/tools/rbd_recover_tool/config/mon_host b/src/tools/rbd_recover_tool/config/mon_host new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/tools/rbd_recover_tool/config/mon_host diff --git a/src/tools/rbd_recover_tool/config/osd_host_path b/src/tools/rbd_recover_tool/config/osd_host_path new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/tools/rbd_recover_tool/config/osd_host_path diff --git a/src/tools/rbd_recover_tool/database_h b/src/tools/rbd_recover_tool/database_h new file mode 100644 index 00000000..4ff20425 --- /dev/null +++ b/src/tools/rbd_recover_tool/database_h @@ -0,0 +1,1134 @@ +#!/usr/bin/env bash +# file: database_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") + +. $my_dir/common_h +. $my_dir/metadata_h +. $my_dir/epoch_h + +db_image_prefix= +db_image_size= +db_order= +db_snap_id= +db_snap_image_size= +found=0 + +#init osd_data and get all objects path +function gen_database() +{ + local func="gen_database" + rm -rf $database/* + rm -rf $images + rm -rf $raw + mkdir -p $database + local host= + local data_path= + + trap 'echo $func failed; exit;' INT HUP + while read line + do + { + host=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + if [ "$host"x = ""x ] || [ "$data_path"x = ""x ];then + continue + fi + local cmds="find $data_path/current -type f" + ssh $ssh_option $host $cmds > $database/$host + } & + done < $osd_host_path + wait + echo "$func: finish" +} + +# collect hobjects from database +# and choose the object whose epoch is latest +# then, sort the objects by their offsets in image +function gather_hobject_common() +{ + func="gather_hobject_common" + + trap 'echo $func failed; exit;' INT HUP + if [ $# -lt 2 ];then + echo "$func: parameters: <pool_id> <image_prefix> [<snap_id>]" + exit + fi + + local pool_id=$1 + local image_prefix=$2 + pool_id=$(($pool_id)) + local hex_pool_id=`printf "%x" $pool_id` + # NOSNAP = uint64(-2) + local snap_id=`printf "%u" -2` + local hex_snap_id="head" + local psuffix= + local fsuffix="_head" + if [ $# = 3 ];then + snap_id=$(($3)) + hex_snap_id=`printf "%x" $snap_id` + psuffix="_"$snap_id + fsuffix="_"$snap_id + fi + local underline_image_prefix=`convert_underline $image_prefix` + local dump_image_prefix=`dump_backslash $underline_image_prefix` + local ddump_image_prefix=`dump_dump_backslash $underline_image_prefix` + local images_raw_dir=$rbd_image/raw + local image_hobjects_dir=$images/pool_$pool_id/$image_prefix + # $images/raw/$image_prefix"_head" + local image_hobjects_raw=$images_raw_dir/$image_prefix"$fsuffix" + # $images/$image_prefix/$image_prefix"_head" + local image_hobjects_stable=$image_hobjects_dir/$image_prefix"$fsuffix" + + if [ ! -e $images_raw_dir ];then + mkdir -p $images_raw_dir + fi + if [ ! -e $image_hobjects_dir ];then + local image_metadata=$images_meta/$image_name_in + mkdir -p $image_hobjects_dir + fi + + pushd $database >/dev/null + local pattern="\.[0-9a-f]+__"$hex_snap_id"_[0-9A-F]{8}__"$hex_pool_id + >$image_hobjects_raw + grep -r -E $dump_image_prefix""$pattern * >$image_hobjects_raw + if [ ! -s $image_hobjects_raw ];then + echo "$func: image snap [ $image_prefix"$psuffix" ] is empty" + return 1 #no data available + fi + popd >/dev/null + + local offset_dir_temp=$images_raw_dir/$image_prefix"$fsuffix""_dir_temp" + rm -rf $offset_dir_temp + mkdir -p $offset_dir_temp + + echo "gather hobjects from database: snapid=$snap_id ..." + + # format: ceph2:/var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2 + local tmp_image=$offset_dir_temp/tmpimage.$$$$ + >$tmp_image + cat $image_hobjects_raw | + awk -F ':' ' + BEGIN { + pg_coll="'$pg_coll'" + tmp_image="'$tmp_image'" + osd_host_mapping="'$osd_host_mapping'" + snapid="'$snap_id'" + }{ + # $2 = /var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2 + + split($2, arr1, "/current/"); # {/var/lib/ceph/osd/ceph-1/, 2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2} + split(arr1[2], arr2, "/"); # {2.d3_head, rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2} + split(arr2[1], arr3, "_head"); # {2.d3,} + + hobject=$2; + data_path=arr1[1]; + gsub(/\\u/, "\\\\\\\\u", hobject); # dump backslash to delay escape (\ -> \\) + "awk \"\\$1 == \\\""$1"\\\" {print \\$2}\" "osd_host_mapping" | head -n 1" | getline node + pgid = arr3[1]; + + len=length(arr2); + offset_hobject=arr2[len] # rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2 + split(offset_hobject, offarr1, "."); # {rb, 0, 1293, 6b8b4567, 000000000002__head_FB425CD3__2} + len1=length(offarr1) + offset_p=offarr1[len1] # 000000000002__head_FB425CD3__2 + split(offset_p, offarr2, "__"); # {000000000002, head_FB425CD3, 2} + offset=offarr2[1]; # 000000000002 + + system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \" >>"tmp_image); + #system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \""); + #print node" "pgid" "hobject" "offset" "snapid + + # find pg_epoch from pg_coll database + system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll" >>"tmp_image); + #system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll); + }' + + local sort_image=$offset_dir_temp/sortimage.$$$$ + >$sort_image + sort -t ' ' -k 4.1,4 -k 6.1nr -k 1.1,1 $tmp_image >$sort_image + sort -t ' ' -k 4.1,4 -u $sort_image > $image_hobjects_stable + + #rm -rf $offset_dir_temp + return 0 +} + +function gather_hobject_nosnap() +{ + gather_hobject_common $1 $2 +} + +function gather_hobject_snap() +{ + gather_hobject_common $1 $2 $3 +} + +# select the max pg_epoch item of the same $field +# if no same $field, choose the first +# format : "node $field pg_epoch" +function choose_epoch() +{ + cat $1|sort -t ' ' -k 3.1,3nr -k 2.1,2n |head -n 1; +} + +# lookup image info , after scatter_node_jobs & gather_node_infos +function lookup_image() +{ + local func="lookup_image" + if [ $# -lt 2 ];then + echo "$func: parameters error <pool_id> <image_name> [<snap_name>]" + fi + local pool_id=$1 + local image_name=$2 + local snap_name=$3 + pool_id=$((pool_id)) + echo -e "$func: pool_id = $pool_id\timage_name = $image_name\tsnap_name = $snap_name" + if [ $pool_id -lt 0 ];then + echo "$func: pool_id must great than zero" + exit + fi + local hex_pool_id=`printf "%x" $pool_id` + input_image $image_name + local node= + local item=/tmp/item.$$$$ + local img_name=`dump_backslash $image_name` + + local image_format=0 + local image_id_hobject= + local image_header_hobject= + local result=/tmp/tmp_result.$$$$ + local res1=/tmp/tmp_res1.$$$$ + local res2=/tmp/tmp_res2.$$$$ + local data_path= + + # image format v1 + { + cat $image_coll_v1|grep -E "/$img_name\.rbd__head_[0-9A-F]{8}__$hex_pool_id" >$res1 + if [ -s $res1 ];then + echo -n "$func: rbd_header_hobject = " + choose_epoch $res1| tee $item + #choose_epoch $res1 > $item + + if [ -e $item ];then + node=`cat $item|awk '{print $1}'` + image_header_hobject=`cat $item|awk '{print $2}'` + if [ "$node"x = ""x ];then + echo "$func: v1 node is NULL" + exit + fi + if [ "$image_header_hobject"x = ""x ];then + echo "$func: v1 image_header_hobject is NULL" + exit + fi + rm -f $item + fi + + image_format=1 + echo -e "image_name:\t$image_name_in" + echo -e "image_format:\t$image_format" + data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'` + + >$result + cmds="bash $job_path/osd_job do_image_metadata_v1 $data_path `dump_backslash $image_header_hobject` $snap_name" + ssh $ssh_option $node $cmds | tee $result + fi + } + + # image format v2 + { + cat $image_coll_v2|grep -E "/rbd\\\\uid\."$img_name"__head_[0-9A-F]{8}__$hex_pool_id" >$res2 + if [ -s $res2 ];then + echo -n "$func: rbd_id_hobject = " + choose_epoch $res2 | tee $item + #choose_epoch $res2 > $item + + if [ -e $item ];then + node=`cat $item|awk '{print $1}'` + image_id_hobject=`cat $item|awk '{print $2}'` + if [ "$node"x = ""x ];then + echo "$func: v2 node is NULL(to get image_id_hobject)" + exit + fi + if [ "$image_id_hobject"x = ""x ];then + echo "$func: v2 image_id_hobject is NULL" + exit + fi + rm -f $item + fi + + check_osd_process $node + image_format=2 + + local tid=/tmp/image_id.$$$$ + data_path=`echo $image_id_hobject|awk -F "/current" '{print $1}'` + >$tid + cmds="bash $job_path/osd_job do_image_id $data_path `dump_backslash $image_id_hobject`" + ssh $ssh_option $node $cmds > $tid + + local image_id=`cat $tid` + rm -f $tid + + #get image_header_hobject + pushd $database >/dev/null + local pattern="header\."$image_id"__head_[0-9A-F]{8}__$hex_pool_id" + local tcoll=/tmp/tmp_image_head_coll.$$$$ + + # hostname(by command hostname) in $pg_coll maybe different from hostname in tcoll(input by user) + # t_host: hostname read from config file ($tcoll) + # t_host_remote: $(hostname) on osd node ($pg_coll) + grep -r -E $pattern * >$tcoll + popd >/dev/null + + local t_host=(`cat $tcoll|awk -F ":" '{print $1}'`) + local t_pgid=(`cat $tcoll|awk -F ":" '{print $2}'|sed -n 's/.*\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\/.*/\1/p'`) + local t_hobject=(`cat $tcoll|awk -F ":" '{print $2}'`) + local t_data_path=(`cat $tcoll|awk -F ":" '{split($2, arr, "/current/"); print arr[1];}'`) + rm -f $tcoll + declare -a t_host_remote + + #if there is no failed pg migration, number of t_host is replica num + #replica num : 3, 4, 5 ... + local t_hostname=/tmp/t_hostname.$$$$ + for ((i=0; i<${#t_host[*]}; i++)) + do + ssh $ssh_option ${t_host[$i]} "hostname" >$t_hostname + if [ $? != 0 ];then + echo "$func: ${t_host[$i]} get host_remote failed" + exit + fi + t_host_remote[$i]=`cat $t_hostname` + done + rm -f $t_hostname + + local t_item=/tmp/tmp_item.$$$$ + local tmp_item=/tmp/tmp_tmp_item.$$$$ + + >$tmp_item + for ((i=0; i<${#t_host_remote[*]}; i++ )) + do + local node=${t_host_remote[$i]} + local pgid=${t_pgid[$i]} + awk '$1 == "'"$node"'" && $2 == "'"$pgid"'" {print}' $pg_coll >>$tmp_item + done + + # t_item: <remote_hostname> <pgid> <epoch> <data_path> + sort -u $tmp_item >$t_item + rm -f $tmp_item + + local entry=`choose_epoch $t_item` #t_host_remote + rm -f $t_item + + node=`echo $entry|awk '{print $1}'` + data_path=`echo $entry|awk '{print $4}'` + if [ "$node"x = ""x ];then + echo "$func: v2 node is NULL (to get image_header_hobject)" + exit + fi + + for ((i=0; i<${#t_host_remote[*]}; i++)) + do + if [ "${t_host_remote[$i]}"x = "$node"x ] && [ "${t_data_path[$i]}"x = "$data_path"x ];then + image_header_hobject=${t_hobject[$i]} + break + fi + done + + if [ "$image_id_hobject"x = ""x ];then + echo "$func: v2 image_header_hobject is NULL" + exit + fi + + check_osd_process $node + + echo "$func: rbd_header_hobject = $node $image_header_hobject" + echo -e "image_name:\t$image_name_in" + echo -e "image_format:\t$image_format" + + #data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'` + >$result + cmds="bash $job_path/osd_job do_image_metadata_v2 $data_path $image_id `dump_backslash $image_header_hobject` $snap_name" + ssh $ssh_option $node $cmds | tee $result + fi + } + + if [ ! -s $result ];then + echo "$func: $image_name_in not exists" + exit + fi + + # to assign value to global variable + db_image_prefix=`cat $result|awk '/^(object_prefix|block_name):/{print $2}'` + if [ "$db_image_prefix"x = ""x ];then + echo "$func: image_prefix is NULL" + exit + fi + + db_image_size=`cat $result|awk '/^image_size:/{print $2}'` + db_order=`cat $result|awk '/^order:/{print $2}'` + if [ "$snap_name"x != ""x ];then + db_snap_id=`cat $result|awk '/^snapshot:/{print $2}'` + if [ "$db_snap_id"x = ""x ];then + echo "$func: $image_name_in@$snap_name NOT EXISTS" + exit + fi + db_snap_image_size=`cat $result|awk '/^snapshot:/{print $4}'` + else + #save snaplist + local image_snaplist=$images/pool_$pool_id/$image_name_in/@snaplist + local image_dir=$images/pool_$pool_id/$image_name_in + if [ ! -e $image_dir ];then + mkdir -p $image_dir + fi + cat $result|awk '/^snapshot:/{print $2" "$3" "$4}' >$image_snaplist + fi + found=1 + rm -f $result +} + +function list_images() +{ + echo "=============== format ==============" + echo "format: <pool_id>/<image_name>" + echo "================ v1: ================" + #sed -n 's/\(.*\)\/\(.*\)\.rbd__\(.*\)/\2/p' $image_coll_v1|sort -u|sed -e 's/\\u/_/g' + sed -n 's/.*\/\(.*\)\.rbd__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v1|sort -u|awk '{print strtonum("0x"$1)"/"$2;}'|sed -e 's/\\u/_/g' + echo "================ v2: ================" + #sed -n 's/\(.*\)\/rbd\\uid.\(.*\)__\(head.*\)/\2/p' $image_coll_v2|sort -u|sed 's/\\u/_/g' + sed -n 's/.*\/rbd\\uid.\(.*\)__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v2|sort -u|awk '{print strtonum("0x"$1)"/"$2}'|sed 's/\\u/_/g' +} + +# lookup image metadata +# and +# collect hobjects of image with the latest pg epoch +function discover_image_nosnap() +{ + local func="discover_image_nosnap" + echo "$func ..." + local pool_id=$1 + local image_name=$2 + pool_id=$(($pool_id)) + lookup_image $pool_id $image_name # assign $image_prefix + gather_hobject_nosnap $pool_id $db_image_prefix + if [ $? -ne 0 ];then + exit + fi + local image_hobjects_stable_nosnap=$images/pool_$pool_id/$db_image_prefix/$db_image_prefix"_head" + local image_hobjects_dir=$images/pool_$pool_id/$image_name_in + if [ ! -e $image_hobjects_dir ];then + mkdir -p $image_hobjects_dir + fi + # mv image_prefix to image_name + mv $image_hobjects_stable_nosnap $image_hobjects_dir/$image_name_in + rm -rf $images/pool_$pool_id/$db_image_prefix +} + +# get the offset snapid object +# if there is no object, choose the smallest snapid which is greater than current snapid +function get_object_clone() +{ + local func="get_object_clone" + if [ $# -lt 4 ];then + exit + fi + + local object_offset_string=$1 + local snapid=$2 + local snaplist_path=$3 + local snapset_output_dir=$4 + + # snapid in desc + local snap_coll_arr=(` + cat $snaplist_path|awk '{ if ($1 >= '"$snapid"') print "'"$snapset_output_dir"'/@"$1}'`) + + local hex_snapid=`printf "%x" $snapid` + pushd $snapset_output_dir >/dev/null + # get object with the smallest snapid greater than current snapid + awk '$4 == "'"$object_offset_string"'" && $5 >= '$snapid' {print}' `echo ${snap_coll_arr[@]}` |tail -n 1 + popd >/dev/null +} + +# gather hobject for each snapid +function gen_snapset_hobject() +{ + local func="gen_image_snapset" + echo "$func ..." + if [ $# -lt 4 ];then + echo "$func: parameters: <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>" + exit + fi + local pool_id=$1 + local image_prefix=$2 + local snaplist_path=$3 + local snapset_output_dir=$4 + pool_id=$(($pool_id)) + OIFS=$IFS + IFS=$'\n' + local snaparr=(`cat $snaplist_path`) + # gather hobject for each snapshot + trap 'echo $func failed; exit;' INT HUP + for line in ${snaparr[@]} + do + OOIFS=$IFS + IFS=$' ' + local field=(`echo $line`) + local snapid=${field[0]} + local image_hobjects_stable_snap=$images/pool_$pool_id/$image_prefix/$image_prefix"_"$snapid + local image_snap=$snapset_output_dir/@$snapid + gather_hobject_snap $pool_id $image_prefix $snapid + local res=$? + if [ $res -ne 0 ];then + touch $image_snap + else + mv $image_hobjects_stable_snap $image_snap + fi + IFS=$OOIFS + done + IFS=$OIFS +} + +# lookup image metadata and get snapid hobjects +function discover_image_snap() +{ + local func="discover_image_snap" + echo "$func ..." + if [ $# -lt 3 ];then + echo "$func: parameters: <pool_id> <image_name> [<snap_name>]" + exit + fi + local pool_id=$1 + local image_name=$2 + local snap_name=$3 + pool_id=$(($pool_id)) + #mkdir -p $images/$image_prefix + lookup_image $pool_id $image_name $snap_name # input image_name and snap_name to lookup metadata and snap_id + if [ "$db_snap_id"x = ""x ];then + echo "$func: lookup image failed to gen snapid" + exit + fi + local image_hobjects_dir_prefix=$images/pool_$pool_id/$db_image_prefix + local image_nosnap=$images/pool_$pool_id/$image_name_in + #check if image nosnap recovered + if [ ! -s $image_nosnap ];then + echo "$func: please recover image nosnap before recover with snap" + rm -rf $image_hobjects_dir_prefix + exit + fi + local image_hobject_dir=$images/pool_$pool_id/$image_name_in + local image_snap_hobject=$image_hobject_dir/$image_name_in@$db_snap_id + local image_snap_hobject_head=$image_hobject_dir/$image_name_in@$db_snap_id@head + local image_snaplist=$image_hobject_dir/@snaplist + local image_snapset_dir=$image_hobject_dir/@snapset_dir + local image_head=$image_hobject_dir/$image_name_in + if [ ! -e $image_hobject_dir ];then + mkdir -p $image_hobject_dir + fi + # only gen snapset one time + if [ ! -e $image_snapset_dir ];then + mkdir -p $image_snapset_dir + gen_snapset_hobject $pool_id $db_image_prefix $image_snaplist $image_snapset_dir + + fi + + echo "$func: will get object clone ..." + >$image_snap_hobject + >$image_snap_hobject_head + + trap 'echo $func failed; exit;' INT HUP + # get each offset 's snapid hobject + while read line + do + #echo $line + OOIFS=$IFS + IFS=$' ' + local field=(`echo $line`) + local offset_string=${field[3]} + IFS=$OOIFS + local entry=`get_object_clone $offset_string $db_snap_id $image_snaplist $image_snapset_dir` + if [ "$entry"x != ""x ];then + echo $entry >> $image_snap_hobject + echo `dump_backslash $line` >> $image_snap_hobject_head + fi + done < $image_head + rm -rf $image_hobjects_dir_prefix +} + +# after discover_image_nosnap +# collect objects from osds one by one in sequence +function copy_image_nosnap_single_thread() +{ + local func="copy_image_nosnap_single_thread" + echo "$func ..." + if [ $# -lt 3 ];then + echo "$func: parameters: <pool_id> <image_hobjects> <backup_dir>" + exit + fi + local pool_id=$1 + local image_hobjects=$2 + local backup_dir=$3 + pool_id=$(($pool_id)) + + # make sure lookup_image first + if [ $found = 0 ];then + echo "$func: image not found, maybe forget to discover_image" + exit + fi + if [ ! -e $backup_dir ];then + mkdir -p $backup_dir + fi + + local image_dir=$backup_dir/pool_$pool_id/$image_name_in + local image_file=$image_dir/$image_name_in + local CURRENT=$image_dir/@CURRENT + local LOCK=$image_dir/@LOCK + if [ ! -e $image_dir ];then + mkdir -p $image_dir + fi + if [ -e $LOCK ];then + echo "$func: $LOCK is locked by other process" + exit + else + touch $LOCK + fi + + >$image_file + truncate -s $db_image_size $image_file + echo "head">$CURRENT + + local count=$(($db_image_size >> $db_order)) + local start=`cat $image_hobjects|head -n 1|awk '{print $4}'` + local end=`cat $image_hobjects|tail -n 1|awk '{print $4}'` + local entry_count=`cat $image_hobjects|wc -l` + + local char_bits=$((`echo $start|wc -c` -1 )) + local format="%0"$char_bits"x" + + local expect_start=`printf $format 0` + local expect_end=`printf $format $(($count -1 ))` + + echo -e "object_count\t$entry_count" + echo -e "expect\t\t[$expect_start ~ $expect_end] count:$count" + echo -e "range\t\t[$start ~ $end] count:$entry_count" + + local icount=0 + local istart= + local iend= + local percent= + + trap 'echo $func failed; exit;' INT HUP + local unit=$((1<<$db_order)) + while read line + do + { + icount=$(($icount+1)) + node=`echo $line|awk '{print $1}'` + hobject=`echo $line|awk '{print $3}'` + offset=`echo $line|awk '{print $4}'` + off=$((16#$offset)) + if [ $icount = 1 ];then + istart=$offset + fi + hobject=`dump_backslash $hobject` + iend=$offset + sshcmd="cat $hobject" + ssh $ssh_option $node $sshcmd < /dev/null | dd of=$image_file bs=$unit seek=$off conv=notrunc 2>/dev/null + percent=`echo "scale=3; 100*$icount/$entry_count"|bc` + tput sc #record current cursor + echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%" + if [ $icount != $entry_count ];then + tput rc # backport most recent cursor + fi + } + done < $image_hobjects + + echo + echo -n "size: " + ls -lh $image_file|awk '{print $5"\t"$9}' + echo -n "du: " + du -h $image_file + #unlock + rm -f $LOCK +} + + +# ssh copy snap_object & head_object from osd to admin node +# copy all snapshot objects +# and +# all head objects which have the same offset as snapshot objects +function collect_image_snap_objects() +{ + local func="collect_image_snap_objects" + #$1=backup_dir, $2=snap_name, $3=snap_hobjects, $4=head_hobjects + if [ $# -lt 6 ];then + echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>" + exit + fi + + local pool_id=$1 + local image_name=$2 + local snap_id=$3 + local snap_hobjects=$4 #snap hobjects info + local head_hobjects=$5 #head hobjects info + local backup_dir=$6 + pool_id=$(($pool_id)) + + local head_dir=$backup_dir/pool_$pool_id/$image_name/@head + local snap_dir=$backup_dir/pool_$pool_id/$image_name/@$snap_id + local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT + + if [ ! -e $head_dir ];then + mkdir -p $head_dir + fi + if [ ! -e $snap_dir ];then + mkdir -p $snap_dir + fi + + local snap_node= #osd node + local snap_hobject= #hobject path with snapid on osd + local snap_offset= + local snap_filename= + + local head_node= + local head_hobject= + local head_offset= + local head_filename= + + # ignore if there is no object in snapshot(empty ) + if [ ! -s $snap_hobjects ];then + echo "$func: $snap_hobjects is empty" + return 0 + fi + local start=`head -n 1 $snap_hobjects|awk '{print $4}'` + local end=`tail -n 1 $snap_hobjects|awk '{print $4}'` + local entry_count=`cat $snap_hobjects|wc -l` + if [ $((16#$first_offset)) -gt $((16#$last_offset)) ];then + echo "$func: $snap_hobjects not sorted" + return 1 + fi + + # just assert if ignored empty snapshot + if [ "$start"x = ""x ] || [ "$end"x = ""x ];then + return 1 + fi + + # speed up copy snapshot + # lookup the corresponding head hobject of snap hobject + # use command: grep <offset> <head hobjects> + # + # eg. + # head hobjects: (32 objects, snapid = uint64(-2) = 18446744073709551614) + # ceph1 29.4d /var/lib/ceph/osd/ceph-0/current/29.4d_head/rb.0.1c414.6b8b4567.000000000000__head_EC2C1C4D__1d 000000000000 18446744073709551614 869 + # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__head_0F439A8C__1d 000000000001 18446744073709551614 867 + # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__head_FC55706A__1d 000000000002 18446744073709551614 869 + # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__head_20A6328B__1d 000000000003 18446744073709551614 869 + # ceph2 29.75 /var/lib/ceph/osd/ceph-1/current/29.75_head/rb.0.1c414.6b8b4567.000000000004__head_AC5ADB75__1d 000000000004 18446744073709551614 867 + # ceph2 29.23 /var/lib/ceph/osd/ceph-1/current/29.23_head/rb.0.1c414.6b8b4567.000000000005__head_1FDEA823__1d 000000000005 18446744073709551614 867 + # ...... + # ceph1 29.34 /var/lib/ceph/osd/ceph-0/current/29.34_head/rb.0.1c414.6b8b4567.00000000001f__head_52373734__1d 00000000001f 18446744073709551614 869 + # + # snap hobjects: (3 objects, snapid >= 29) + # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__1f_0F439A8C__1d 000000000001 31 867 + # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__1e_FC55706A__1d 000000000002 30 869 + # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__1d_20A6328B__1d 000000000003 29 869 + # + # so find out offset in head hobjects line number: + # snap hobjects: 000000000001 ---> head hobjects: 2 (n1) + # snap hobjects: 000000000003 ---> head hobjects: 4 (n2) + # + # finally , grep range from the whole file [1 ~ N] shranked to part of file [n1 ~ n2] + # the worst case : [n1 ~ n2] = [1 ~ N], means no shranking + + # get the line number of the start offset in head hobjects + local n1=`grep -n $start $head_hobjects|head -n 1|cut -d ":" -f 1` + # get the line number of the end offset in head hobjects + local n2=`grep -n $end $head_hobjects|head -n 1|cut -d ":" -f 1` + + local icount=0 + local istart= + local iend= + local percent= + + OIFS=$IFS + IFS=$'\n' + + #assume file:snap_hobjects is not very large, and can be loaded into memory + local snap_arr=(`cat $snap_hobjects`) + local snap_tmp=/tmp/snaptmp.$$$$ + + # snap_tmp: + # consists of snap hobject or head hobject + # select lineno range: [n1 ~ n2] + head -n $n2 $head_hobjects|tail -n $(($n2-$n1+1)) >$snap_tmp + + echo "copy image snap/head objects from osd ..." + echo -e "object_count\t$entry_count" + echo -e "range\t\t[$start ~ $end] count:$entry_count" + + trap 'echo $func failed; exit;' INT HUP + for line in ${snap_arr[*]} + do + icount=$(($icount+1)) + + OOIFS=$IFS + IFS=$' ' + + local arr=(`echo $line`) + snap_node=${arr[0]} + snap_hobject=${arr[2]} + snap_offset=${arr[3]} + snap_filename=$snap_dir/$snap_offset + + if [ $icount = 1 ];then + istart=$snap_offset + fi + iend=$snap_offset + + #lookup corresponding head hobject of snap hobject + local res=`grep $snap_offset $snap_tmp|head -n 1` + if [ "$res"x = ""x ];then + echo "$func: image object[ $snap_offset ] missing" + exit + fi + + local arr2=(`echo $res`) + head_node=${arr2[0]} + head_hobject=${arr2[2]} + head_offset=${arr2[3]} + head_filename=$head_dir/$head_offset + + # just copy object(snap/head) if it does not exist + if [ ! -e $snap_filename ];then + ssh $ssh_option $snap_node "cat $snap_hobject" > $snap_filename + fi + if [ ! -e $head_filename ];then + ssh $ssh_option $head_node "cat $head_hobject" > $head_filename + fi + IFS=$OOIFS + + percent=`echo "scale=3; 100*$icount/$entry_count"|bc` + tput sc #record current cursor + echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%" + if [ $icount != $entry_count ];then + tput rc # backport most recent cursor + fi + done + echo + IFS=$OIFS + rm -f $snap_tmp + return 0 +} + +# copy all snap objects and corresponding head objects from osds +# in single process +function copy_image_snap_single_thread() +{ + local func="copy_image_snap_single_thread" + if [ $# -lt 6 ];then + echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>" + exit + fi + local pool_id=$1 + local image_name=$2 + local snap_id=$3 + local snap_hobjects=$4 + local head_hobjects=$5 + local backup_dir=$6 + pool_id=$(($pool_id)) + + local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT + local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK + #lock + if [ -e $LOCK ];then + echo "$func: $LOCK is locked by other process" + exit + else + touch $LOCK + fi + collect_image_snap_objects $pool_id $image_name $snap_id $snap_hobjects $head_hobjects $backup_dir + #unlock + rm -f $LOCK +} + +# after all snap objects and necessary head objects are copied, +# just pick appropriate head objects and snap objects and write them to image +# in order to rollback image to snapshot +# +# init: image is created by copy_image_nosnap_single_thread firstly +# +# all output include 3 parts: +# <image> <head objects> <snap objects> +# +# head objects1 --- snap1 objects +# head objects2 --- snap2 objects +# image head objects3 --- snap3 objects +# ...... +# head objectsN --- snapN objects +# +# how to rollback: +# firstly rollback to head, secondly write <snapX objects> +# head = <image> + <head objects> +# snap1 = <image> + <head objects> + <snap1 objects> +# snap2 = <image> + <head objects> + <snap2 objects> +# snap3 = <image> + <head objects> + <snap3 objects> +# ...... +# snapN = <image> + <head objects> + <snapN objects> +# +# improve rollback: +# there is intersection of head objects and snapX objects, if snapX objects are not empty +# and need to deduplicate the intersection. +# deduplicate steps: +# - get difference set of head objects and snapX objects +# - write the difference set objects to image +# - write the snapX objects to image +function rollback_image_snap() +{ + local func="rollback_image_snap" + + echo "$func ..." + + trap 'echo $func failed; exit;' INT HUP + if [ $# -lt 6 ];then + echo "$func: parameters <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>" + exit + fi + local pool_id=$1 + local image_name=$2 + local snap_id=$3 + local snap_object_dir=$4 + local backup_dir=$5 + local image_unit=$6 + + local need_diff_set=0 + + local image_path=$backup_dir/pool_$pool_id/$image_name/$image_name + local head_object_dir=$backup_dir/pool_$pool_id/$image_name/@head + local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT + local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK + if [ -e $LOCK ];then + echo "$func: $LOCK is locked by other process" + exit + else + touch $LOCK + fi + if [ $snap_id -ne -2 ];then + echo $snap_id > $CURRENT + else + echo "head" > $CURRENT + fi + + if [ ! -e $snap_object_dir ];then + return 0 + fi + + if [ "$snap_object_dir"x != "$head_object_dir"x ];then + echo "$func: need to compute diff_set of head" + need_diff_set=1 + else + echo "$func: NO diff_set" + need_diff_set=0 + fi + + local entry_count=0 + local start= + local end= + local offset= + local icount=0 + local istart= + local iend= + local percent= + + local snap_objects= + local head_objects= + local diff_set= + + snap_objects=(`ls $snap_object_dir`) + + # if need to compute difference set of head_objects and snap_objects + if [ $need_diff_set -ne 0 ];then + head_objects=(`ls $head_object_dir`) + + #get the difference set: ( head_objects - snap_objects ) + diff_set=(` + sort -m <(echo ${head_objects[@]}|xargs -n 1 echo) <(echo ${snap_objects[@]}|xargs -n 1 echo) \ + <(echo ${snap_objects[@]}|xargs -n 1 echo) |uniq -u`) + + # copy diff_set of head object to image + pushd $head_object_dir >/dev/null + + echo "$func: copy diff_set head objects ..." + entry_count=${#diff_set[@]} + start=${diff_set[0]} + end= + if [ $entry_count -gt 0 ];then + end=${diff_set[$(($entry_count - 1))]} + fi + offset= + icount=0 + istart= + iend= + percent= + + echo -e "object_count\t$entry_count" + echo -e "range\t\t[$start ~ $end] count:$entry_count" + + for object in ${diff_set[@]} + do + icount=$(($icount+1)) + if [ $icount = 1 ];then + istart=$object + fi + iend=$object + + local offset=$((16#$object)) + dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null + + percent=`echo "scale=3; 100*$icount/$entry_count"|bc` + tput sc #record current cursor + echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%" + if [ $icount != $entry_count ];then + tput rc # backport most recent cursor + fi + done + if [ $entry_count -gt 0 ];then + echo + fi + popd >/dev/null + + if [ $snap_id -ne -2 ];then + echo -e "$image_name already rollback diff_set: (head - snap)" + fi + fi + + # copy snap object to image + pushd $snap_object_dir >/dev/null + + if [ $need_diff_set -ne 0 ];then + echo "$func: copy snap objects ..." + else + echo "$func: copy head objects ..." + fi + entry_count=${#snap_objects[@]} + start=${snap_objects[0]} + end= + if [ $entry_count -gt 0 ];then + end=${snap_objects[$(($entry_count - 1))]} + fi + offset= + icount=0 + istart= + iend= + percent= + + echo -e "object_count\t$entry_count" + echo -e "range\t\t[$start ~ $end] count:$entry_count" + + for object in ${snap_objects[@]} + do + icount=$(($icount+1)) + if [ $icount = 1 ];then + istart=$object + fi + iend=$object + + local offset=$((16#$object)) + dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null + + percent=`echo "scale=3; 100*$icount/$entry_count"|bc` + tput sc #record current cursor + echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%" + if [ $icount != $entry_count ];then + tput rc # backport most recent cursor + fi + done + if [ $entry_count -gt 0 ];then + echo + fi + popd >/dev/null + + rm -f $LOCK + if [ $snap_id -ne -2 ];then + echo "$image_name rollback to snapid: $snap_id" + else + echo "$image_name rollback to head" + fi +} + +function recover_image() +{ + local func="recover_image" + echo "$func ..." + + if [ $# -lt 3 ];then + echo "$func: parameters: <pool_id> <image_name> <snap_name> [<backup_dir>]" + exit + fi + + local pool_id=$1 + local img_name=$2 + local snap_name=$3 + local backup_dir=$4 + pool_id=$(($pool_id)) + if [ "$snap_name"x = "@"x ];then + snap_name= + fi + if [ "$backup_dir"x = ""x ];then + backup_dir=$default_backup_dir + fi + + #recover image with nosnap + if [ "$snap_name"x = ""x ];then + discover_image_nosnap $pool_id $img_name #input image_name + local image_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in + copy_image_nosnap_single_thread $pool_id $image_hobjects $backup_dir + + #recover image with snap + else + + # check if recovered head already + local img_hobjects_path=$images/pool_$pool_id/$img_name/$img_name + local img_file_path=$backup_dir/pool_$pool_id/$img_name/$img_name + if [ ! -e $img_hobjects_path ] || [ ! -e $img_file_path ];then + echo "$func: $img_name@$snap_name : can not rollback to snapshot, please recover image head first" + exit + fi + + # rollback to head + if [ "$snap_name"x = "@@"x ];then + local head_dir=$backup_dir/pool_$pool_id/$img_name/@head + if [ -e $head_dir ];then + local unit=`pushd $head_dir >/dev/null; ls|head -n 1|xargs -n 1 stat|awk '/Size:/{print $2}'` + # rollback to head + rollback_image_snap $pool_id $img_name -2 $backup_dir/$img_name/@head $backup_dir $unit + echo "$image_name_in head : $backup_dir/$img_name/$img_name" + else + echo "$func: no need to rollback to head" + fi + return 0 + fi + + # rollback to snap + discover_image_snap $pool_id $img_name $snap_name # get image meta & get snapid object + local snap_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id + local head_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id@head + local snap_object_dir=$backup_dir/pool_$pool_id/$image_name_in/@$db_snap_id + local image_path=$backup_dir/pool_$pool_id/$image_name_in/$image_name_in + local image_unit=$((1<<$db_order)) + copy_image_snap_single_thread $pool_id $image_name_in $db_snap_id $snap_hobjects $head_hobjects $backup_dir + rollback_image_snap $pool_id $image_name_in $db_snap_id $snap_object_dir $backup_dir $image_unit + echo "$image_name_in@$snap_name : $image_path" + fi +} diff --git a/src/tools/rbd_recover_tool/epoch_h b/src/tools/rbd_recover_tool/epoch_h new file mode 100644 index 00000000..e268eafa --- /dev/null +++ b/src/tools/rbd_recover_tool/epoch_h @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# file: epoch_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") +. $my_dir/common_h + +#pgid_list=$single_node/$cluster-$id/pgid_list +function get_pgid_list() +{ + find $osd_data/current/ -type d -name "*_head"|\ + sed -n 's/\(.*\)\/current\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head/\2 \1/p'|\ + sort -t ' ' -k 1.1,1h -k 2.1,2 > $pgid_list; +} + +function get_pgid() +{ + hobject_path=$1 + echo $hobject_path| sed -n 's/\(.*\)\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\(.*\)/\2/p' +} + +infos_seq= +function get_infos_seq() +{ + local func="get_infos_seq" + + local keyword=":infos." + local infos_key=`get_map_header_key $keyword` + + if [ "$infos_key"x = ""x ];then + echo "$func: keyword not input or infos_key not exists" + exit + fi + local prefix=`get_map_header_prefix` + local key=$infos_key + + infos_seq=`get_header_seq $prefix $key` + if [ "$infos_seq"x = ""x ];then + echo "$func: infos_seq not exists" + exit + fi +} + +pg_epoch= +function get_pg_epoch() +{ + local func="get_pg_epoch" + if [ "$1"x = ""x ];then + echo "$func: no pgid input" + exit + fi + + get_pg_epoch_firefly "$1" + if [ "$pg_epoch"x != ""x ]; then + # echo "Epoch for $1: $pg_epoch (firefly)" + return + fi + + get_pg_epoch_hammer "$1" + if [ "$pg_epoch"x != ""x ]; then + # echo "Epoch for $1: $pg_epoch (hammer)" + return + fi + + echo "$func: Couldn't find epoch for $1" + exit +} + +function get_pg_epoch_firefly() +{ + local func="get_pg_epoch_firefly" + if [ "$1"x = ""x ];then + echo "$func: no pgid input" + exit + fi + local pgid=$1 + local key=$pgid"_epoch" + + #get_infos_seq; + # infos_seq default to 1 + infos_seq=1 + local infos_seq=`printf "%016d" $infos_seq` + local prefix="_USER_"$infos_seq"_USER_" + + pg_epoch=`get_header_kv $prefix $key int` +} + +function get_pg_epoch_hammer() +{ + local func="get_pg_epoch_hammer" + if [ "$1"x = ""x ];then + echo "$func: no pgid input" + exit + fi + local pgid="$1" + local hkey_prefix="$(get_map_header_prefix)" + local hkey="$(printf '...head.%x.%08X' "$(echo "$pgid"|cut -d'.' -f1)" "$((0x$(echo "$pgid"|cut -d'.' -f2)))")" + + local infos_seq="$(get_header_seq "$hkey_prefix" "$hkey")" + local infos_seq=`printf "%016d" $infos_seq` + local prefix="_USER_"$infos_seq"_USER_" + local key="_epoch" + + pg_epoch=`get_header_kv $prefix $key int` +} diff --git a/src/tools/rbd_recover_tool/metadata_h b/src/tools/rbd_recover_tool/metadata_h new file mode 100644 index 00000000..4aa491b5 --- /dev/null +++ b/src/tools/rbd_recover_tool/metadata_h @@ -0,0 +1,368 @@ +#!/usr/bin/env bash +# file: metadata_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") +. $my_dir/common_h +. $my_dir/epoch_h + +# put origin name in $image_name_in: for output +# put convert "_" name in $image_name: for grep image hobjects from database +image_name_in= +image_name= +function input_image() +{ + local func="input_image" + if [ "$1"x = ""x ];then + echo "$func: no image name input" + exit + fi + + image_name_in=$1 + # "_" -> "\u" + image_name=`convert_underline $image_name_in` +} + +#======================================== distinguish v1 or v2 =================================== +#image_list_v1=$single_node/$cluster-$id/image_list_v1 +#image_list_v2=$single_node/$cluster-$id/image_list_v2 +function get_image_list() +{ + find $osd_data/current/ -type f|grep ".rbd__" >$image_list_v1 + find $osd_data/current/ -type f|grep "rbd\\\\uid." >$image_list_v2 +} + +function get_image_format_by_hobject() +{ + local func="get_image_format" + if [ "$1"x = ""x ];then + exit + fi + local res1=`cat $image_list_v1|grep $1` + if [ "$res1"x != ""x ];then + echo 1 + exit + fi + + local res2=`cat $image_list_v2|grep $1` + if [ "$res2"x = ""x ];then + echo 2 + exit + fi +} + +#======================================== image format v1 ======================================== +# <image_name>.rbd include 3 parts: +# header + snap_count*snapshot + snap_count*snap_name +# +# struct rbd_obj_header_ondisk { +# 40 char text[40]; +# 24 char block_name[RBD_MAX_BLOCK_NAME_SIZE]; +# 4 char signature[4]; +# 8 char version[8]; +# struct { +# 1 __u8 order; +# 1 __u8 crypt_type; +# 1 __u8 comp_type; +# 1 __u8 unused; +# } __attribute__((packed)) options; +# 8 __le64 image_size;//hexdump -C s=80 n=8 +# 8 __le64 snap_seq; //hexdump -C s=88 n=8 +# 4 __le32 snap_count;//hexdump -C s=96 n=4 +# 4 __le32 reserved; +# 8 __le64 snap_names_len;//hexdump -C s=104 n=8 +# struct rbd_obj_snap_ondisk snaps[0]; +# } __attribute__((packed)); +# +# sizeof(rbd_obj_header_ondisk): 112 +# +# struct rbd_obj_snap_ondisk { +# 8 __le64 id; //hexdump -C s=112+i*16 n=8 , i=[0, snap_count) +# 8 __le64 image_size;//hexdump -C s=112+i*16+8 n=8, i=[0, snap_count) +# } __attribute__((packed)); +# sizeof(rbd_obj_snap_ondisk): 16 +# +# get snap_names form <image_nane>.rbd +# hexdump -e '10/1 "%_c"' -s $((112 + $snap_count*16)) -n $snap_names_len <image_name>.rbd +# then split snap_names into array + +function get_image_metadata_v1() +{ + local func="get_image_metadata_v1" + if [ "$1"x = ""x ];then + echo "$func: no image head object input" + exit + fi + local snap_name= + if [ "$2"x != ""x ];then + snap_name=$2 + fi + + if [ ! -e $1 ];then + echo "$func: $1 not exists" + exit + fi + local hobject_path=$1 + d_hobject_path=`dump_backslash $1` + local image_format=`get_image_format_by_hobject $d_hobject_path` + if [ $image_format != 1 ];then + echo "$func: image_format must be 1" + exit + fi + + if [ ! -e $hobject_path ];then + echo "$func: $hobject_path not exists" + exit + fi + + # decode rbd_obj_header_ondisk of <image_name>.rbd + local block_name=`hexdump -e '10/1 "%c"' -s 40 -n 24 $hobject_path` + local order=`hexdump -e '10/4 "%u"' -s 76 -n 1 $hobject_path` + local image_size=`hexdump -C -s 80 -n 8 $hobject_path|head -n 1|awk '{for (i=9; i>1; i--) {printf $i}}'` + image_size=$((16#$image_size)) + local snap_seq=`hexdump -C -s 88 -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` + local snap_count=`hexdump -C -s 96 -n 4 $hobject_path|head -n 1| + awk '{num=""; for(i=5; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` + local snap_names_len=`hexdump -C -s 104 -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'` + + echo -e "block_name:\t$block_name" + echo -e "order:\t\t$order" + echo -e "image_size:\t$image_size" + echo -e "snap_seq:\t$snap_seq" + + # decode N rbd_obj_snap_ondisk of <image_name>.rbd + declare -a snap_ids + declare -a snap_names + declare -a snap_image_sizes + local size_header=112 #sizeof(rbd_obj_header_ondisk) + local size_snap=16 #sizeof(rbd_obj_snap_ondisk) + local offset=0 + local id_off=0 + local size_off=0 + for ((i=0; i<$snap_count; i++)) + do + offset=$(($size_header + $i * $size_snap)) + id_off=$offset + size_off=$(($offset + 8)) + snap_ids[$i]=`hexdump -C -s $id_off -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'` + snap_image_sizes[$i]=`hexdump -C -s $size_off -n 8 $hobject_path|head -n 1| + awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'` + done + offset=$(($size_header + $snap_count * $size_snap)) + snap_names=(`hexdump -e '10/1 "%_c"' -s $offset -n $snap_names_len $hobject_path| + awk -F "\\\\\\\\\\\\\\\\0" '{for(i=1; i<=NF; i++) {print $i" "} }'`); + + echo -e "\t\tID\tNAME\t\tSIZE" + for ((i=0; i<$snap_count; i++)) + do + if [ "$snap_name"x = ""x ];then + echo -n -e "snapshot:\t" + echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}" + continue + fi + if [ "$snap_name"x = "${snap_names[$i]}"x ];then + echo -n -e "snapshot:\t" + echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}" + return + fi + done +} + +#======================================== end image format v1 ======================================== + +#======================================== image format v2 ======================================== + +# map_header, header_seq, header, key/value +# eg. +# map_header _HOBJTOSEQ_:rbd%uheader%e139a6b8b4567...head.2.68E826B6 +# meta_header_seq 17426 +# header: _USER_0000000000017426_USER_:object_prefix +# _USER_0000000000017426_USER_:order +# _USER_0000000000017426_USER_:size +# _USER_0000000000017426_USER_:snap_seq +# key/value ceph-kvstore-tool /storepath get _USER_0000000000017426_USER_ (object_prefix|order|size|snap_seq) + +# decode image id from image_id_hobject +function get_image_id() +{ + local func="get_image_id" + if [ "$1"x = ""x ];then + exit; + fi + local image_id_hobject=$1 #from admin node's database + + if [ ! -e $image_id_hobject ];then + #echo "$func: $image_id_hobject not exists" + exit; + fi + + # get len of string + local n=`hexdump -e '10/4 "%u"' -s 0 -n 4 $image_id_hobject` + # get string + hexdump -e '10/1 "%c"' -s 4 -n $n $image_id_hobject +} + +#find image_id omap entry in omaplist +map_header_prefix= +map_header_key= +function get_map_header() +{ + local func="get_map_header" + local image_id=$1 + if [ "$image_id"x = ""x ];then + echo "$func: no image_id input" + exit; + fi + map_header_prefix=`get_map_header_prefix` + local keyword="header%e"$image_id + map_header_key=`get_map_header_key $keyword` + if [ "$map_header_key"x = ""x ];then + echo "$func: map_header_key is NULL(not in omaplist)" + exit + fi +} + +#get meta header seq from map_header +meta_header_seq= +function get_meta_header_seq() +{ + local func="get_meta_header_seq" + if [ "$1"x == ""x ];then + echo "$func: no prefix input" + exit; + elif [ "$2"x == ""x ];then + echo "$func: no key input" + exit; + fi + local prefix=$1; + local key=$2; + meta_header_seq=`get_header_seq $prefix $key` +} + +# get image metadata : object_prefix, order, image_size, snap_seq +object_prefix= +order= +image_size= +snap_seq= +function get_image_metadata_v2() +{ + local func="get_image_metadata_v2" + if [ "$1"x = ""x ];then + echo "$func: no meta_header_seq input" + exit; + fi + local meta_header_seq=`printf "%016d" $1` + #echo "$func: meta_header_seq = "$meta_header_seq + local ghobject_key="_USER_"$meta_header_seq"_USER_" + local prefix=$ghobject_key + + object_prefix=`get_header_kv $prefix object_prefix string` + #object_prefix="rbd_data.$image_id" + order=`get_header_kv $prefix order int` + image_size=`get_header_kv $prefix size int` + snap_seq=`get_header_kv $prefix snap_seq int` + + echo -e "object_prefix:\t$object_prefix" + echo -e "order:\t\t$order" + echo -e "image_size:\t$image_size" + echo -e "snap_seq:\t$snap_seq" + + # list snapshot + list_snaps_v2 $1 $2 +} + +# struct cls_rbd_snap { +# snapid_t id; +# string name; +# uint64_t image_size; +# uint64_t features; +# uint8_t protection_status; +# cls_rbd_parent parent; +# } +# decode cls_rbd_snap +# 1 u8 struct_v +# 1 u8 struct_compat +# 4 u32 struct_len +# 8 u64 snapid_t id //s=6 n=8 +# 4 u32 len of name //s=14 n=4 +# len char name //s=18 n=len +# 8 u64 image_size +# 8 u64 features +# ...... +# +function list_snaps_v2() +{ + local func="list_snaps_v2" + if [ "$1"x = ""x ];then + exit + fi + local sname= + if [ $# -eq 2 ];then + sname=$2 + fi + local meta_header_seq=`printf "%016d" $1` + local prefix="_USER_"$meta_header_seq"_USER_" + local keys=(`awk -F ":" '/snapshot_/ && $1 == "'"$prefix"'" {if ($2 == "") exit; split($2, arr, "_"); + print arr[2];}' $omap_list|sort -r`) + echo -e "\t\tID\tNAME\t\tSIZE" + for key in ${keys[@]} + do + key="snapshot_$key" + local arr=(`ceph-kvstore-tool $omap_path get $prefix $key|awk -F ":" '{print $2}'`); + # get snap_name + tmp= + for ((i=17; i>13; i--)) + do + tmp="$tmp${arr[$i]}" + done + local len=$((16#$tmp)) + local snap_name= + for ((i=18; i<$((18+$len)); i++)) + do + # convert ascii to char + local char=`echo -e "\x${arr[$i]}"` + snap_name="$snap_name$char" + done + # get snap_id (little endian) + local tmp= + for ((i=13; i>5; i--)) + do + tmp="$tmp${arr[$i]}" + done + local snap_id=$((16#$tmp)) + # get image_size of current snap (little endian) + tmp= + for ((i=$((25+$len)); i>$((17+$len)); i--)) + do + tmp="$tmp${arr[$i]}" + done + local image_size=$((16#$tmp)) + if [ "$sname"x = ""x ];then + echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size" + continue + fi + if [ "$sname"x = "$snap_name"x ];then + echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size" + return + fi + done +} + +#======================================== end image format v2 ======================================== diff --git a/src/tools/rbd_recover_tool/osd_job b/src/tools/rbd_recover_tool/osd_job new file mode 100755 index 00000000..b4b80be8 --- /dev/null +++ b/src/tools/rbd_recover_tool/osd_job @@ -0,0 +1,170 @@ +#!/usr/bin/env bash +# file: osd_job +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") + +. $my_dir/common_h +. $my_dir/metadata_h +. $my_dir/epoch_h + +function check_ceph_osd() +{ + local func="check_ceph_osd" + local host=`hostname` + # if ceph-osd service is still running, except flush-journal + if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then + echo "[$host]: $func: ceph-osd is running..., stop it" + exit + fi +} + +function cat_pg_epoch() +{ + local func="cat_pg_epoch" + init_env_osd $1 + if [ -e $node_pg_epoch ];then + cat $node_pg_epoch + fi +} + +function cat_image_v1() +{ + local func="cat_image_v1" + init_env_osd $1 + if [ -e $image_v1 ];then + cat $image_v1 + fi +} + +function cat_image_v2() +{ + local func="cat_image_v2" + init_env_osd $1 + if [ -e $image_v2 ];then + cat $image_v2 + fi +} + +function flush_osd_journal() +{ + local func="flush_osd_journal" + init_env_osd $1 + local osd_data_path=$osd_data + local osd_journal_path=$osd_data/journal + local whoami_path=$osd_data/whoami + local host=`hostname` + if [ ! -e $whoami_path ];then + echo "[$host]: $func: $whoami_path not exists" + exit + fi + local whoami=`cat $whoami_path` + echo "[$host]: $func ..." + ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null + if [ $? -ne 0 ];then + echo "[$host]: $func: flush osd journal failed" + exit + fi +} + +function do_omap_list() +{ + local func="do_omap_list" + init_env_osd $1 + local host=`hostname` + echo "[$host]: $func ..." + get_omap_list +} + +# get all pgs epoch +function do_pg_epoch() +{ + local func="do_pg_epoch" + init_env_osd $1 + local node=`hostname` + get_pgid_list + >$node_pg_epoch + local pgid= + local data_path= + local host=`hostname` + echo "[$host]: $func ..." + while read line + do + { + pgid=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + get_pg_epoch $pgid + echo -e "$node $pgid $pg_epoch $data_path" >>$node_pg_epoch + } + done < $pgid_list +} + +# get an list of image in this osd node, pg epoch maybe not the latest, the admin node will do distinguish +function do_image_list() +{ + local func="do_image_list" + init_env_osd $1 + get_image_list + local node=`hostname` + >$image_v1 + >$image_v2 + local host=`hostname` + echo "[$host]: $func ..." + for line in `cat $image_list_v1` + do + pgid=`get_pgid $line` + get_pg_epoch $pgid + echo "$node $line $pg_epoch" >> $image_v1 + done + for line in `cat $image_list_v2` + do + pgid=`get_pgid $line` + get_pg_epoch $pgid + echo "$node $line $pg_epoch" >> $image_v2 + done +} + +function do_image_id() +{ + local func="do_image_id" + init_env_osd $1 + get_image_id $2 +} + +function do_image_metadata_v1() +{ + local func="do_image_metadata_v1" + init_env_osd $1 + local image_header_hobject=$2 + local snap_name=$3 + get_image_metadata_v1 $image_header_hobject $snap_name +} + +function do_image_metadata_v2() +{ + local func="do_image_metadata_v2" + init_env_osd $1 + local image_id=$2 + local image_header_hobject=$3 + local snap_name=$4 + get_map_header $image_id + get_meta_header_seq $map_header_prefix $map_header_key + get_image_metadata_v2 $meta_header_seq $snap_name +} + +check_ceph_osd +$* diff --git a/src/tools/rbd_recover_tool/rbd-recover-tool b/src/tools/rbd_recover_tool/rbd-recover-tool new file mode 100755 index 00000000..b7a25865 --- /dev/null +++ b/src/tools/rbd_recover_tool/rbd-recover-tool @@ -0,0 +1,327 @@ +#!/usr/bin/env bash +# file: rbd-recover-tool +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +# rbd-recover-tool is an offline recover tool for rbd image in replicated pool +# when ceph cluster is stopped. +# it is a simple disater recovery policy, just for urgent condition + +my_dir=$(dirname "$0") + +. $my_dir/common_h +. $my_dir/metadata_h +. $my_dir/epoch_h +. $my_dir/database_h + +#scp files from admin node to osd node +file1=common_h +file2=metadata_h +file3=epoch_h +file4=osd_job + +#------------ admin node's action ------------- + +function scp_file() +{ + local func="scp_file" + file=$1 + if [ "$1"x = ""x ];then + echo "$func: not file input" + exit + fi + for host in `cat $osd_host` + do + { + echo "$func: $host" + scp $ssh_option $file $host:$job_path 1>/dev/null + } & + done +} + +function scp_files() +{ + local func="scp_files" + for host in `cat $osd_host` + do + { + echo "$func: $host" + scp $ssh_option $file1 $host:$job_path + scp $ssh_option $file2 $host:$job_path + scp $ssh_option $file3 $host:$job_path + scp $ssh_option $file4 $host:$job_path + } & + done + wait + echo "$func: finish" +} + +function scatter_node_jobs() +{ + local func="scatter_node_jobs" + local host= + local data_path= + echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..." + + trap 'echo $func failed; exit' INT HUP + while read line + do + { + host=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + check_osd_process $host + + cmd="mkdir -p $job_path" + ssh $ssh_option $host $cmd + scp $ssh_option $file1 $host:$job_path >/dev/null + scp $ssh_option $file2 $host:$job_path >/dev/null + scp $ssh_option $file3 $host:$job_path >/dev/null + scp $ssh_option $file4 $host:$job_path >/dev/null + + cmd="bash $job_path/osd_job flush_osd_journal $data_path;" + cmd="$cmd $job_path/osd_job do_omap_list $data_path;" + cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;" + cmd="$cmd bash $job_path/osd_job do_image_list $data_path;" + + ssh $ssh_option $host $cmd </dev/null + } & + done < $osd_host_path + wait + echo "$func: finish" +} + +function gather_node_infos() +{ + local func="gather_node_infos" + echo "$func ..." + >$pg_coll + >$image_coll_v1 + >$image_coll_v2 + trap 'echo $func failed; exit' INT HUP + while read line + do + { + host=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + echo "$func: $host" + check_osd_process $host + + #pg epoch + cmd1="bash $job_path/osd_job cat_pg_epoch $data_path" + ssh $ssh_option $host $cmd1 >> $pg_coll + #image v1 + cmd2="bash $job_path/osd_job cat_image_v1 $data_path" + ssh $ssh_option $host $cmd2 >> $image_coll_v1 + #image v2 + cmd3="bash $job_path/osd_job cat_image_v2 $data_path" + ssh $ssh_option $host $cmd3 >> $image_coll_v2 + } & + done < $osd_host_path + wait + echo "$func: finish" +} + +function scatter_gather() +{ + local func="scatter_gather" + if [ ! -s $osd_host ];then + echo "$func: no osd_host input" + exit + fi + if [ ! -s $mon_host ];then + echo "$func: no mon_host input" + exit + fi + scatter_node_jobs + gather_node_infos +} + + +#------------- operations -------------- + +function database() +{ + scatter_gather + gen_database +} + +function list() +{ + list_images +} + +function lookup() +{ + lookup_image $1 $2 $3 +} + +function recover() +{ + recover_image $1 $2 $3 $4 +} + +#------------- helper ------------- + +function usage() +{ + local cmd_name="rbd-recover-tool" + echo + echo "$cmd_name is used to recover rbd image of replicated pool, + when all ceph services are stopped" + echo "Usage:" + echo "$cmd_name database + gather pg info, object info, image metadata, + and epoch info from all osd nodes, + this will cosume a long time, just be patient, + especially when scale up to 1000+ osds" + echo "$cmd_name list + list all rbd images of all replicated pools, + before to lookup & recover" + echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]] + show image metadata: image format, rbd id, size, order, snapseq + In addition, for image with snapshots, + this will list all snapshot infomations" + echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>] + all snapshots share one image head, to economize disk space + so there is only one snapshot at any time, + image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name + cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT, + will show snapid + recover to raw image/nosnap/head: <image_name> + rollback to image head: <image_name>@ + rollback to image snap: <image_name>@<snap_name> + recover steps: + 1. recover image nosnap (only one time) + 2. rollback to image snap" +} + +function get_path() +{ + local func="get_path" + if [ $# -lt 1 ];then + return + fi + if [[ $1 =~ // ]];then + return # "/path//to" is invalid + fi + local parent=`dirname $1` + local name=`basename $1` + if [ "$parent"x = "/"x ];then + echo "$parent$name" + else + echo -n "$parent/$name" + fi +} + +function admin_cmd() +{ + local func="admin_cmd" + if [ $# -lt 1 ];then + usage + exit + fi + if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then + usage + exit + fi + + if [ "$1"x = "database"x ];then + if [ $# -gt 1 ];then + usage + exit + fi + # remove osd_host to refresh osd_host and osd_host_mapping + rm -f $osd_host + init_env_admin + database + elif [ "$1"x = "list"x ];then + if [ $# -gt 1 ];then + usage + exit + fi + init_env_admin + list + elif [ "$1"x = "lookup"x ];then + if [ $# -gt 2 ];then + usage + exit + fi + local pool_id=-1 + local image_name= + local snap_name= + if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + snap_name="${BASH_REMATCH[3]}" + else + echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]" + exit + fi + init_env_admin + lookup $pool_id $image_name $snap_name + elif [ "$1"x = "recover"x ];then + if [ $# -lt 2 ] || [ $# -gt 3 ];then + usage + exit + fi + local pool_id=-1 + local image_name= + local snap_name=@ + local image_dir= + if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then + pool_id="${BASH_REMATCH[1]}" + image_name="${BASH_REMATCH[2]}" + snap_name="${BASH_REMATCH[3]}" + if [ "$snap_name"x = ""x ];then + snap_name=@@ + fi + else + echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]" + exit + fi + if [ $# = 3 ];then + image_dir=`get_path $3` + if [ "image_dir"x = ""x ];then + echo "$3 invalid" + exit + fi + fi + init_env_admin + recover $pool_id $image_name $snap_name $image_dir + elif [ "$1"x = "scp_files"x ];then + if [ $# -gt 1 ];then + exit + fi + admin_parse_osd + scp_files + elif [ "$1"x = "scp_file"x ];then + if [ $# -gt 2 ];then + exit + fi + admin_parse_osd + scp_file $2 + else + echo "$func: $1: command not found" + fi +} + +admin_cmd $* diff --git a/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh new file mode 100755 index 00000000..876b47b9 --- /dev/null +++ b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +# unit test case for rbd-recover-tool + +#prepare: +# - write config files: config/osd_host, config/mon_host, config/storage_path, config/mds_host if exist mds +#step 1. rbd export all images as you need +#step 2. stop all ceph services +#step 3. use ceph_rbd_recover_tool to recover all images +#step 4. compare md5sum of recover image with that of export image who has the same image name + +ssh_opt="-o ConnectTimeout=1" +my_dir=$(dirname "$0") +tool_dir=$my_dir + +#storage_path=$my_dir/config/storage_path +mon_host=$my_dir/config/mon_host +osd_host=$my_dir/config/osd_host +mds_host=$my_dir/config/mds_host + +test_dir= # `cat $storage_path` +export_dir= #$test_dir/export +recover_dir= #$test_dir/recover +image_names= #$test_dir/image_names +online_images= #$test_dir/online_images, all images on ceph rbd pool +gen_db= #$test_dir/gen_db, label database if exist +pool=rbd +pool_id=2 + +function get_pool_id() +{ + local pool_id_file=/tmp/pool_id_file.$$$$ + ceph osd pool stats $pool|head -n 1|awk '{print $4}' >$pool_id_file + if [ $? -ne 0 ];then + echo "$func: get pool id failed: pool = $pool" + rm -f $pool_id_file + exit + fi + pool_id=`cat $pool_id_file` + echo "$func: pool_id = $pool_id" + rm -f $pool_id_file +} + +function init() +{ + local func="init" + if [ $# -eq 0 ];then + echo "$func: must input <path> to storage images, enough disk space is good" + exit + fi + if [ ! -s $osd_host ];then + echo "$func: config/osd_host not exists or empty" + exit + fi + if [ ! -s $mon_host ];then + echo "$func: config/mon_host not exists or empty" + exit + fi + if [ ! -e $mds_host ];then + echo "$func: config/mds_host not exists" + exit + fi + test_dir=$1 + export_dir=$test_dir/export + recover_dir=$test_dir/recover + image_names=$test_dir/image_names + online_images=$test_dir/online_images + gen_db=$test_dir/gen_db + + trap 'echo "ceph cluster is stopped ..."; exit;' INT + ceph -s >/dev/null + get_pool_id + + mkdir -p $test_dir + mkdir -p $export_dir + mkdir -p $recover_dir + rm -rf $export_dir/* + rm -rf $recover_dir/* +} + +function do_gen_database() +{ + local func="do_gen_database" + if [ -s $gen_db ] && [ `cat $gen_db` = 1 ];then + echo "$func: database already existed" + exit + fi + bash $tool_dir/rbd-recover-tool database + echo 1 >$gen_db +} + +#check if all ceph processes are stopped +function check_ceph_service() +{ + local func="check_ceph_service" + local res=`cat $osd_host $mon_host $mds_host|sort -u|tr -d [:blank:]|xargs -n 1 -I @ ssh $ssh_opt @ "ps aux|grep -E \"(ceph-osd|ceph-mon|ceph-mds)\"|grep -v grep"` + if [ "$res"x != ""x ];then + echo "$func: NOT all ceph services are stopped" + return 1 + exit + fi + echo "$func: all ceph services are stopped" + return 0 +} + +function stop_ceph() +{ + local func="stop_ceph" + #cat osd_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-osd" + while read osd + do + { + osd=`echo $osd|tr -d [:blank:]` + if [ "$osd"x = ""x ];then + continue + fi + #ssh $ssh_opt $osd "killall ceph-osd ceph-mon ceph-mds" </dev/null + ssh $ssh_opt $osd "killall ceph-osd" </dev/null + } & + done < $osd_host + wait + echo "waiting kill all osd ..." + sleep 1 + #cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon ceph-osd ceph-mds" + cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon" + #cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds ceph-mon ceph-osd" + cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds" +} + +function create_image() +{ + local func="create_image" + if [ ${#} -lt 3 ];then + echo "create_image: parameters: <image_name> <size> <image_format>" + exit + fi + local image_name=$1 + local size=$2 + local image_format=$3 + if [ $image_format -lt 1 ] || [ $image_format -gt 2 ];then + echo "$func: image_format must be 1 or 2" + exit + fi + local res=`rbd list|grep -E "^$1$"` + echo "$func $image_name ..." + if [ "$res"x = ""x ];then + rbd -p $pool create $image_name --size $size --image_format $image_format + else + if [ $image_format -eq 2 ];then + rbd snap ls $image_name|tail -n +2|awk '{print $2}'|xargs -n 1 -I % rbd snap unprotect $image_name@% + fi + rbd snap purge $image_name + #rbd rm $image_name + rbd -p $pool resize --allow-shrink --size $size $image_name + fi +} + +function export_image() +{ + local func="export_image" + + if [ $# -lt 2 ];then + echo "$func: parameters: <image_name> <image_format> [<image_size>]" + exit + fi + + local image_name=$1 + local format=$(($2)) + local size=$(($3)) #MB + + if [ $format -ne 1 ] && [ $format -ne 2 ];then + echo "$func: image format must be 1 or 2" + exit + fi + + if [ $size -eq 0 ];then + size=24 #MB + echo "$func: size = $size" + fi + local mnt=/rbdfuse + + mount |grep "rbd-fuse on /rbdfuse" &>/dev/null + if [ $? -ne 0 ];then + rbd-fuse $mnt + fi + + create_image $image_name $size $format + + dd conv=notrunc if=/dev/urandom of=$mnt/$image_name bs=4M count=$(($size/4)) + + local export_image_dir=$export_dir/pool_$pool_id/$image_name + mkdir -p $export_image_dir + local export_md5_nosnap=$export_image_dir/@md5_nosnap + >$export_md5_nosnap + + local export_image_path=$export_image_dir/$image_name + rm -f $export_image_path + + rbd export $pool/$image_name $export_image_path + md5sum $export_image_path |awk '{print $1}' >$export_md5_nosnap +} + +function recover_image() +{ + local func="recover_snapshots" + if [ $# -lt 1 ];then + echo "$func: parameters: <image_name>" + exit + fi + + local image_name=$1 + #pool_id=29 + + local recover_image_dir=$recover_dir/pool_$pool_id/$image_name + mkdir -p $recover_image_dir + local recover_md5_nosnap=$recover_image_dir/@md5_nosnap + >$recover_md5_nosnap + local snapshot= + + bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir + md5sum $recover_image_dir/$image_name|awk '{print $1}' >$recover_md5_nosnap +} + +function make_snapshot() +{ + local func="make_snapshot" + if [ $# -lt 5 ];then + echo "$func: parameters: <ofile> <seek> <count> <snap> <export_image_dir>" + exit + fi + local ofile=$1 + local seek=$(($2)) + local count=$(($3)) + local snap=$4 + local export_image_dir=$5 + + if [ $seek -lt 0 ];then + echo "$func: seek can not be minus" + exit + fi + + if [ $count -lt 1 ];then + echo "$func: count must great than zero" + exit + fi + + echo "[$snap] $func ..." + echo "$1 $2 $3 $4" + rbd snap ls $image_name|grep $snap; + + local res=$? + if [ $res -eq 0 ];then + return $res + fi + + dd conv=notrunc if=/dev/urandom of=$ofile bs=1M count=$count seek=$seek 2>/dev/null + snapshot=$image_name@$snap + rbd snap create $snapshot + rm -f $export_image_dir/$snapshot + rbd export $pool/$image_name $export_image_dir/$snapshot + pushd $export_image_dir >/dev/null + md5sum $snapshot >> @md5 + popd >/dev/null +} + +function recover_snapshots() +{ + local func="recover_snapshots" + if [ $# -lt 1 ];then + echo "$func: parameters: <image_name>" + exit + fi + + local image_name=$1 + #pool_id=29 + + local recover_image_dir=$recover_dir/pool_$pool_id/$image_name + mkdir -p $recover_image_dir + local recover_md5=$recover_image_dir/@md5 + >$recover_md5 + local snapshot= + + + # recover head + bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir + + # recover snapshots + for((i=1; i<10; i++)) + do + snapshot=snap$i + bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name@$snapshot $recover_dir + pushd $recover_image_dir >/dev/null + local chksum=`md5sum $image_name|awk '{print $1}'` + echo "$chksum $image_name@$snapshot" >>@md5 + popd >/dev/null + done +} + +function export_snapshots() +{ + local func="export_snapshots" + + if [ $# -lt 2 ];then + echo "$func: parameters: <image_name> <image_format> [<image_size>]" + exit + fi + + local image_name=$1 + local format=$(($2)) + local size=$(($3)) #MB + + if [ $format -ne 1 ] && [ $format -ne 2 ];then + echo "$func: image format must be 1 or 2" + exit + fi + + if [ $size -eq 0 ];then + size=24 #MB + echo "$func: size = $size" + fi + local mnt=/rbdfuse + + mount |grep "rbd-fuse on /rbdfuse" &>/dev/null + if [ $? -ne 0 ];then + rbd-fuse $mnt + fi + + create_image $image_name $size $format + + local export_image_dir=$export_dir/pool_$pool_id/$image_name + mkdir -p $export_image_dir + local export_md5=$export_image_dir/@md5 + >$export_md5 + + # create 9 snapshots + # image = {object0, object1, object2, object3, object4, object5, ...} + # + # snap1 : init/write all objects + # snap2 : write object0 + # snap3 : write object1 + # snap4 : write object2 + # snap5 : write object3 + # snap6 : write object4 + # snap7 : write object5 + # snap8 : write object0 + # snap9 : write object3 + + make_snapshot $mnt/$image_name 0 $size snap1 $export_image_dir + make_snapshot $mnt/$image_name 0 1 snap2 $export_image_dir + make_snapshot $mnt/$image_name 4 1 snap3 $export_image_dir + make_snapshot $mnt/$image_name 8 1 snap4 $export_image_dir + make_snapshot $mnt/$image_name 12 1 snap5 $export_image_dir + make_snapshot $mnt/$image_name 16 1 snap6 $export_image_dir + make_snapshot $mnt/$image_name 20 1 snap7 $export_image_dir + make_snapshot $mnt/$image_name 1 1 snap8 $export_image_dir + make_snapshot $mnt/$image_name 13 1 snap9 $export_image_dir +} + +function check_recover_nosnap() +{ + local func="check_recover_nosnap" + if [ $# -lt 3 ];then + echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>" + fi + local export_md5=$1 + local recover_md5=$2 + local image_name=$3 + + local ifpassed="FAILED" + + echo "================ < $image_name nosnap > ================" + + local export_md5sum=`cat $export_md5` + local recover_md5sum=`cat $recover_md5` + + if [ "$export_md5sum"x != ""x ] && [ "$export_md5sum"x = "$recover_md5sum"x ];then + ifpassed="PASSED" + fi + echo "export: $export_md5sum" + echo "recover: $recover_md5sum $ifpassed" +} + +function check_recover_snapshots() +{ + local func="check_recover_snapshots" + if [ $# -lt 3 ];then + echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>" + fi + local export_md5=$1 + local recover_md5=$2 + local image_name=$3 + + local ifpassed="FAILED" + + echo "================ < $image_name snapshots > ================" + + OIFS=$IFS + IFS=$'\n' + local export_md5s=(`cat $export_md5`) + local recover_md5s=(`cat $recover_md5`) + for((i=0; i<9; i++)) + do + OOIFS=$IFS + IFS=$' ' + local x=$(($i+1)) + snapshot=snap$x + + local export_arr=(`echo ${export_md5s[$i]}`) + local recover_arr=(`echo ${recover_md5s[$i]}`) + echo "export: ${export_md5s[$i]}" + if [ "${export_arr[1]}"x != ""x ] && [ "${export_arr[1]}"x = "${recover_arr[1]}"x ];then + ifpassed="PASSED" + fi + echo "recover: ${recover_md5s[$i]} $ifpassed" + IFS=$OOIFS + done + IFS=$OIFS +} + +# step 1: export image, snapshot +function do_export_nosnap() +{ + export_image image_v1_nosnap 1 + export_image image_v2_nosnap 2 +} + +function do_export_snap() +{ + export_snapshots image_v1_snap 1 + export_snapshots image_v2_snap 2 +} + +# step 2: stop ceph cluster and gen database +function stop_cluster_gen_database() +{ + trap 'echo stop ceph cluster failed; exit;' INT HUP + stop_ceph + sleep 2 + check_ceph_service + local res=$? + while [ $res -ne 0 ] + do + stop_ceph + sleep 2 + check_ceph_service + res=$? + done + + echo 0 >$gen_db + do_gen_database +} + +# step 3: recover image,snapshot +function do_recover_nosnap() +{ + recover_image image_v1_nosnap + recover_image image_v2_nosnap +} + +function do_recover_snap() +{ + recover_snapshots image_v1_snap + recover_snapshots image_v2_snap +} + +# step 4: check md5sum pair<export_md5sum, recover_md5sum> +function do_check_recover_nosnap() +{ + local image1=image_v1_nosnap + local image2=image_v2_nosnap + + local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5_nosnap + local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5_nosnap + local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5_nosnap + local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5_nosnap + + check_recover_nosnap $export_md5_1 $recover_md5_1 $image1 + check_recover_nosnap $export_md5_2 $recover_md5_2 $image2 +} + +function do_check_recover_snap() +{ + local image1=image_v1_snap + local image2=image_v2_snap + + local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5 + local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5 + local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5 + local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5 + + check_recover_snapshots $export_md5_1 $recover_md5_1 $image1 + check_recover_snapshots $export_md5_2 $recover_md5_2 $image2 +} + +function test_case_1() +{ + do_export_nosnap + stop_cluster_gen_database + do_recover_nosnap + do_check_recover_nosnap +} + +function test_case_2() +{ + do_export_snap + stop_cluster_gen_database + do_recover_snap + do_check_recover_snap +} + +function test_case_3() +{ + do_export_nosnap + do_export_snap + + stop_cluster_gen_database + + do_recover_nosnap + do_recover_snap + + do_check_recover_nosnap + do_check_recover_snap +} + + +init $* +test_case_3 |