summaryrefslogtreecommitdiffstats
path: root/src/mon/MonMap.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mon/MonMap.h')
-rw-r--r--src/mon/MonMap.h546
1 files changed, 546 insertions, 0 deletions
diff --git a/src/mon/MonMap.h b/src/mon/MonMap.h
new file mode 100644
index 000000000..02304edfd
--- /dev/null
+++ b/src/mon/MonMap.h
@@ -0,0 +1,546 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MONMAP_H
+#define CEPH_MONMAP_H
+
+#ifdef WITH_SEASTAR
+#include <seastar/core/future.hh>
+#endif
+
+#include "common/config_fwd.h"
+#include "common/ceph_releases.h"
+
+#include "include/err.h"
+#include "include/types.h"
+
+#include "mon/mon_types.h"
+#include "msg/Message.h"
+
+class health_check_map_t;
+
+#ifdef WITH_SEASTAR
+namespace crimson::common {
+ class ConfigProxy;
+}
+#endif
+
+namespace ceph {
+ class Formatter;
+}
+
+struct mon_info_t {
+ /**
+ * monitor name
+ *
+ * i.e., 'foo' in 'mon.foo'
+ */
+ std::string name;
+ /**
+ * monitor's public address(es)
+ *
+ * public facing address(es), used to communicate with all clients
+ * and with other monitors.
+ */
+ entity_addrvec_t public_addrs;
+ /**
+ * the priority of the mon; the lower the value, the more preferred
+ */
+ uint16_t priority{0};
+ uint16_t weight{0};
+
+ /**
+ * The location of the monitor, in CRUSH hierarchy terms
+ */
+ std::map<std::string,std::string> crush_loc;
+
+ // <REMOVE ME>
+ mon_info_t(const std::string& n, const entity_addr_t& p_addr, uint16_t p)
+ : name(n), public_addrs(p_addr), priority(p)
+ {}
+ // </REMOVE ME>
+
+ mon_info_t(const std::string& n, const entity_addrvec_t& p_addrs,
+ uint16_t p, uint16_t w)
+ : name(n), public_addrs(p_addrs), priority(p), weight(w)
+ {}
+ mon_info_t(const std::string &n, const entity_addrvec_t& p_addrs)
+ : name(n), public_addrs(p_addrs)
+ { }
+
+ mon_info_t() { }
+
+
+ void encode(ceph::buffer::list& bl, uint64_t features) const;
+ void decode(ceph::buffer::list::const_iterator& p);
+ void print(std::ostream& out) const;
+};
+WRITE_CLASS_ENCODER_FEATURES(mon_info_t)
+
+inline std::ostream& operator<<(std::ostream& out, const mon_info_t& mon) {
+ mon.print(out);
+ return out;
+}
+
+class MonMap {
+ public:
+ epoch_t epoch; // what epoch/version of the monmap
+ uuid_d fsid;
+ utime_t last_changed;
+ utime_t created;
+
+ std::map<std::string, mon_info_t> mon_info;
+ std::map<entity_addr_t, std::string> addr_mons;
+
+ std::vector<std::string> ranks;
+ /* ranks which were removed when this map took effect.
+ There should only be one at a time, but leave support
+ for arbitrary numbers just to be safe. */
+ std::set<int> removed_ranks;
+
+ /**
+ * Persistent Features are all those features that once set on a
+ * monmap cannot, and should not, be removed. These will define the
+ * non-negotiable features that a given monitor must support to
+ * properly operate in a given quorum.
+ *
+ * Should be reserved for features that we really want to make sure
+ * are sticky, and are important enough to tolerate not being able
+ * to downgrade a monitor.
+ */
+ mon_feature_t persistent_features;
+ /**
+ * Optional Features are all those features that can be enabled or
+ * disabled following a given criteria -- e.g., user-mandated via the
+ * cli --, and act much like indicators of what the cluster currently
+ * supports.
+ *
+ * They are by no means "optional" in the sense that monitors can
+ * ignore them. Just that they are not persistent.
+ */
+ mon_feature_t optional_features;
+
+ /**
+ * Returns the set of features required by this monmap.
+ *
+ * The features required by this monmap is the union of all the
+ * currently set persistent features and the currently set optional
+ * features.
+ *
+ * @returns the set of features required by this monmap
+ */
+ mon_feature_t get_required_features() const {
+ return (persistent_features | optional_features);
+ }
+
+ // upgrade gate
+ ceph_release_t min_mon_release{ceph_release_t::unknown};
+
+ void _add_ambiguous_addr(const std::string& name,
+ entity_addr_t addr,
+ int priority,
+ int weight,
+ bool for_mkfs);
+
+ enum election_strategy {
+ // Keep in sync with ElectionLogic.h!
+ CLASSIC = 1, // the original rank-based one
+ DISALLOW = 2, // disallow a set from being leader
+ CONNECTIVITY = 3 // includes DISALLOW, extends to prefer stronger connections
+ };
+ election_strategy strategy = CLASSIC;
+ std::set<std::string> disallowed_leaders; // can't be leader under CONNECTIVITY/DISALLOW
+ bool stretch_mode_enabled = false;
+ std::string tiebreaker_mon; // std:: spelled out: a header must not rely on a leaked using-directive
+ std::set<std::string> stretch_marked_down_mons; // can't be leader until fully recovered
+
+public:
+ void calc_legacy_ranks();
+ void calc_addr_mons() {
+ // populate addr_mons
+ addr_mons.clear();
+ for (auto& p : mon_info) {
+ for (auto& a : p.second.public_addrs.v) {
+ addr_mons[a] = p.first;
+ }
+ }
+ }
+
+ MonMap()
+ : epoch(0) {
+ }
+
+ uuid_d& get_fsid() { return fsid; }
+
+ unsigned size() const {
+ return mon_info.size();
+ }
+
+ unsigned min_quorum_size(unsigned total_mons=0) const {
+ if (total_mons == 0) {
+ total_mons = size();
+ }
+ return total_mons / 2 + 1;
+ }
+
+ epoch_t get_epoch() const { return epoch; }
+ void set_epoch(epoch_t e) { epoch = e; }
+
+ /**
+ * Obtain list of public facing addresses
+ *
+ * @param ls list to populate with the monitors' addresses
+ */
+ void list_addrs(std::list<entity_addr_t>& ls) const {
+ for (auto& i : mon_info) {
+ for (auto& j : i.second.public_addrs.v) {
+ ls.push_back(j);
+ }
+ }
+ }
+
+ /**
+ * Add new monitor to the monmap
+ *
+ * @param m monitor info of the new monitor
+ */
+ void add(const mon_info_t& m) {
+ ceph_assert(mon_info.count(m.name) == 0);
+ for (auto& a : m.public_addrs.v) {
+ ceph_assert(addr_mons.count(a) == 0);
+ }
+ mon_info[m.name] = m;
+ if (get_required_features().contains_all(
+ ceph::features::mon::FEATURE_NAUTILUS)) {
+ ranks.push_back(m.name);
+ ceph_assert(ranks.size() == mon_info.size());
+ } else {
+ calc_legacy_ranks();
+ }
+ calc_addr_mons();
+ }
+
+ /**
+ * Add new monitor to the monmap
+ * @param name Monitor name (i.e., 'foo' in 'mon.foo')
+ * @param addrv Monitor's public address(es)
+ * @param priority,weight values stored in the new mon_info_t (default 0)
+ */
+ void add(const std::string &name, const entity_addrvec_t &addrv,
+ uint16_t priority=0, uint16_t weight=0) {
+ add(mon_info_t(name, addrv, priority, weight));
+ }
+
+ /**
+ * Remove monitor from the monmap
+ *
+ * @param name Monitor name (i.e., 'foo' in 'mon.foo')
+ */
+ void remove(const std::string &name) {
+ // this must match what we do in ConnectionTracker::notify_rank_removed
+ ceph_assert(mon_info.count(name));
+ int rank = get_rank(name);
+ mon_info.erase(name);
+ disallowed_leaders.erase(name);
+ ceph_assert(mon_info.count(name) == 0);
+ if (rank >= 0 ) {
+ removed_ranks.insert(rank);
+ }
+ if (get_required_features().contains_all(
+ ceph::features::mon::FEATURE_NAUTILUS)) {
+ ranks.erase(std::find(ranks.begin(), ranks.end(), name));
+ ceph_assert(ranks.size() == mon_info.size());
+ } else {
+ calc_legacy_ranks();
+ }
+ calc_addr_mons();
+ }
+
+ /**
+ * Rename monitor from @p oldname to @p newname
+ *
+ * @param oldname monitor's current name (i.e., 'foo' in 'mon.foo')
+ * @param newname monitor's new name (i.e., 'bar' in 'mon.bar')
+ */
+ void rename(std::string oldname, std::string newname) {
+ ceph_assert(contains(oldname));
+ ceph_assert(!contains(newname));
+ mon_info[newname] = mon_info[oldname];
+ mon_info.erase(oldname);
+ mon_info[newname].name = newname;
+ if (get_required_features().contains_all(
+ ceph::features::mon::FEATURE_NAUTILUS)) {
+ *std::find(ranks.begin(), ranks.end(), oldname) = newname;
+ ceph_assert(ranks.size() == mon_info.size());
+ } else {
+ calc_legacy_ranks();
+ }
+ calc_addr_mons();
+ }
+
+ int set_rank(const std::string& name, int rank) {
+ int oldrank = get_rank(name);
+ if (oldrank < 0) {
+ return -ENOENT;
+ }
+ if (rank < 0 || rank >= (int)ranks.size()) {
+ return -EINVAL;
+ }
+ if (oldrank != rank) {
+ ranks.erase(ranks.begin() + oldrank);
+ ranks.insert(ranks.begin() + rank, name);
+ }
+ return 0;
+ }
+
+ bool contains(const std::string& name) const {
+ return mon_info.count(name);
+ }
+
+ /**
+ * Check if monmap contains a monitor with address @p a
+ *
+ * @note only the entries of each monitor's public_addrs are compared.
+ * @param a monitor address
+ * @param name if non-null, receives the name of the matching monitor
+ * @returns true if monmap contains a monitor with address @p a;
+ * false otherwise.
+ */
+ bool contains(const entity_addr_t &a, std::string *name=nullptr) const {
+ for (auto& i : mon_info) {
+ for (auto& j : i.second.public_addrs.v) {
+ if (j == a) {
+ if (name) {
+ *name = i.first;
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ bool contains(const entity_addrvec_t &av, std::string *name=nullptr) const {
+ for (auto& i : mon_info) {
+ for (auto& j : i.second.public_addrs.v) {
+ for (auto& k : av.v) {
+ if (j == k) {
+ if (name) {
+ *name = i.first;
+ }
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ std::string get_name(unsigned n) const {
+ ceph_assert(n < ranks.size());
+ return ranks[n];
+ }
+ std::string get_name(const entity_addr_t& a) const {
+ std::map<entity_addr_t, std::string>::const_iterator p = addr_mons.find(a);
+ if (p == addr_mons.end())
+ return std::string();
+ else
+ return p->second;
+ }
+ std::string get_name(const entity_addrvec_t& av) const {
+ for (auto& i : av.v) {
+ std::map<entity_addr_t, std::string>::const_iterator p = addr_mons.find(i);
+ if (p != addr_mons.end())
+ return p->second;
+ }
+ return std::string();
+ }
+
+ int get_rank(const std::string& n) const {
+ if (auto found = std::find(ranks.begin(), ranks.end(), n);
+ found != ranks.end()) {
+ return std::distance(ranks.begin(), found);
+ } else {
+ return -1;
+ }
+ }
+ int get_rank(const entity_addr_t& a) const {
+ std::string n = get_name(a);
+ if (!n.empty()) {
+ return get_rank(n);
+ }
+ return -1;
+ }
+ int get_rank(const entity_addrvec_t& av) const {
+ std::string n = get_name(av);
+ if (!n.empty()) {
+ return get_rank(n);
+ }
+ return -1;
+ }
+ bool get_addr_name(const entity_addr_t& a, std::string& name) const {
+ const auto p = addr_mons.find(a); // one lookup; name is left untouched on a miss
+ if (p != addr_mons.end())
+ name = p->second;
+ return p != addr_mons.end();
+ }
+
+ const entity_addrvec_t& get_addrs(const std::string& n) const {
+ ceph_assert(mon_info.count(n));
+ std::map<std::string,mon_info_t>::const_iterator p = mon_info.find(n);
+ return p->second.public_addrs;
+ }
+ const entity_addrvec_t& get_addrs(unsigned m) const {
+ ceph_assert(m < ranks.size());
+ return get_addrs(ranks[m]);
+ }
+ void set_addrvec(const std::string& n, const entity_addrvec_t& a) {
+ ceph_assert(mon_info.count(n));
+ mon_info[n].public_addrs = a;
+ calc_addr_mons();
+ }
+ uint16_t get_priority(const std::string& n) const {
+ auto it = mon_info.find(n);
+ ceph_assert(it != mon_info.end());
+ return it->second.priority;
+ }
+ uint16_t get_weight(const std::string& n) const {
+ auto it = mon_info.find(n);
+ ceph_assert(it != mon_info.end());
+ return it->second.weight;
+ }
+ void set_weight(const std::string& n, uint16_t v) {
+ auto it = mon_info.find(n);
+ ceph_assert(it != mon_info.end());
+ it->second.weight = v;
+ }
+
+ void encode(ceph::buffer::list& blist, uint64_t con_features) const;
+ void decode(ceph::buffer::list& blist) {
+ auto p = std::cbegin(blist);
+ decode(p);
+ }
+ void decode(ceph::buffer::list::const_iterator& p);
+
+ void generate_fsid() {
+ fsid.generate_random();
+ }
+
+ // read from/write to a file
+ int write(const char *fn);
+ int read(const char *fn);
+
+ /**
+ * build an initial bootstrap monmap from conf
+ *
+ * Build an initial bootstrap monmap from the config. This will
+ * try, in this order:
+ *
+ * 1 monmap -- an explicitly provided monmap
+ * 2 mon_host -- list of monitors
+ * 3 config [mon.*] sections, and 'mon addr' fields in those sections
+ *
+ * @param cct context (and associated config)
+ * @param errout std::ostream to send error messages to
+ */
+#ifdef WITH_SEASTAR
+ seastar::future<> build_initial(const crimson::common::ConfigProxy& conf, bool for_mkfs);
+#else
+ int build_initial(CephContext *cct, bool for_mkfs, std::ostream& errout);
+#endif
+ /**
+ * filter monmap given a set of initial members.
+ *
+ * Remove mons that aren't in the initial_members list. Add missing
+ * mons and give them dummy IPs (blank IPv4, with a non-zero
+ * nonce). If the name matches my_name, then my_addrs will be used in
+ * place of a dummy addr.
+ * @param cct context
+ * @param initial_members list of initial member names
+ * @param my_name name of self, can be blank
+ * @param my_addrs my addrs
+ * @param removed optional pointer to set to insert removed mon addrs to
+ */
+ void set_initial_members(CephContext *cct,
+ std::list<std::string>& initial_members,
+ std::string my_name,
+ const entity_addrvec_t& my_addrs,
+ std::set<entity_addrvec_t> *removed);
+
+ void print(std::ostream& out) const;
+ void print_summary(std::ostream& out) const;
+ void dump(ceph::Formatter *f) const;
+ void dump_summary(ceph::Formatter *f) const;
+
+ void check_health(health_check_map_t *checks) const;
+
+ static void generate_test_instances(std::list<MonMap*>& o);
+protected:
+ /**
+ * build a monmap from a list of entity_addrvec_t's
+ *
+ * Give mons dummy names.
+ *
+ * @param addrs list of entity_addrvec_t's
+ * @param prefix prefix to prepend to generated mon names
+ */
+ void init_with_addrs(const std::vector<entity_addrvec_t>& addrs,
+ bool for_mkfs,
+ std::string_view prefix);
+ /**
+ * build a monmap from a list of ips
+ *
+ * Give mons dummy names.
+ *
+ * @param ips list of ips, space or comma separated
+ * @param prefix prefix to prepend to generated mon names
+ * @return 0 for success, -errno on error
+ */
+ int init_with_ips(const std::string& ips,
+ bool for_mkfs,
+ std::string_view prefix);
+ /**
+ * build a monmap from a list of hostnames
+ *
+ * Give mons dummy names.
+ *
+ * @param hostlist list of hostnames, space or comma separated
+ * @param prefix prefix to prepend to generated mon names
+ * @return 0 for success, -errno on error
+ */
+ int init_with_hosts(const std::string& hostlist,
+ bool for_mkfs,
+ std::string_view prefix);
+ int init_with_config_file(const ConfigProxy& conf, std::ostream& errout);
+#ifdef WITH_SEASTAR
+ seastar::future<> read_monmap(const std::string& monmap);
+ /// try to build monmap with different settings, like
+ /// mon_host, mon* sections, and mon_dns_srv_name
+ seastar::future<> build_monmap(const crimson::common::ConfigProxy& conf, bool for_mkfs);
+ /// initialize monmap by resolving given service name
+ seastar::future<> init_with_dns_srv(bool for_mkfs, const std::string& name);
+#else
+ /// read from encoded monmap file
+ int init_with_monmap(const std::string& monmap, std::ostream& errout);
+ int init_with_dns_srv(CephContext* cct, std::string srv_name, bool for_mkfs,
+ std::ostream& errout);
+#endif
+};
+WRITE_CLASS_ENCODER_FEATURES(MonMap)
+
+inline std::ostream& operator<<(std::ostream &out, const MonMap &m) {
+ m.print_summary(out);
+ return out;
+}
+
+#endif