summary refs log tree commit diff stats
path: root/src/mds/FSMap.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mds/FSMap.h')
-rw-r--r-- src/mds/FSMap.h 636
1 files changed, 636 insertions, 0 deletions
diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h
new file mode 100644
index 000000000..f57a4177a
--- /dev/null
+++ b/src/mds/FSMap.h
@@ -0,0 +1,636 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef CEPH_FSMAP_H
+#define CEPH_FSMAP_H
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <string_view>
+
+#include <errno.h>
+
+#include "include/types.h"
+#include "common/Clock.h"
+#include "mds/MDSMap.h"
+
+#include "include/CompatSet.h"
+#include "include/ceph_features.h"
+#include "include/common_fwd.h"
+#include "common/Formatter.h"
+#include "mds/mdstypes.h"
+
+#if __cplusplus <= 201703L
+/**
+ * Remove every element of the map `c` for which `pred` returns true.
+ *
+ * Only compiled when __cplusplus <= 201703L (see the enclosing #if).
+ *
+ * @param c     map to prune in place
+ * @param pred  unary predicate invoked with each element of the map
+ * @returns the number of elements that were erased
+ */
+template<class Key, class T, class Compare, class Alloc, class Pred>
+typename std::map<Key, T, Compare, Alloc>::size_type
+erase_if(std::map<Key, T, Compare, Alloc>& c, Pred pred) {
+  typename std::map<Key, T, Compare, Alloc>::size_type removed = 0;
+  auto it = c.begin();
+  while (it != c.end()) {
+    if (!pred(*it)) {
+      ++it;
+      continue;
+    }
+    // erase() hands back the iterator following the removed element
+    it = c.erase(it);
+    ++removed;
+  }
+  return removed;
+}
+#endif
+
+class health_check_map_t;
+
+/**
+ * A (client name, cluster name, file system name) triple.
+ *
+ * Two ClusterInfo objects compare equal only when all three names match.
+ */
+struct ClusterInfo {
+  std::string client_name;
+  std::string cluster_name;
+  std::string fs_name;
+
+  ClusterInfo() = default;
+  ClusterInfo(std::string_view client_name,
+              std::string_view cluster_name,
+              std::string_view fs_name)
+    : client_name(client_name),
+      cluster_name(cluster_name),
+      fs_name(fs_name)
+  {}
+
+  // Field-by-field equality; bails out on the first mismatch.
+  bool operator==(const ClusterInfo &cluster_info) const {
+    if (client_name != cluster_info.client_name) {
+      return false;
+    }
+    if (cluster_name != cluster_info.cluster_name) {
+      return false;
+    }
+    return fs_name == cluster_info.fs_name;
+  }
+
+  void dump(ceph::Formatter *f) const;
+  void print(std::ostream& out) const;
+
+  void encode(ceph::buffer::list &bl) const;
+  void decode(ceph::buffer::list::const_iterator &iter);
+};
+
+// Stream a ClusterInfo as "{client_name=..., cluster_name=..., fs_name=...}".
+inline std::ostream& operator<<(std::ostream& out, const ClusterInfo &cluster_info) {
+  out << "{client_name=" << cluster_info.client_name;
+  out << ", cluster_name=" << cluster_info.cluster_name;
+  out << ", fs_name=" << cluster_info.fs_name << "}";
+  return out;
+}
+
+/**
+ * A peer, identified by its uuid, together with a ClusterInfo
+ * describing its remote side.
+ *
+ * Equality and ordering look at the uuid only; `remote` does not
+ * participate in either comparison.
+ */
+struct Peer {
+  std::string uuid;
+  ClusterInfo remote;
+
+  Peer() = default;
+  // Intentionally implicit: lets a bare uuid be used where a Peer key
+  // is expected (e.g. lookups in a std::set<Peer>).
+  Peer(std::string_view uuid)
+    : uuid(uuid)
+  {}
+  Peer(std::string_view uuid, const ClusterInfo &remote)
+    : uuid(uuid),
+      remote(remote)
+  {}
+
+  bool operator==(const Peer &rhs) const { return uuid == rhs.uuid; }
+  bool operator<(const Peer &rhs) const { return uuid < rhs.uuid; }
+
+  void dump(ceph::Formatter *f) const;
+  void print(std::ostream& out) const;
+
+  void encode(ceph::buffer::list &bl) const;
+  void decode(ceph::buffer::list::const_iterator &iter);
+};
+
+typedef std::set<Peer> Peers;
+// Stream a Peer as "{uuid=..., remote_cluster=...}".
+inline std::ostream& operator<<(std::ostream& out, const Peer &peer) {
+  out << "{uuid=" << peer.uuid;
+  out << ", remote_cluster=" << peer.remote;
+  out << "}";
+  return out;
+}
+
+/**
+ * A "mirrored" flag plus the set of peers. Because Peer orders by
+ * uuid, the set holds at most one peer per uuid.
+ */
+struct MirrorInfo {
+  bool mirrored = false;
+  Peers peers;
+
+  MirrorInfo() = default;
+
+  bool is_mirrored() const { return mirrored; }
+  void enable_mirroring() { mirrored = true; }
+  // Switching mirroring off also forgets every peer.
+  void disable_mirroring() {
+    mirrored = false;
+    peers.clear();
+  }
+
+  // Is there a peer with this uuid?
+  bool has_peer(std::string_view uuid) const {
+    return peers.count(Peer(uuid)) != 0;
+  }
+  // Is there a peer whose remote matches this
+  // client_name/cluster_name/fs_name triple? (linear scan)
+  bool has_peer(std::string_view client_name,
+                std::string_view cluster_name,
+                std::string_view fs_name) const {
+    const ClusterInfo wanted(client_name, cluster_name, fs_name);
+    for (auto it = peers.begin(); it != peers.end(); ++it) {
+      if (it->remote == wanted) {
+        return true;
+      }
+    }
+    return false;
+  }
+  bool has_peers() const { return !peers.empty(); }
+
+  // Add a peer; the set is left untouched if the uuid is already present.
+  void peer_add(std::string_view uuid,
+                std::string_view client_name,
+                std::string_view cluster_name,
+                std::string_view fs_name) {
+    peers.emplace(uuid, ClusterInfo(client_name, cluster_name, fs_name));
+  }
+  // Drop the peer with this uuid, if any.
+  void peer_remove(std::string_view uuid) {
+    peers.erase(Peer(uuid));
+  }
+
+  void dump(ceph::Formatter *f) const;
+  void print(std::ostream& out) const;
+
+  void encode(ceph::buffer::list &bl) const;
+  void decode(ceph::buffer::list::const_iterator &iter);
+};
+
+// Stream a MirrorInfo as "{peers=...}"; the mirrored flag is not printed.
+inline std::ostream& operator<<(std::ostream& out, const MirrorInfo &mirror_info) {
+  return out << "{peers=" << mirror_info.peers << "}";
+}
+
+WRITE_CLASS_ENCODER(ClusterInfo)
+WRITE_CLASS_ENCODER(Peer)
+WRITE_CLASS_ENCODER(MirrorInfo)
+
+/**
+ * The MDSMap and any additional fields describing a particular
+ * filesystem (a unique fs_cluster_id_t).
+ */
+class Filesystem
+{
+public:
+  using ref = std::shared_ptr<Filesystem>;
+  using const_ref = std::shared_ptr<Filesystem const>;
+
+  /**
+   * Allocate a Filesystem via std::make_shared, forwarding all
+   * arguments to its constructor.
+   */
+  template<typename... Args>
+  static ref create(Args&&... args)
+  {
+    return std::make_shared<Filesystem>(std::forward<Args>(args)...);
+  }
+
+  void encode(ceph::buffer::list& bl, uint64_t features) const;
+  void decode(ceph::buffer::list::const_iterator& p);
+
+  void dump(ceph::Formatter *f) const;
+  void print(std::ostream& out) const;
+
+  /**
+   * True when no MDS is "in", or when standby-replay is not allowed
+   * and either exactly one MDS is "in" or no MDS is "up".
+   */
+  bool is_upgradeable() const {
+    const bool asr = mds_map.allows_standby_replay();
+    const auto in_mds = mds_map.get_num_in_mds();
+    const auto up_mds = mds_map.get_num_up_mds();
+
+    /* fs was "down" */
+    if (in_mds == 0) {
+      return true;
+    }
+    /* every remaining case requires asr to be disabled */
+    if (asr) {
+      return false;
+    }
+    /* max_mds was set to 1, or (any max_mds) all MDS were failed */
+    return in_mds == 1 || up_mds == 0;
+  }
+
+  /**
+   * Return true if a daemon is already assigned as
+   * STANDBY_REPLAY for the gid `who`
+   */
+  bool has_standby_replay(mds_gid_t who) const
+  {
+    const mds_gid_t sr_gid = get_standby_replay(who);
+    return sr_gid != MDS_GID_NONE;
+  }
+  mds_gid_t get_standby_replay(mds_gid_t who) const;
+
+  /**
+   * Return true if `who` is present in this filesystem's mds_info
+   * and its state is STATE_STANDBY_REPLAY
+   */
+  bool is_standby_replay(mds_gid_t who) const
+  {
+    const auto it = mds_map.mds_info.find(who);
+    return it != mds_map.mds_info.end() &&
+           it->second.state == MDSMap::STATE_STANDBY_REPLAY;
+  }
+
+  fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
+  MDSMap mds_map;
+  MirrorInfo mirror_info;
+};
+WRITE_CLASS_ENCODER_FEATURES(Filesystem)
+
+class FSMap {
+public:
+ friend class MDSMonitor;
+ friend class PaxosFSMap;
+ using mds_info_t = MDSMap::mds_info_t;
+
+ static const version_t STRUCT_VERSION = 7;
+ static const version_t STRUCT_VERSION_TRIM_TO = 7;
+
+  // Start out with the compat set from MDSMap::get_compat_set_default().
+  FSMap()
+    : default_compat(MDSMap::get_compat_set_default())
+  {}
+
+  /**
+   * Copy constructor.
+   *
+   * All members except `filesystems` are copied member-wise. Each
+   * Filesystem of `rhs` is duplicated into a newly allocated object,
+   * so the copy never shares Filesystem instances with `rhs`. Entries
+   * are keyed by the Filesystem's own fscid.
+   */
+  FSMap(const FSMap &rhs)
+    :
+      epoch(rhs.epoch),
+      next_filesystem_id(rhs.next_filesystem_id),
+      legacy_client_fscid(rhs.legacy_client_fscid),
+      default_compat(rhs.default_compat),
+      enable_multiple(rhs.enable_multiple),
+      ever_enabled_multiple(rhs.ever_enabled_multiple),
+      mds_roles(rhs.mds_roles),
+      standby_daemons(rhs.standby_daemons),
+      standby_epochs(rhs.standby_epochs),
+      struct_version(rhs.struct_version)
+  {
+    // `filesystems` was just default-constructed, so it is already
+    // empty; no clear() is needed before filling it.
+    for (const auto &entry : rhs.filesystems) {
+      const auto &fs = entry.second;
+      filesystems[fs->fscid] = Filesystem::create(*fs);
+    }
+  }
+
+ FSMap &operator=(const FSMap &rhs);
+
+  // Read-only access to default_compat.
+  const CompatSet &get_default_compat() const
+  {
+    return default_compat;
+  }
+
+  /**
+   * Restrict the map to the filesystems named in `allowed`.
+   *
+   * An empty `allowed` list means "no filtering" and leaves the map
+   * untouched. Otherwise every filesystem whose name is not listed is
+   * erased, followed by every mds_roles entry whose gid does not
+   * resolve (via fs_name_from_gid) to a listed name.
+   */
+  void filter(const std::vector<std::string>& allowed)
+  {
+    if (allowed.empty()) {
+      return;
+    }
+
+    // true when `name` does not appear in `allowed`
+    const auto rejected = [&allowed](const auto& name) {
+      return std::find(allowed.begin(), allowed.end(), name) == allowed.end();
+    };
+
+    erase_if(filesystems, [&](const auto& f) {
+      return rejected(f.second->mds_map.get_fs_name());
+    });
+
+    // must run after the filesystems were pruned: fs_name_from_gid
+    // consults `filesystems`
+    erase_if(mds_roles, [&](const auto& r) {
+      return rejected(fs_name_from_gid(r.first));
+    });
+  }
+
+  /**
+   * Set the enable_multiple flag. Setting it to true also latches
+   * ever_enabled_multiple; setting it to false never resets that latch.
+   */
+  void set_enable_multiple(const bool v)
+  {
+    if (v) {
+      ever_enabled_multiple = true;
+    }
+    enable_multiple = v;
+  }
+
+  // Current value of the enable_multiple flag.
+  bool get_enable_multiple() const { return enable_multiple; }
+
+  /**
+   * Record `fscid` as the legacy client filesystem. Asserts that it is
+   * either FS_CLUSTER_ID_NONE or the id of a filesystem in this map.
+   */
+  void set_legacy_client_fscid(fs_cluster_id_t fscid)
+  {
+    // expression kept verbatim: ceph_assert is a macro
+    ceph_assert(fscid == FS_CLUSTER_ID_NONE || filesystems.count(fscid));
+    legacy_client_fscid = fscid;
+  }
+
+  // Current legacy client filesystem id (may be FS_CLUSTER_ID_NONE).
+  fs_cluster_id_t get_legacy_client_fscid() const { return legacy_client_fscid; }
+
+  // Number of entries in standby_daemons.
+  size_t get_num_standby() const
+  {
+    return standby_daemons.size();
+  }
+
+ bool is_any_degraded() const;
+
+ /**
+ * Get state of all daemons (for all filesystems, including all standbys)
+ */
+ std::map<mds_gid_t, mds_info_t> get_mds_info() const;
+
+ const mds_info_t* get_available_standby(const Filesystem& fs) const;
+
+ /**
+ * Resolve daemon name to GID
+ */
+ mds_gid_t find_mds_gid_by_name(std::string_view s) const;
+
+ /**
+ * Resolve daemon name to status
+ */
+ const mds_info_t* find_by_name(std::string_view name) const;
+
+  /**
+   * Does a daemon exist with this GID?
+   *
+   * @param gid  daemon GID to look up in mds_roles
+   * @param in   optional list of filesystem names; when non-empty, the
+   *             daemon only counts as existing if the name returned by
+   *             fs_name_from_gid(gid) is in this list
+   * @returns false if the GID is unknown or excluded by `in`
+   */
+  bool gid_exists(mds_gid_t gid,
+                  const std::vector<std::string>& in = {}) const
+  {
+    // An unknown GID is an expected outcome here, so test for it
+    // directly rather than letting mds_roles.at() throw and catching
+    // std::out_of_range.
+    if (mds_roles.count(gid) == 0) {
+      return false;
+    }
+    if (in.empty()) {
+      return true;
+    }
+    std::string_view m = fs_name_from_gid(gid);
+    return std::find(in.begin(), in.end(), m) != in.end();
+  }
+
+  /**
+   * Does a daemon with this GID exist, *and* have an MDS rank assigned?
+   * (i.e. its mds_roles entry is something other than FS_CLUSTER_ID_NONE)
+   */
+  bool gid_has_rank(mds_gid_t gid) const
+  {
+    const auto role = mds_roles.find(gid);
+    if (role == mds_roles.end()) {
+      return false;
+    }
+    return role->second != FS_CLUSTER_ID_NONE;
+  }
+
+  /**
+   * Which filesystem owns this GID?
+   * Yields FS_CLUSTER_ID_NONE when the GID is not in mds_roles.
+   */
+  fs_cluster_id_t fscid_from_gid(mds_gid_t gid) const {
+    const auto role = mds_roles.find(gid);
+    return role == mds_roles.end() ? FS_CLUSTER_ID_NONE : role->second;
+  }
+
+ /**
+ * Insert a new MDS daemon, as a standby
+ */
+ void insert(const MDSMap::mds_info_t &new_info);
+
+ /**
+ * Assign an MDS cluster standby replay rank to a standby daemon
+ */
+ void assign_standby_replay(
+ const mds_gid_t standby_gid,
+ const fs_cluster_id_t leader_ns,
+ const mds_rank_t leader_rank);
+
+ /**
+ * Assign an MDS cluster rank to a standby daemon
+ */
+ void promote(
+ mds_gid_t standby_gid,
+ Filesystem& filesystem,
+ mds_rank_t assigned_rank);
+
+ /**
+ * A daemon reports that it is STATE_STOPPED: remove it,
+ * and the rank it held.
+ *
+ * @returns a list of any additional GIDs that were removed from the map
+ * as a side effect (like standby replays)
+ */
+ std::vector<mds_gid_t> stop(mds_gid_t who);
+
+ /**
+ * The rank held by 'who', if any, is to be relinquished, and
+ * the state for the daemon GID is to be forgotten.
+ */
+ void erase(mds_gid_t who, epoch_t blocklist_epoch);
+
+ /**
+ * Update to indicate that the rank held by 'who' is damaged
+ */
+ void damaged(mds_gid_t who, epoch_t blocklist_epoch);
+
+ /**
+ * Update to indicate that the rank `rank` is to be removed
+ * from the damaged list of the filesystem `fscid`
+ */
+ bool undamaged(const fs_cluster_id_t fscid, const mds_rank_t rank);
+
+ /**
+ * Initialize a Filesystem and assign a fscid. Update legacy_client_fscid
+ * to point to the new filesystem if it's the only one.
+ *
+ * Caller must already have validated all arguments vs. the existing
+ * FSMap and OSDMap contents.
+ */
+ Filesystem::ref create_filesystem(
+ std::string_view name, int64_t metadata_pool,
+ int64_t data_pool, uint64_t features,
+ fs_cluster_id_t fscid, bool recover);
+
+ /**
+ * Remove the filesystem (it must exist). Caller should already
+ * have failed out any MDSs that were assigned to the filesystem.
+ */
+ void erase_filesystem(fs_cluster_id_t fscid);
+
+ /**
+ * Reset all the state information (not configuration information)
+ * in a particular filesystem. Caller must have verified that
+ * the filesystem already exists.
+ */
+ void reset_filesystem(fs_cluster_id_t fscid);
+
+  /**
+   * Mutator helper for Filesystem objects: hand the stored
+   * Filesystem::ref for `fscid` to `fn`, then stamp that filesystem's
+   * mds_map.epoch with the FSMap's current epoch.
+   *
+   * The lookup uses std::map::at, so an unknown `fscid` throws
+   * std::out_of_range.
+   */
+  template<typename T>
+  void modify_filesystem(fs_cluster_id_t fscid, T&& fn)
+  {
+    auto& filesystem = filesystems.at(fscid);
+    fn(filesystem);
+    filesystem->mds_map.epoch = epoch;
+  }
+
+  /**
+   * Apply a mutation to the mds_info_t structure for a particular
+   * daemon (identified by GID), and make appropriate updates to epochs.
+   *
+   * A daemon held by a filesystem gets that filesystem's mds_map.epoch
+   * bumped to the current epoch. A standby daemon gets its
+   * standby_epochs entry set instead, and must still be in
+   * STATE_STANDBY once `fn` returns (asserted).
+   */
+  template<typename T>
+  void modify_daemon(mds_gid_t who, T&& fn)
+  {
+    const auto& fscid = mds_roles.at(who);
+    if (fscid != FS_CLUSTER_ID_NONE) {
+      // daemon is held by a filesystem
+      auto& fs = filesystems.at(fscid);
+      auto& daemon = fs->mds_map.mds_info.at(who);
+      fn(daemon);
+      fs->mds_map.epoch = epoch;
+      return;
+    }
+
+    // daemon is a standby
+    auto& info = standby_daemons.at(who);
+    fn(info);
+    ceph_assert(info.state == MDSMap::STATE_STANDBY);
+    standby_epochs[who] = epoch;
+  }
+
+  /**
+   * Given that gid exists in a filesystem or as a standby, return
+   * a reference to its info. Every lookup goes through std::map::at,
+   * so a gid that is missing throws std::out_of_range.
+   */
+  const mds_info_t& get_info_gid(mds_gid_t gid) const
+  {
+    const auto fscid = mds_roles.at(gid);
+    if (fscid != FS_CLUSTER_ID_NONE) {
+      return filesystems.at(fscid)->mds_map.mds_info.at(gid);
+    }
+    return standby_daemons.at(gid);
+  }
+
+  /**
+   * Name of the filesystem that holds daemon `gid`.
+   *
+   * Returns an empty view when the daemon's mds_roles entry is
+   * FS_CLUSTER_ID_NONE or names a filesystem that is not in the map.
+   * A gid missing from mds_roles throws std::out_of_range (map::at).
+   */
+  std::string_view fs_name_from_gid(mds_gid_t gid) const
+  {
+    const auto fscid = mds_roles.at(gid);
+    const bool resolvable =
+      fscid != FS_CLUSTER_ID_NONE && filesystem_exists(fscid);
+    if (!resolvable) {
+      return std::string_view();
+    }
+    return get_filesystem(fscid)->mds_map.get_fs_name();
+  }
+
+  /**
+   * Forward to Filesystem::is_standby_replay() on the filesystem that
+   * mds_roles associates with `who`. Both lookups use std::map::at
+   * and throw std::out_of_range on a miss.
+   */
+  bool is_standby_replay(mds_gid_t who) const
+  {
+    const auto fscid = mds_roles.at(who);
+    const auto& fs = filesystems.at(fscid);
+    return fs->is_standby_replay(who);
+  }
+
+  /**
+   * Forward to Filesystem::get_standby_replay() on the filesystem that
+   * mds_roles associates with `who`. Both lookups use std::map::at
+   * and throw std::out_of_range on a miss.
+   */
+  mds_gid_t get_standby_replay(mds_gid_t who) const
+  {
+    const auto fscid = mds_roles.at(who);
+    const auto& fs = filesystems.at(fscid);
+    return fs->get_standby_replay(who);
+  }
+
+  /**
+   * The filesystem designated by legacy_client_fscid.
+   *
+   * Const-qualified because it only reads the map and hands out a
+   * const_ref, so it is usable on a const FSMap as well.
+   *
+   * @returns nullptr when legacy_client_fscid is FS_CLUSTER_ID_NONE;
+   *          otherwise the matching filesystem (std::map::at throws
+   *          std::out_of_range if the id is not in the map)
+   */
+  Filesystem::const_ref get_legacy_filesystem() const
+  {
+    if (legacy_client_fscid == FS_CLUSTER_ID_NONE) {
+      return nullptr;
+    }
+    return filesystems.at(legacy_client_fscid);
+  }
+
+  /**
+   * A daemon has informed us of its offload targets: store them in
+   * the daemon's mds_info entry, going through modify_filesystem so
+   * the owning filesystem's epoch is updated too.
+   */
+  void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> &targets)
+  {
+    const auto store_targets = [who, &targets](auto&& fs) {
+      auto& daemon = fs->mds_map.mds_info.at(who);
+      daemon.export_targets = targets;
+    };
+    modify_filesystem(mds_roles.at(who), store_targets);
+  }
+
+  // Current epoch of this map.
+  epoch_t get_epoch() const
+  {
+    return epoch;
+  }
+  // Advance the epoch by one.
+  void inc_epoch()
+  {
+    epoch++;
+  }
+
+  // Value of struct_version.
+  version_t get_struct_version() const
+  {
+    return struct_version;
+  }
+  // True while struct_version is below STRUCT_VERSION_TRIM_TO.
+  bool is_struct_old() const
+  {
+    return struct_version < STRUCT_VERSION_TRIM_TO;
+  }
+
+  // Number of filesystems in the map.
+  size_t filesystem_count() const { return filesystems.size(); }
+  // Is there a filesystem with this id?
+  bool filesystem_exists(fs_cluster_id_t fscid) const {
+    return filesystems.count(fscid) > 0;
+  }
+
+  // Lookup by filesystem id; std::map::at throws std::out_of_range
+  // when the id is not in the map.
+  Filesystem::const_ref get_filesystem(fs_cluster_id_t fscid) const {
+    return filesystems.at(fscid);
+  }
+  Filesystem::ref get_filesystem(fs_cluster_id_t fscid) {
+    return filesystems.at(fscid);
+  }
+  // Lookup by daemon GID via mds_roles; throws std::out_of_range if
+  // either the GID or the filesystem it maps to is missing.
+  Filesystem::ref get_filesystem(mds_gid_t gid) {
+    return filesystems.at(mds_roles.at(gid));
+  }
+  // The filesystem with the smallest id, or nullptr when the map holds
+  // no filesystem at all (begin() must not be dereferenced then).
+  Filesystem::const_ref get_filesystem(void) const {
+    if (filesystems.empty()) {
+      return nullptr;
+    }
+    return filesystems.begin()->second;
+  }
+  Filesystem::const_ref get_filesystem(std::string_view name) const;
+  Filesystem::const_ref get_filesystem(mds_gid_t gid) const {
+    return filesystems.at(mds_roles.at(gid));
+  }
+
+ std::vector<Filesystem::const_ref> get_filesystems(void) const;
+
+ int parse_filesystem(
+ std::string_view ns_str,
+ Filesystem::const_ref *result
+ ) const;
+
+ int parse_role(
+ std::string_view role_str,
+ mds_role_t *role,
+ std::ostream &ss,
+ const std::vector<std::string> &filter) const;
+
+ int parse_role(
+ std::string_view role_str,
+ mds_role_t *role,
+ std::ostream &ss) const;
+
+ /**
+ * Return true if this pool is in use by any of the filesystems
+ */
+ bool pool_in_use(int64_t poolid) const;
+
+ const mds_info_t* find_replacement_for(mds_role_t role) const;
+
+ void get_health(std::list<std::pair<health_status_t,std::string> >& summary,
+ std::list<std::pair<health_status_t,std::string> > *detail) const;
+
+ void get_health_checks(health_check_map_t *checks) const;
+
+ bool check_health(void);
+
+ /**
+ * Assert that the FSMap, Filesystem, MDSMap, mds_info_t relations are
+ * all self-consistent.
+ */
+ void sanity(bool pending=false) const;
+
+ void encode(ceph::buffer::list& bl, uint64_t features) const;
+ void decode(ceph::buffer::list::const_iterator& p);
+  // Convenience overload: decode starting at the beginning of `bl`.
+  void decode(ceph::buffer::list& bl) {
+    auto cursor = bl.cbegin();
+    decode(cursor);
+  }
+ void sanitize(const std::function<bool(int64_t pool)>& pool_exists);
+
+ void print(std::ostream& out) const;
+ void print_summary(ceph::Formatter *f, std::ostream *out) const;
+ void print_daemon_summary(std::ostream& out) const;
+ void print_fs_summary(std::ostream& out) const;
+
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<FSMap*>& ls);
+
+protected:
+ epoch_t epoch = 0;
+ uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
+ fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
+ CompatSet default_compat;
+ bool enable_multiple = true;
+ bool ever_enabled_multiple = true; // < the cluster had multiple FS enabled once
+
+ std::map<fs_cluster_id_t, Filesystem::ref> filesystems;
+
+ // Remember which Filesystem an MDS daemon's info is stored in
+ // (or in standby_daemons for FS_CLUSTER_ID_NONE)
+ std::map<mds_gid_t, fs_cluster_id_t> mds_roles;
+
+ // For MDS daemons not yet assigned to a Filesystem
+ std::map<mds_gid_t, mds_info_t> standby_daemons;
+ std::map<mds_gid_t, epoch_t> standby_epochs;
+
+private:
+ epoch_t struct_version = 0;
+};
+WRITE_CLASS_ENCODER_FEATURES(FSMap)
+
+// Stream an FSMap by printing its summary to `out`; no Formatter is
+// passed to print_summary.
+inline std::ostream& operator<<(std::ostream& out, const FSMap& m) {
+  m.print_summary(nullptr, &out);
+  return out;
+}
+
+#endif