From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/mon/AuthMonitor.cc | 2166 ++++++ src/mon/AuthMonitor.h | 240 + src/mon/CMakeLists.txt | 46 + src/mon/CommandHandler.cc | 43 + src/mon/CommandHandler.h | 35 + src/mon/ConfigMap.cc | 291 + src/mon/ConfigMap.h | 154 + src/mon/ConfigMonitor.cc | 1017 +++ src/mon/ConfigMonitor.h | 58 + src/mon/ConnectionTracker.cc | 361 + src/mon/ConnectionTracker.h | 205 + src/mon/CreatingPGs.h | 234 + src/mon/ElectionLogic.cc | 557 ++ src/mon/ElectionLogic.h | 460 ++ src/mon/Elector.cc | 807 +++ src/mon/Elector.h | 406 ++ src/mon/FSCommands.cc | 1702 +++++ src/mon/FSCommands.h | 94 + src/mon/HealthMonitor.cc | 882 +++ src/mon/HealthMonitor.h | 75 + src/mon/KVMonitor.cc | 530 ++ src/mon/KVMonitor.h | 69 + src/mon/LogMonitor.cc | 1293 ++++ src/mon/LogMonitor.h | 210 + src/mon/MDSMonitor.cc | 2398 +++++++ src/mon/MDSMonitor.h | 158 + src/mon/MgrMap.h | 639 ++ src/mon/MgrMonitor.cc | 1448 ++++ src/mon/MgrMonitor.h | 146 + src/mon/MgrStatMonitor.cc | 367 + src/mon/MgrStatMonitor.h | 109 + src/mon/MonCap.cc | 687 ++ src/mon/MonCap.h | 227 + src/mon/MonClient.cc | 2063 ++++++ src/mon/MonClient.h | 784 +++ src/mon/MonCommand.h | 175 + src/mon/MonCommands.h | 1420 ++++ src/mon/MonMap.cc | 1007 +++ src/mon/MonMap.h | 548 ++ src/mon/MonOpRequest.h | 238 + src/mon/MonSub.cc | 114 + src/mon/MonSub.h | 46 + src/mon/Monitor.cc | 6948 +++++++++++++++++++ src/mon/Monitor.h | 1150 ++++ src/mon/MonitorDBStore.h | 814 +++ src/mon/MonmapMonitor.cc | 1464 ++++ src/mon/MonmapMonitor.h | 111 + src/mon/OSDMonitor.cc | 15050 +++++++++++++++++++++++++++++++++++++++++ src/mon/OSDMonitor.h | 883 +++ src/mon/PGMap.cc | 4094 +++++++++++ src/mon/PGMap.h | 557 ++ src/mon/Paxos.cc | 1591 +++++ src/mon/Paxos.h | 1384 ++++ src/mon/PaxosFSMap.h | 54 + src/mon/PaxosService.cc | 466 ++ src/mon/PaxosService.h | 900 +++ src/mon/Session.h | 295 + src/mon/error_code.cc | 85 + src/mon/error_code.h | 49 + src/mon/health_check.h | 198 + src/mon/mon_types.h | 680 ++ 61 files changed, 61282 insertions(+) create mode 100644 src/mon/AuthMonitor.cc create mode 100644 src/mon/AuthMonitor.h create mode 100644 src/mon/CMakeLists.txt create mode 100644 src/mon/CommandHandler.cc create mode 100644 src/mon/CommandHandler.h create mode 100644 src/mon/ConfigMap.cc create mode 100644 src/mon/ConfigMap.h create mode 100644 src/mon/ConfigMonitor.cc create mode 100644 src/mon/ConfigMonitor.h create mode 100644 src/mon/ConnectionTracker.cc create mode 100644 src/mon/ConnectionTracker.h create mode 100644 src/mon/CreatingPGs.h create mode 100644 src/mon/ElectionLogic.cc create mode 100644 src/mon/ElectionLogic.h create mode 100644 src/mon/Elector.cc create mode 100644 src/mon/Elector.h create mode 100644 src/mon/FSCommands.cc create mode 100644 src/mon/FSCommands.h create mode 100644 src/mon/HealthMonitor.cc create mode 100644 src/mon/HealthMonitor.h create mode 100644 src/mon/KVMonitor.cc create mode 100644 src/mon/KVMonitor.h create mode 100644 src/mon/LogMonitor.cc create mode 100644 src/mon/LogMonitor.h create mode 100644 src/mon/MDSMonitor.cc create mode 100644 src/mon/MDSMonitor.h create mode 100644 src/mon/MgrMap.h create mode 100644 src/mon/MgrMonitor.cc create mode 100644 src/mon/MgrMonitor.h create mode 100644 src/mon/MgrStatMonitor.cc create mode 100644 src/mon/MgrStatMonitor.h create mode 100644 src/mon/MonCap.cc create mode 100644 src/mon/MonCap.h create mode 100644 src/mon/MonClient.cc create mode 100644 src/mon/MonClient.h create mode 100644 src/mon/MonCommand.h create mode 100644 src/mon/MonCommands.h create mode 100644 src/mon/MonMap.cc create mode 100644 src/mon/MonMap.h create mode 100644 src/mon/MonOpRequest.h create mode 100644 src/mon/MonSub.cc create mode 100644 src/mon/MonSub.h create mode 100644 src/mon/Monitor.cc create mode 100644 src/mon/Monitor.h create mode 100644 src/mon/MonitorDBStore.h create mode 100644 src/mon/MonmapMonitor.cc create mode 100644 src/mon/MonmapMonitor.h create mode 100644 src/mon/OSDMonitor.cc create mode 100644 src/mon/OSDMonitor.h create mode 100644 src/mon/PGMap.cc create mode 100644 src/mon/PGMap.h create mode 100644 src/mon/Paxos.cc create mode 100644 src/mon/Paxos.h create mode 100644 src/mon/PaxosFSMap.h create mode 100644 src/mon/PaxosService.cc create mode 100644 src/mon/PaxosService.h create mode 100644 src/mon/Session.h create mode 100644 src/mon/error_code.cc create mode 100644 src/mon/error_code.h create mode 100644 src/mon/health_check.h create mode 100644 src/mon/mon_types.h (limited to 'src/mon') diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc new file mode 100644 index 000000000..395ff4926 --- /dev/null +++ b/src/mon/AuthMonitor.cc @@ -0,0 +1,2166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include + +#include "mon/AuthMonitor.h" +#include "mon/Monitor.h" +#include "mon/MonitorDBStore.h" +#include "mon/OSDMonitor.h" +#include "mon/MDSMonitor.h" +#include "mon/ConfigMonitor.h" + +#include "messages/MMonCommand.h" +#include "messages/MAuth.h" +#include "messages/MAuthReply.h" +#include "messages/MMonGlobalID.h" +#include "messages/MMonUsedPendingKeys.h" +#include "msg/Messenger.h" + +#include "auth/AuthServiceHandler.h" +#include "auth/KeyRing.h" +#include "include/stringify.h" +#include "include/ceph_assert.h" + +#include "mds/MDSAuthCaps.h" +#include "mgr/MgrCap.h" +#include "osd/OSDCap.h" + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, get_last_committed()) +using namespace TOPNSPC::common; + +using std::cerr; +using std::cout; +using std::dec; +using std::hex; +using std::list; +using std::map; +using std::make_pair; +using std::ostream; +using std::ostringstream; +using std::pair; +using std::set; +using std::setfill; +using std::string; +using std::stringstream; +using std::to_string; +using std::vector; +using std::unique_ptr; + +using ceph::bufferlist; +using ceph::decode; +using ceph::encode; +using ceph::Formatter; +using ceph::JSONFormatter; +using ceph::make_message; +using ceph::mono_clock; +using ceph::mono_time; +using ceph::timespan_str; +static ostream& _prefix(std::ostream *_dout, Monitor &mon, version_t v) { + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() + << ").auth v" << v << " "; +} + +ostream& operator<<(ostream &out, const AuthMonitor &pm) +{ + return out << "auth"; +} + +bool AuthMonitor::check_rotate() +{ + KeyServerData::Incremental rot_inc; + rot_inc.op = KeyServerData::AUTH_INC_SET_ROTATING; + if (mon.key_server.prepare_rotating_update(rot_inc.rotating_bl)) { + dout(10) << __func__ << " updating rotating" << dendl; + push_cephx_inc(rot_inc); + return true; + } + return false; +} + +void AuthMonitor::process_used_pending_keys( + const std::map& used_pending_keys) +{ + for (auto& [name, used_key] : used_pending_keys) { + dout(10) << __func__ << " used pending_key for " << name << dendl; + KeyServerData::Incremental inc; + inc.op = KeyServerData::AUTH_INC_ADD; + inc.name = name; + + mon.key_server.get_auth(name, inc.auth); + for (auto& p : pending_auth) { + if (p.inc_type == AUTH_DATA) { + KeyServerData::Incremental auth_inc; + auto q = p.auth_data.cbegin(); + decode(auth_inc, q); + if (auth_inc.op == KeyServerData::AUTH_INC_ADD && + auth_inc.name == name) { + dout(10) << __func__ << " starting with pending uncommitted" << dendl; + inc.auth = auth_inc.auth; + } + } + } + if (stringify(inc.auth.pending_key) == stringify(used_key)) { + dout(10) << __func__ << " committing pending_key -> key for " + << name << dendl; + inc.auth.key = inc.auth.pending_key; + inc.auth.pending_key.clear(); + push_cephx_inc(inc); + } + } +} + +/* + Tick function to update the map based on performance every N seconds +*/ + +void AuthMonitor::tick() +{ + if (!is_active()) return; + + dout(10) << *this << dendl; + + // increase global_id? + bool propose = false; + bool increase; + { + std::lock_guard l(mon.auth_lock); + increase = _should_increase_max_global_id(); + } + if (increase) { + if (mon.is_leader()) { + increase_max_global_id(); + propose = true; + } else { + dout(10) << __func__ << "requesting more ids from leader" << dendl; + MMonGlobalID *req = new MMonGlobalID(); + req->old_max_id = max_global_id; + mon.send_mon_message(req, mon.get_leader()); + } + } + + if (mon.monmap->min_mon_release >= ceph_release_t::quincy) { + auto used_pending_keys = mon.key_server.get_used_pending_keys(); + if (!used_pending_keys.empty()) { + dout(10) << __func__ << " " << used_pending_keys.size() << " used pending_keys" + << dendl; + if (mon.is_leader()) { + process_used_pending_keys(used_pending_keys); + propose = true; + } else { + MMonUsedPendingKeys *req = new MMonUsedPendingKeys(); + req->used_pending_keys = used_pending_keys; + mon.send_mon_message(req, mon.get_leader()); + } + } + } + + if (!mon.is_leader()) { + return; + } + + if (check_rotate()) { + propose = true; + } + + if (propose) { + propose_pending(); + } +} + +void AuthMonitor::on_active() +{ + dout(10) << "AuthMonitor::on_active()" << dendl; + + if (!mon.is_leader()) + return; + + mon.key_server.start_server(); + mon.key_server.clear_used_pending_keys(); + + if (is_writeable()) { + bool propose = false; + if (check_rotate()) { + propose = true; + } + bool increase; + { + std::lock_guard l(mon.auth_lock); + increase = _should_increase_max_global_id(); + } + if (increase) { + increase_max_global_id(); + propose = true; + } + if (propose) { + propose_pending(); + } + } +} + +bufferlist _encode_cap(const string& cap) +{ + bufferlist bl; + encode(cap, bl); + return bl; +} + +void AuthMonitor::get_initial_keyring(KeyRing *keyring) +{ + dout(10) << __func__ << dendl; + ceph_assert(keyring != nullptr); + + bufferlist bl; + int ret = mon.store->get("mkfs", "keyring", bl); + if (ret == -ENOENT) { + return; + } + // fail hard only if there's an error we're not expecting to see + ceph_assert(ret == 0); + + auto p = bl.cbegin(); + decode(*keyring, p); +} + +void _generate_bootstrap_keys( + list >* auth_lst) +{ + ceph_assert(auth_lst != nullptr); + + map > bootstrap = { + { "admin", { + { "mon", _encode_cap("allow *") }, + { "osd", _encode_cap("allow *") }, + { "mds", _encode_cap("allow *") }, + { "mgr", _encode_cap("allow *") } + } }, + { "bootstrap-osd", { + { "mon", _encode_cap("allow profile bootstrap-osd") } + } }, + { "bootstrap-rgw", { + { "mon", _encode_cap("allow profile bootstrap-rgw") } + } }, + { "bootstrap-mds", { + { "mon", _encode_cap("allow profile bootstrap-mds") } + } }, + { "bootstrap-mgr", { + { "mon", _encode_cap("allow profile bootstrap-mgr") } + } }, + { "bootstrap-rbd", { + { "mon", _encode_cap("allow profile bootstrap-rbd") } + } }, + { "bootstrap-rbd-mirror", { + { "mon", _encode_cap("allow profile bootstrap-rbd-mirror") } + } } + }; + + for (auto &p : bootstrap) { + EntityName name; + name.from_str("client." + p.first); + EntityAuth auth; + auth.key.create(g_ceph_context, CEPH_CRYPTO_AES); + auth.caps = p.second; + + auth_lst->push_back(make_pair(name, auth)); + } +} + +void AuthMonitor::create_initial_keys(KeyRing *keyring) +{ + dout(10) << __func__ << " with keyring" << dendl; + ceph_assert(keyring != nullptr); + + list > auth_lst; + _generate_bootstrap_keys(&auth_lst); + + for (auto &p : auth_lst) { + if (keyring->exists(p.first)) { + continue; + } + keyring->add(p.first, p.second); + } +} + +void AuthMonitor::create_initial() +{ + dout(10) << "create_initial -- creating initial map" << dendl; + + // initialize rotating keys + mon.key_server.clear_secrets(); + check_rotate(); + ceph_assert(pending_auth.size() == 1); + + if (mon.is_keyring_required()) { + KeyRing keyring; + // attempt to obtain an existing mkfs-time keyring + get_initial_keyring(&keyring); + // create missing keys in the keyring + create_initial_keys(&keyring); + // import the resulting keyring + import_keyring(keyring); + } + + max_global_id = MIN_GLOBAL_ID; + + Incremental inc; + inc.inc_type = GLOBAL_ID; + inc.max_global_id = max_global_id; + pending_auth.push_back(inc); + + format_version = 3; +} + +void AuthMonitor::update_from_paxos(bool *need_bootstrap) +{ + dout(10) << __func__ << dendl; + load_health(); + + version_t version = get_last_committed(); + version_t keys_ver = mon.key_server.get_ver(); + if (version == keys_ver) + return; + ceph_assert(version > keys_ver); + + version_t latest_full = get_version_latest_full(); + + dout(10) << __func__ << " version " << version << " keys ver " << keys_ver + << " latest " << latest_full << dendl; + + if ((latest_full > 0) && (latest_full > keys_ver)) { + bufferlist latest_bl; + int err = get_version_full(latest_full, latest_bl); + ceph_assert(err == 0); + ceph_assert(latest_bl.length() != 0); + dout(7) << __func__ << " loading summary e " << latest_full << dendl; + dout(7) << __func__ << " latest length " << latest_bl.length() << dendl; + auto p = latest_bl.cbegin(); + __u8 struct_v; + decode(struct_v, p); + decode(max_global_id, p); + decode(mon.key_server, p); + mon.key_server.set_ver(latest_full); + keys_ver = latest_full; + } + + dout(10) << __func__ << " key server version " << mon.key_server.get_ver() << dendl; + + // walk through incrementals + while (version > keys_ver) { + bufferlist bl; + int ret = get_version(keys_ver+1, bl); + ceph_assert(ret == 0); + ceph_assert(bl.length()); + + // reset if we are moving to initial state. we will normally have + // keys in here temporarily for bootstrapping that we need to + // clear out. + if (keys_ver == 0) + mon.key_server.clear_secrets(); + + dout(20) << __func__ << " walking through version " << (keys_ver+1) + << " len " << bl.length() << dendl; + + auto p = bl.cbegin(); + __u8 v; + decode(v, p); + while (!p.end()) { + Incremental inc; + decode(inc, p); + switch (inc.inc_type) { + case GLOBAL_ID: + max_global_id = inc.max_global_id; + break; + + case AUTH_DATA: + { + KeyServerData::Incremental auth_inc; + auto iter = inc.auth_data.cbegin(); + decode(auth_inc, iter); + mon.key_server.apply_data_incremental(auth_inc); + break; + } + } + } + + keys_ver++; + mon.key_server.set_ver(keys_ver); + + if (keys_ver == 1 && mon.is_keyring_required()) { + auto t(std::make_shared()); + t->erase("mkfs", "keyring"); + mon.store->apply_transaction(t); + } + } + + { + std::lock_guard l(mon.auth_lock); + if (last_allocated_id == 0) { + last_allocated_id = max_global_id; + dout(10) << __func__ << " last_allocated_id initialized to " + << max_global_id << dendl; + } + } + + dout(10) << __func__ << " max_global_id=" << max_global_id + << " format_version " << format_version + << dendl; + + mon.key_server.dump(); +} + +bool AuthMonitor::_should_increase_max_global_id() +{ + ceph_assert(ceph_mutex_is_locked(mon.auth_lock)); + auto num_prealloc = g_conf()->mon_globalid_prealloc; + if (max_global_id < num_prealloc || + (last_allocated_id + 1) >= max_global_id - num_prealloc / 2) { + return true; + } + return false; +} + +void AuthMonitor::increase_max_global_id() +{ + ceph_assert(mon.is_leader()); + + Incremental inc; + inc.inc_type = GLOBAL_ID; + inc.max_global_id = max_global_id + g_conf()->mon_globalid_prealloc; + dout(10) << "increasing max_global_id to " << inc.max_global_id << dendl; + pending_auth.push_back(inc); +} + +bool AuthMonitor::should_propose(double& delay) +{ + return (!pending_auth.empty()); +} + +void AuthMonitor::create_pending() +{ + pending_auth.clear(); + dout(10) << "create_pending v " << (get_last_committed() + 1) << dendl; +} + +void AuthMonitor::encode_pending(MonitorDBStore::TransactionRef t) +{ + dout(10) << __func__ << " v " << (get_last_committed() + 1) << dendl; + + bufferlist bl; + + __u8 v = 1; + encode(v, bl); + vector::iterator p; + for (p = pending_auth.begin(); p != pending_auth.end(); ++p) + p->encode(bl, mon.get_quorum_con_features()); + + version_t version = get_last_committed() + 1; + put_version(t, version, bl); + put_last_committed(t, version); + + // health + health_check_map_t next; + map> bad_detail; // entity -> details + for (auto i = mon.key_server.secrets_begin(); + i != mon.key_server.secrets_end(); + ++i) { + for (auto& p : i->second.caps) { + ostringstream ss; + if (!valid_caps(p.first, p.second, &ss)) { + ostringstream ss2; + ss2 << i->first << " " << ss.str(); + bad_detail[i->first.to_str()].push_back(ss2.str()); + } + } + } + for (auto& inc : pending_auth) { + if (inc.inc_type == AUTH_DATA) { + KeyServerData::Incremental auth_inc; + auto iter = inc.auth_data.cbegin(); + decode(auth_inc, iter); + if (auth_inc.op == KeyServerData::AUTH_INC_DEL) { + bad_detail.erase(auth_inc.name.to_str()); + } else if (auth_inc.op == KeyServerData::AUTH_INC_ADD) { + for (auto& p : auth_inc.auth.caps) { + ostringstream ss; + if (!valid_caps(p.first, p.second, &ss)) { + ostringstream ss2; + ss2 << auth_inc.name << " " << ss.str(); + bad_detail[auth_inc.name.to_str()].push_back(ss2.str()); + } + } + } + } + } + if (bad_detail.size()) { + ostringstream ss; + ss << bad_detail.size() << " auth entities have invalid capabilities"; + health_check_t *check = &next.add("AUTH_BAD_CAPS", HEALTH_ERR, ss.str(), + bad_detail.size()); + for (auto& i : bad_detail) { + for (auto& j : i.second) { + check->detail.push_back(j); + } + } + } + encode_health(next, t); +} + +void AuthMonitor::encode_full(MonitorDBStore::TransactionRef t) +{ + version_t version = mon.key_server.get_ver(); + // do not stash full version 0 as it will never be removed nor read + if (version == 0) + return; + + dout(10) << __func__ << " auth v " << version << dendl; + ceph_assert(get_last_committed() == version); + + bufferlist full_bl; + std::scoped_lock l{mon.key_server.get_lock()}; + dout(20) << __func__ << " key server has " + << (mon.key_server.has_secrets() ? "" : "no ") + << "secrets!" << dendl; + __u8 v = 1; + encode(v, full_bl); + encode(max_global_id, full_bl); + encode(mon.key_server, full_bl); + + put_version_full(t, version, full_bl); + put_version_latest_full(t, version); +} + +version_t AuthMonitor::get_trim_to() const +{ + unsigned max = g_conf()->paxos_max_join_drift * 2; + version_t version = get_last_committed(); + if (mon.is_leader() && (version > max)) + return version - max; + return 0; +} + +bool AuthMonitor::preprocess_query(MonOpRequestRef op) +{ + auto m = op->get_req(); + dout(10) << "preprocess_query " << *m << " from " << m->get_orig_source_inst() << dendl; + switch (m->get_type()) { + case MSG_MON_COMMAND: + try { + return preprocess_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return true; + } + + case CEPH_MSG_AUTH: + return prep_auth(op, false); + + case MSG_MON_GLOBAL_ID: + return false; + + case MSG_MON_USED_PENDING_KEYS: + return false; + + default: + ceph_abort(); + return true; + } +} + +bool AuthMonitor::prepare_update(MonOpRequestRef op) +{ + auto m = op->get_req(); + dout(10) << "prepare_update " << *m << " from " << m->get_orig_source_inst() << dendl; + switch (m->get_type()) { + case MSG_MON_COMMAND: + try { + return prepare_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return true; + } + case MSG_MON_GLOBAL_ID: + return prepare_global_id(op); + case MSG_MON_USED_PENDING_KEYS: + return prepare_used_pending_keys(op); + case CEPH_MSG_AUTH: + return prep_auth(op, true); + default: + ceph_abort(); + return false; + } +} + +void AuthMonitor::_set_mon_num_rank(int num, int rank) +{ + dout(10) << __func__ << " num " << num << " rank " << rank << dendl; + ceph_assert(ceph_mutex_is_locked(mon.auth_lock)); + mon_num = num; + mon_rank = rank; +} + +uint64_t AuthMonitor::_assign_global_id() +{ + ceph_assert(ceph_mutex_is_locked(mon.auth_lock)); + if (mon_num < 1 || mon_rank < 0) { + dout(10) << __func__ << " inactive (num_mon " << mon_num + << " rank " << mon_rank << ")" << dendl; + return 0; + } + if (!last_allocated_id) { + dout(10) << __func__ << " last_allocated_id == 0" << dendl; + return 0; + } + + uint64_t id = last_allocated_id + 1; + int remainder = id % mon_num; + if (remainder) { + remainder = mon_num - remainder; + } + id += remainder + mon_rank; + + if (id >= max_global_id) { + dout(10) << __func__ << " failed (max " << max_global_id << ")" << dendl; + return 0; + } + + last_allocated_id = id; + dout(10) << __func__ << " " << id << " (max " << max_global_id << ")" + << dendl; + return id; +} + +uint64_t AuthMonitor::assign_global_id(bool should_increase_max) +{ + uint64_t id; + { + std::lock_guard l(mon.auth_lock); + id =_assign_global_id(); + if (should_increase_max) { + should_increase_max = _should_increase_max_global_id(); + } + } + if (mon.is_leader() && + should_increase_max) { + increase_max_global_id(); + } + return id; +} + +bool AuthMonitor::prep_auth(MonOpRequestRef op, bool paxos_writable) +{ + auto m = op->get_req(); + dout(10) << "prep_auth() blob_size=" << m->get_auth_payload().length() << dendl; + + MonSession *s = op->get_session(); + if (!s) { + dout(10) << "no session, dropping" << dendl; + return true; + } + + int ret = 0; + MAuthReply *reply; + bufferlist response_bl; + auto indata = m->auth_payload.cbegin(); + __u32 proto = m->protocol; + bool start = false; + bool finished = false; + EntityName entity_name; + bool is_new_global_id = false; + + // set up handler? + if (m->protocol == 0 && !s->auth_handler) { + set<__u32> supported; + + try { + __u8 struct_v = 1; + decode(struct_v, indata); + decode(supported, indata); + decode(entity_name, indata); + decode(s->con->peer_global_id, indata); + } catch (const ceph::buffer::error &e) { + dout(10) << "failed to decode initial auth message" << dendl; + ret = -EINVAL; + goto reply; + } + + // do we require cephx signatures? + + if (!m->get_connection()->has_feature(CEPH_FEATURE_MSG_AUTH)) { + if (entity_name.get_type() == CEPH_ENTITY_TYPE_MON || + entity_name.get_type() == CEPH_ENTITY_TYPE_OSD || + entity_name.get_type() == CEPH_ENTITY_TYPE_MDS || + entity_name.get_type() == CEPH_ENTITY_TYPE_MGR) { + if (g_conf()->cephx_cluster_require_signatures || + g_conf()->cephx_require_signatures) { + dout(1) << m->get_source_inst() + << " supports cephx but not signatures and" + << " 'cephx [cluster] require signatures = true';" + << " disallowing cephx" << dendl; + supported.erase(CEPH_AUTH_CEPHX); + } + } else { + if (g_conf()->cephx_service_require_signatures || + g_conf()->cephx_require_signatures) { + dout(1) << m->get_source_inst() + << " supports cephx but not signatures and" + << " 'cephx [service] require signatures = true';" + << " disallowing cephx" << dendl; + supported.erase(CEPH_AUTH_CEPHX); + } + } + } else if (!m->get_connection()->has_feature(CEPH_FEATURE_CEPHX_V2)) { + if (entity_name.get_type() == CEPH_ENTITY_TYPE_MON || + entity_name.get_type() == CEPH_ENTITY_TYPE_OSD || + entity_name.get_type() == CEPH_ENTITY_TYPE_MDS || + entity_name.get_type() == CEPH_ENTITY_TYPE_MGR) { + if (g_conf()->cephx_cluster_require_version >= 2 || + g_conf()->cephx_require_version >= 2) { + dout(1) << m->get_source_inst() + << " supports cephx but not v2 and" + << " 'cephx [cluster] require version >= 2';" + << " disallowing cephx" << dendl; + supported.erase(CEPH_AUTH_CEPHX); + } + } else { + if (g_conf()->cephx_service_require_version >= 2 || + g_conf()->cephx_require_version >= 2) { + dout(1) << m->get_source_inst() + << " supports cephx but not v2 and" + << " 'cephx [service] require version >= 2';" + << " disallowing cephx" << dendl; + supported.erase(CEPH_AUTH_CEPHX); + } + } + } + + int type; + if (entity_name.get_type() == CEPH_ENTITY_TYPE_MON || + entity_name.get_type() == CEPH_ENTITY_TYPE_OSD || + entity_name.get_type() == CEPH_ENTITY_TYPE_MDS || + entity_name.get_type() == CEPH_ENTITY_TYPE_MGR) + type = mon.auth_cluster_required.pick(supported); + else + type = mon.auth_service_required.pick(supported); + + s->auth_handler = get_auth_service_handler(type, g_ceph_context, &mon.key_server); + if (!s->auth_handler) { + dout(1) << "client did not provide supported auth type" << dendl; + ret = -ENOTSUP; + goto reply; + } + start = true; + proto = type; + } else if (!s->auth_handler) { + dout(10) << "protocol specified but no s->auth_handler" << dendl; + ret = -EINVAL; + goto reply; + } + + /* assign a new global_id? we assume this should only happen on the first + request. If a client tries to send it later, it'll screw up its auth + session */ + if (!s->con->peer_global_id) { + s->con->peer_global_id = assign_global_id(paxos_writable); + if (!s->con->peer_global_id) { + + delete s->auth_handler; + s->auth_handler = NULL; + + if (mon.is_leader() && paxos_writable) { + dout(10) << "increasing global id, waitlisting message" << dendl; + wait_for_active(op, new C_RetryMessage(this, op)); + goto done; + } + + if (!mon.is_leader()) { + dout(10) << "not the leader, requesting more ids from leader" << dendl; + int leader = mon.get_leader(); + MMonGlobalID *req = new MMonGlobalID(); + req->old_max_id = max_global_id; + mon.send_mon_message(req, leader); + wait_for_finished_proposal(op, new C_RetryMessage(this, op)); + return true; + } + + ceph_assert(!paxos_writable); + return false; + } + is_new_global_id = true; + } + + try { + if (start) { + // new session + ret = s->auth_handler->start_session(entity_name, + s->con->peer_global_id, + is_new_global_id, + &response_bl, + &s->con->peer_caps_info); + } else { + // request + ret = s->auth_handler->handle_request( + indata, + 0, // no connection_secret needed + &response_bl, + &s->con->peer_caps_info, + nullptr, nullptr); + } + if (ret == -EIO) { + wait_for_active(op, new C_RetryMessage(this,op)); + goto done; + } + if (ret > 0) { + if (!s->authenticated && + mon.ms_handle_fast_authentication(s->con.get()) > 0) { + finished = true; + } + ret = 0; + } + } catch (const ceph::buffer::error &err) { + ret = -EINVAL; + dout(0) << "caught error when trying to handle auth request, probably malformed request" << dendl; + } + +reply: + reply = new MAuthReply(proto, &response_bl, ret, s->con->peer_global_id); + mon.send_reply(op, reply); + if (finished) { + // always send the latest monmap. + if (m->monmap_epoch < mon.monmap->get_epoch()) + mon.send_latest_monmap(m->get_connection().get()); + + mon.configmon()->check_sub(s); + } +done: + return true; +} + +bool AuthMonitor::preprocess_command(MonOpRequestRef op) +{ + auto m = op->get_req(); + int r = -1; + bufferlist rdata; + stringstream ss, ds; + + cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { + // ss has reason for failure + string rs = ss.str(); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + return true; + } + + string prefix; + cmd_getval(cmdmap, "prefix", prefix); + if (prefix == "auth add" || + prefix == "auth del" || + prefix == "auth rm" || + prefix == "auth get-or-create" || + prefix == "auth get-or-create-key" || + prefix == "auth get-or-create-pending" || + prefix == "auth clear-pending" || + prefix == "auth commit-pending" || + prefix == "fs authorize" || + prefix == "auth import" || + prefix == "auth caps") { + return false; + } + + MonSession *session = op->get_session(); + if (!session) { + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + return true; + } + + // entity might not be supplied, but if it is, it should be valid + string entity_name; + cmd_getval(cmdmap, "entity", entity_name); + EntityName entity; + if (!entity_name.empty() && !entity.from_str(entity_name)) { + ss << "invalid entity_auth " << entity_name; + mon.reply_command(op, -EINVAL, ss.str(), get_last_committed()); + return true; + } + + string format = cmd_getval_or(cmdmap, "format", "plain"); + boost::scoped_ptr f(Formatter::create(format)); + + if (prefix == "auth export") { + KeyRing keyring; + export_keyring(keyring); + if (!entity_name.empty()) { + EntityAuth eauth; + if (keyring.get_auth(entity, eauth)) { + KeyRing kr; + kr.add(entity, eauth); + if (f) + kr.encode_formatted("auth", f.get(), rdata); + else + kr.encode_plaintext(rdata); + r = 0; + } else { + ss << "no key for " << eauth; + r = -ENOENT; + } + } else { + if (f) + keyring.encode_formatted("auth", f.get(), rdata); + else + keyring.encode_plaintext(rdata); + r = 0; + } + } else if (prefix == "auth get" && !entity_name.empty()) { + KeyRing keyring; + EntityAuth entity_auth; + if (!mon.key_server.get_auth(entity, entity_auth)) { + ss << "failed to find " << entity_name << " in keyring"; + r = -ENOENT; + } else { + keyring.add(entity, entity_auth); + if (f) + keyring.encode_formatted("auth", f.get(), rdata); + else + keyring.encode_plaintext(rdata); + r = 0; + } + } else if (prefix == "auth print-key" || + prefix == "auth print_key" || + prefix == "auth get-key") { + EntityAuth auth; + if (!mon.key_server.get_auth(entity, auth)) { + ss << "don't have " << entity; + r = -ENOENT; + goto done; + } + if (f) { + auth.key.encode_formatted("auth", f.get(), rdata); + } else { + auth.key.encode_plaintext(rdata); + } + r = 0; + } else if (prefix == "auth list" || + prefix == "auth ls") { + if (f) { + mon.key_server.encode_formatted("auth", f.get(), rdata); + } else { + mon.key_server.encode_plaintext(rdata); + } + r = 0; + goto done; + } else { + ss << "invalid command"; + r = -EINVAL; + } + + done: + rdata.append(ds); + string rs; + getline(ss, rs, '\0'); + mon.reply_command(op, r, rs, rdata, get_last_committed()); + return true; +} + +void AuthMonitor::export_keyring(KeyRing& keyring) +{ + mon.key_server.export_keyring(keyring); +} + +int AuthMonitor::import_keyring(KeyRing& keyring) +{ + dout(10) << __func__ << " " << keyring.size() << " keys" << dendl; + + for (map::iterator p = keyring.get_keys().begin(); + p != keyring.get_keys().end(); + ++p) { + if (p->second.caps.empty()) { + dout(0) << "import: no caps supplied" << dendl; + return -EINVAL; + } + int err = add_entity(p->first, p->second); + ceph_assert(err == 0); + } + return 0; +} + +int AuthMonitor::remove_entity(const EntityName &entity) +{ + dout(10) << __func__ << " " << entity << dendl; + if (!mon.key_server.contains(entity)) + return -ENOENT; + + KeyServerData::Incremental auth_inc; + auth_inc.name = entity; + auth_inc.op = KeyServerData::AUTH_INC_DEL; + push_cephx_inc(auth_inc); + + return 0; +} + +bool AuthMonitor::entity_is_pending(EntityName& entity) +{ + // are we about to have it? + for (auto& p : pending_auth) { + if (p.inc_type == AUTH_DATA) { + KeyServerData::Incremental inc; + auto q = p.auth_data.cbegin(); + decode(inc, q); + if (inc.op == KeyServerData::AUTH_INC_ADD && + inc.name == entity) { + return true; + } + } + } + return false; +} + +int AuthMonitor::exists_and_matches_entity( + const auth_entity_t& entity, + bool has_secret, + stringstream& ss) +{ + return exists_and_matches_entity(entity.name, entity.auth, + entity.auth.caps, has_secret, ss); +} + +int AuthMonitor::exists_and_matches_entity( + const EntityName& name, + const EntityAuth& auth, + const map& caps, + bool has_secret, + stringstream& ss) +{ + + dout(20) << __func__ << " entity " << name << " auth " << auth + << " caps " << caps << " has_secret " << has_secret << dendl; + + EntityAuth existing_auth; + // does entry already exist? + if (mon.key_server.get_auth(name, existing_auth)) { + // key match? + if (has_secret) { + if (existing_auth.key.get_secret().cmp(auth.key.get_secret())) { + ss << "entity " << name << " exists but key does not match"; + return -EEXIST; + } + } + + // caps match? + if (caps.size() != existing_auth.caps.size()) { + ss << "entity " << name << " exists but caps do not match"; + return -EINVAL; + } + for (auto& it : caps) { + if (existing_auth.caps.count(it.first) == 0 || + !existing_auth.caps[it.first].contents_equal(it.second)) { + ss << "entity " << name << " exists but cap " + << it.first << " does not match"; + return -EINVAL; + } + } + + // they match, no-op + return 0; + } + return -ENOENT; +} + +int AuthMonitor::add_entity( + const EntityName& name, + const EntityAuth& auth) +{ + + // okay, add it. + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = name; + auth_inc.auth = auth; + + dout(10) << " add auth entity " << auth_inc.name << dendl; + dout(30) << " " << auth_inc.auth << dendl; + push_cephx_inc(auth_inc); + return 0; +} + +int AuthMonitor::validate_osd_destroy( + int32_t id, + const uuid_d& uuid, + EntityName& cephx_entity, + EntityName& lockbox_entity, + stringstream& ss) +{ + ceph_assert(paxos.is_plugged()); + + dout(10) << __func__ << " id " << id << " uuid " << uuid << dendl; + + string cephx_str = "osd." + stringify(id); + string lockbox_str = "client.osd-lockbox." + stringify(uuid); + + if (!cephx_entity.from_str(cephx_str)) { + dout(10) << __func__ << " invalid cephx entity '" + << cephx_str << "'" << dendl; + ss << "invalid cephx key entity '" << cephx_str << "'"; + return -EINVAL; + } + + if (!lockbox_entity.from_str(lockbox_str)) { + dout(10) << __func__ << " invalid lockbox entity '" + << lockbox_str << "'" << dendl; + ss << "invalid lockbox key entity '" << lockbox_str << "'"; + return -EINVAL; + } + + if (!mon.key_server.contains(cephx_entity) && + !mon.key_server.contains(lockbox_entity)) { + return -ENOENT; + } + + return 0; +} + +int AuthMonitor::do_osd_destroy( + const EntityName& cephx_entity, + const EntityName& lockbox_entity) +{ + ceph_assert(paxos.is_plugged()); + + dout(10) << __func__ << " cephx " << cephx_entity + << " lockbox " << lockbox_entity << dendl; + + bool removed = false; + + int err = remove_entity(cephx_entity); + if (err == -ENOENT) { + dout(10) << __func__ << " " << cephx_entity << " does not exist" << dendl; + } else { + removed = true; + } + + err = remove_entity(lockbox_entity); + if (err == -ENOENT) { + dout(10) << __func__ << " " << lockbox_entity << " does not exist" << dendl; + } else { + removed = true; + } + + if (!removed) { + dout(10) << __func__ << " entities do not exist -- no-op." << dendl; + return 0; + } + + // given we have paxos plugged, this will not result in a proposal + // being triggered, but it will still be needed so that we get our + // pending state encoded into the paxos' pending transaction. + propose_pending(); + return 0; +} + +int _create_auth( + EntityAuth& auth, + const string& key, + const map& caps) +{ + if (key.empty()) + return -EINVAL; + try { + auth.key.decode_base64(key); + } catch (ceph::buffer::error& e) { + return -EINVAL; + } + auth.caps = caps; + return 0; +} + +int AuthMonitor::validate_osd_new( + int32_t id, + const uuid_d& uuid, + const string& cephx_secret, + const string& lockbox_secret, + auth_entity_t& cephx_entity, + auth_entity_t& lockbox_entity, + stringstream& ss) +{ + + dout(10) << __func__ << " osd." << id << " uuid " << uuid << dendl; + + map cephx_caps = { + { "osd", _encode_cap("allow *") }, + { "mon", _encode_cap("allow profile osd") }, + { "mgr", _encode_cap("allow profile osd") } + }; + map lockbox_caps = { + { "mon", _encode_cap("allow command \"config-key get\" " + "with key=\"dm-crypt/osd/" + + stringify(uuid) + + "/luks\"") } + }; + + bool has_lockbox = !lockbox_secret.empty(); + + string cephx_name = "osd." + stringify(id); + string lockbox_name = "client.osd-lockbox." + stringify(uuid); + + if (!cephx_entity.name.from_str(cephx_name)) { + dout(10) << __func__ << " invalid cephx entity '" + << cephx_name << "'" << dendl; + ss << "invalid cephx key entity '" << cephx_name << "'"; + return -EINVAL; + } + + if (has_lockbox) { + if (!lockbox_entity.name.from_str(lockbox_name)) { + dout(10) << __func__ << " invalid cephx lockbox entity '" + << lockbox_name << "'" << dendl; + ss << "invalid cephx lockbox entity '" << lockbox_name << "'"; + return -EINVAL; + } + } + + if (entity_is_pending(cephx_entity.name) || + (has_lockbox && entity_is_pending(lockbox_entity.name))) { + // If we have pending entities for either the cephx secret or the + // lockbox secret, then our safest bet is to retry the command at + // a later time. These entities may be pending because an `osd new` + // command has been run (which is unlikely, due to the nature of + // the operation, which will force a paxos proposal), or (more likely) + // because a competing client created those entities before we handled + // the `osd new` command. Regardless, let's wait and see. + return -EAGAIN; + } + + if (!is_valid_cephx_key(cephx_secret)) { + ss << "invalid cephx secret."; + return -EINVAL; + } + + if (has_lockbox && !is_valid_cephx_key(lockbox_secret)) { + ss << "invalid cephx lockbox secret."; + return -EINVAL; + } + + int err = _create_auth(cephx_entity.auth, cephx_secret, cephx_caps); + ceph_assert(0 == err); + + bool cephx_is_idempotent = false, lockbox_is_idempotent = false; + err = exists_and_matches_entity(cephx_entity, true, ss); + + if (err != -ENOENT) { + if (err < 0) { + return err; + } + ceph_assert(0 == err); + cephx_is_idempotent = true; + } + + if (has_lockbox) { + err = _create_auth(lockbox_entity.auth, lockbox_secret, lockbox_caps); + ceph_assert(err == 0); + err = exists_and_matches_entity(lockbox_entity, true, ss); + if (err != -ENOENT) { + if (err < 0) { + return err; + } + ceph_assert(0 == err); + lockbox_is_idempotent = true; + } + } + + if (cephx_is_idempotent && (!has_lockbox || lockbox_is_idempotent)) { + return EEXIST; + } + + return 0; +} + +int AuthMonitor::do_osd_new( + const auth_entity_t& cephx_entity, + const auth_entity_t& lockbox_entity, + bool has_lockbox) +{ + ceph_assert(paxos.is_plugged()); + + dout(10) << __func__ << " cephx " << cephx_entity.name + << " lockbox "; + if (has_lockbox) { + *_dout << lockbox_entity.name; + } else { + *_dout << "n/a"; + } + *_dout << dendl; + + // we must have validated before reaching this point. + // if keys exist, then this means they also match; otherwise we would + // have failed before calling this function. + bool cephx_exists = mon.key_server.contains(cephx_entity.name); + + if (!cephx_exists) { + int err = add_entity(cephx_entity.name, cephx_entity.auth); + ceph_assert(0 == err); + } + + if (has_lockbox && + !mon.key_server.contains(lockbox_entity.name)) { + int err = add_entity(lockbox_entity.name, lockbox_entity.auth); + ceph_assert(0 == err); + } + + // given we have paxos plugged, this will not result in a proposal + // being triggered, but it will still be needed so that we get our + // pending state encoded into the paxos' pending transaction. + propose_pending(); + return 0; +} + +bool AuthMonitor::valid_caps( + const string& type, + const string& caps, + ostream *out) +{ + if (type == "mon") { + MonCap moncap; + if (!moncap.parse(caps, out)) { + return false; + } + return true; + } + + if (!g_conf().get_val("mon_auth_validate_all_caps")) { + return true; + } + + if (type == "mgr") { + MgrCap mgrcap; + if (!mgrcap.parse(caps, out)) { + return false; + } + } else if (type == "osd") { + OSDCap ocap; + if (!ocap.parse(caps, out)) { + return false; + } + } else if (type == "mds") { + MDSAuthCaps mdscap; + if (!mdscap.parse(caps, out)) { + return false; + } + } else { + if (out) { + *out << "unknown cap type '" << type << "'"; + } + return false; + } + return true; +} + +bool AuthMonitor::valid_caps(const vector& caps, ostream *out) +{ + for (vector::const_iterator p = caps.begin(); + p != caps.end(); p += 2) { + if ((p+1) == caps.end()) { + *out << "cap '" << *p << "' has no value"; + return false; + } + if (!valid_caps(*p, *(p+1), out)) { + return false; + } + } + return true; +} + +bool AuthMonitor::prepare_command(MonOpRequestRef op) +{ + auto m = op->get_req(); + stringstream ss, ds; + bufferlist rdata; + string rs; + int err = -EINVAL; + + cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { + // ss has reason for failure + string rs = ss.str(); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + return true; + } + + string prefix; + vectorcaps_vec; + string entity_name; + EntityName entity; + + cmd_getval(cmdmap, "prefix", prefix); + + string format = cmd_getval_or(cmdmap, "format", "plain"); + boost::scoped_ptr f(Formatter::create(format)); + + MonSession *session = op->get_session(); + if (!session) { + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + return true; + } + + cmd_getval(cmdmap, "caps", caps_vec); + // fs authorize command's can have odd number of caps arguments + if ((prefix != "fs authorize") && (caps_vec.size() % 2) != 0) { + ss << "bad capabilities request; odd number of arguments"; + err = -EINVAL; + goto done; + } + + cmd_getval(cmdmap, "entity", entity_name); + if (!entity_name.empty() && !entity.from_str(entity_name)) { + ss << "bad entity name"; + err = -EINVAL; + goto done; + } + + if (prefix == "auth import") { + bufferlist bl = m->get_data(); + if (bl.length() == 0) { + ss << "auth import: no data supplied"; + getline(ss, rs); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); + return true; + } + auto iter = bl.cbegin(); + KeyRing keyring; + try { + decode(keyring, iter); + } catch (const ceph::buffer::error &ex) { + ss << "error decoding keyring" << " " << ex.what(); + err = -EINVAL; + goto done; + } + err = import_keyring(keyring); + if (err < 0) { + ss << "auth import: no caps supplied"; + getline(ss, rs); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); + return true; + } + err = 0; + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); + return true; + } else if (prefix == "auth add" && !entity_name.empty()) { + /* expected behavior: + * - if command reproduces current state, return 0. + * - if command adds brand new entity, handle it. + * - if command adds new state to existing entity, return error. + */ + KeyServerData::Incremental auth_inc; + auth_inc.name = entity; + bufferlist bl = m->get_data(); + bool has_keyring = (bl.length() > 0); + map new_caps; + + KeyRing new_keyring; + if (has_keyring) { + auto iter = bl.cbegin(); + try { + decode(new_keyring, iter); + } catch (const ceph::buffer::error &ex) { + ss << "error decoding keyring"; + err = -EINVAL; + goto done; + } + } + + if (!valid_caps(caps_vec, &ss)) { + err = -EINVAL; + goto done; + } + + // are we about to have it? + if (entity_is_pending(entity)) { + wait_for_finished_proposal(op, + new Monitor::C_Command(mon, op, 0, rs, get_last_committed() + 1)); + return true; + } + + // build new caps from provided arguments (if available) + for (vector::iterator it = caps_vec.begin(); + it != caps_vec.end() && (it + 1) != caps_vec.end(); + it += 2) { + string sys = *it; + bufferlist cap; + encode(*(it+1), cap); + new_caps[sys] = cap; + } + + // pull info out of provided keyring + EntityAuth new_inc; + if (has_keyring) { + if (!new_keyring.get_auth(auth_inc.name, new_inc)) { + ss << "key for " << auth_inc.name + << " not found in provided keyring"; + err = -EINVAL; + goto done; + } + if (!new_caps.empty() && !new_inc.caps.empty()) { + ss << "caps cannot be specified both in keyring and in command"; + err = -EINVAL; + goto done; + } + if (new_caps.empty()) { + new_caps = new_inc.caps; + } + } + + err = exists_and_matches_entity(auth_inc.name, new_inc, + new_caps, has_keyring, ss); + // if entity/key/caps do not exist in the keyring, just fall through + // and add the entity; otherwise, make sure everything matches (in + // which case it's a no-op), because if not we must fail. + if (err != -ENOENT) { + if (err < 0) { + goto done; + } + // no-op. + ceph_assert(err == 0); + goto done; + } + err = 0; + + // okay, add it. + if (!has_keyring) { + dout(10) << "AuthMonitor::prepare_command generating random key for " + << auth_inc.name << dendl; + new_inc.key.create(g_ceph_context, CEPH_CRYPTO_AES); + } + new_inc.caps = new_caps; + + err = add_entity(auth_inc.name, new_inc); + ceph_assert(err == 0); + + ss << "added key for " << auth_inc.name; + getline(ss, rs); + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); + return true; + } else if ((prefix == "auth get-or-create-pending" || + prefix == "auth clear-pending" || + prefix == "auth commit-pending")) { + if (mon.monmap->min_mon_release < ceph_release_t::quincy) { + err = -EPERM; + ss << "pending_keys are not available until after upgrading to quincy"; + goto done; + } + + EntityAuth entity_auth; + if (!mon.key_server.get_auth(entity, entity_auth)) { + ss << "entity " << entity << " does not exist"; + err = -ENOENT; + goto done; + } + + // is there an uncommitted pending_key? (or any change for this entity) + for (auto& p : pending_auth) { + if (p.inc_type == AUTH_DATA) { + KeyServerData::Incremental auth_inc; + auto q = p.auth_data.cbegin(); + decode(auth_inc, q); + if (auth_inc.op == KeyServerData::AUTH_INC_ADD && + auth_inc.name == entity) { + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); + return true; + } + } + } + + if (prefix == "auth get-or-create-pending") { + KeyRing kr; + bool exists = false; + if (!entity_auth.pending_key.empty()) { + kr.add(entity, entity_auth.key, entity_auth.pending_key); + err = 0; + exists = true; + } else { + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = entity; + auth_inc.auth = entity_auth; + auth_inc.auth.pending_key.create(g_ceph_context, CEPH_CRYPTO_AES); + push_cephx_inc(auth_inc); + kr.add(entity, auth_inc.auth.key, auth_inc.auth.pending_key); + push_cephx_inc(auth_inc); + } + if (f) { + kr.encode_formatted("auth", f.get(), rdata); + } else { + kr.encode_plaintext(rdata); + } + if (exists) { + goto done; + } + } else if (prefix == "auth clear-pending") { + if (entity_auth.pending_key.empty()) { + err = 0; + goto done; + } + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = entity; + auth_inc.auth = entity_auth; + auth_inc.auth.pending_key.clear(); + push_cephx_inc(auth_inc); + } else if (prefix == "auth commit-pending") { + if (entity_auth.pending_key.empty()) { + err = 0; + ss << "no pending key"; + goto done; + } + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = entity; + auth_inc.auth = entity_auth; + auth_inc.auth.key = auth_inc.auth.pending_key; + auth_inc.auth.pending_key.clear(); + push_cephx_inc(auth_inc); + } + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, rdata, + get_last_committed() + 1)); + return true; + } else if ((prefix == "auth get-or-create-key" || + prefix == "auth get-or-create") && + !entity_name.empty()) { + // auth get-or-create [mon osdcapa osd osdcapb ...] + + if (!valid_caps(caps_vec, &ss)) { + err = -EINVAL; + goto done; + } + + // Parse the list of caps into a map + std::map wanted_caps; + for (vector::const_iterator it = caps_vec.begin(); + it != caps_vec.end() && (it + 1) != caps_vec.end(); + it += 2) { + const std::string &sys = *it; + bufferlist cap; + encode(*(it+1), cap); + wanted_caps[sys] = cap; + } + + // do we have it? + EntityAuth entity_auth; + if (mon.key_server.get_auth(entity, entity_auth)) { + for (const auto &sys_cap : wanted_caps) { + if (entity_auth.caps.count(sys_cap.first) == 0 || + !entity_auth.caps[sys_cap.first].contents_equal(sys_cap.second)) { + ss << "key for " << entity << " exists but cap " << sys_cap.first + << " does not match"; + err = -EINVAL; + goto done; + } + } + + if (prefix == "auth get-or-create-key") { + if (f) { + entity_auth.key.encode_formatted("auth", f.get(), rdata); + } else { + ds << entity_auth.key; + } + } else { + KeyRing kr; + kr.add(entity, entity_auth.key, entity_auth.pending_key); + if (f) { + kr.set_caps(entity, entity_auth.caps); + kr.encode_formatted("auth", f.get(), rdata); + } else { + kr.encode_plaintext(rdata); + } + } + err = 0; + goto done; + } + + // ...or are we about to? + for (vector::iterator p = pending_auth.begin(); + p != pending_auth.end(); + ++p) { + if (p->inc_type == AUTH_DATA) { + KeyServerData::Incremental auth_inc; + auto q = p->auth_data.cbegin(); + decode(auth_inc, q); + if (auth_inc.op == KeyServerData::AUTH_INC_ADD && + auth_inc.name == entity) { + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); + return true; + } + } + } + + // create it + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = entity; + auth_inc.auth.key.create(g_ceph_context, CEPH_CRYPTO_AES); + auth_inc.auth.caps = wanted_caps; + + push_cephx_inc(auth_inc); + + if (prefix == "auth get-or-create-key") { + if (f) { + auth_inc.auth.key.encode_formatted("auth", f.get(), rdata); + } else { + ds << auth_inc.auth.key; + } + } else { + KeyRing kr; + kr.add(entity, auth_inc.auth.key); + if (f) { + kr.set_caps(entity, wanted_caps); + kr.encode_formatted("auth", f.get(), rdata); + } else { + kr.encode_plaintext(rdata); + } + } + + rdata.append(ds); + getline(ss, rs); + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, rdata, + get_last_committed() + 1)); + return true; + } else if (prefix == "fs authorize") { + string filesystem; + cmd_getval(cmdmap, "filesystem", filesystem); + string mon_cap_string = "allow r"; + string mds_cap_string, osd_cap_string; + string osd_cap_wanted = "r"; + + std::shared_ptr fs; + if (filesystem != "*" && filesystem != "all") { + fs = mon.mdsmon()->get_fsmap().get_filesystem(filesystem); + if (fs == nullptr) { + ss << "filesystem " << filesystem << " does not exist."; + err = -EINVAL; + goto done; + } else { + mon_cap_string += " fsname=" + std::string(fs->mds_map.get_fs_name()); + } + } + + for (auto it = caps_vec.begin(); + it != caps_vec.end() && (it + 1) != caps_vec.end(); + it += 2) { + const string &path = *it; + const string &cap = *(it+1); + bool root_squash = false; + if ((it + 2) != caps_vec.end() && *(it+2) == "root_squash") { + root_squash = true; + ++it; + } + + if (cap != "r" && cap.compare(0, 2, "rw")) { + ss << "Permission flags must start with 'r' or 'rw'."; + err = -EINVAL; + goto done; + } + if (cap.compare(0, 2, "rw") == 0) + osd_cap_wanted = "rw"; + + char last='\0'; + for (size_t i = 2; i < cap.size(); ++i) { + char c = cap.at(i); + if (last >= c) { + ss << "Permission flags (except 'rw') must be specified in alphabetical order."; + err = -EINVAL; + goto done; + } + switch (c) { + case 'p': + break; + case 's': + break; + default: + ss << "Unknown permission flag '" << c << "'."; + err = -EINVAL; + goto done; + } + } + + mds_cap_string += mds_cap_string.empty() ? "" : ", "; + mds_cap_string += "allow " + cap; + + if (filesystem != "*" && filesystem != "all" && fs != nullptr) { + mds_cap_string += " fsname=" + std::string(fs->mds_map.get_fs_name()); + } + + if (path != "/") { + mds_cap_string += " path=" + path; + } + + if (root_squash) { + mds_cap_string += " root_squash"; + } + } + + osd_cap_string += osd_cap_string.empty() ? "" : ", "; + osd_cap_string += "allow " + osd_cap_wanted + + " tag " + pg_pool_t::APPLICATION_NAME_CEPHFS + + " data=" + filesystem; + + std::map wanted_caps = { + { "mon", _encode_cap(mon_cap_string) }, + { "osd", _encode_cap(osd_cap_string) }, + { "mds", _encode_cap(mds_cap_string) } + }; + + if (!valid_caps("mon", mon_cap_string, &ss) || + !valid_caps("osd", osd_cap_string, &ss) || + !valid_caps("mds", mds_cap_string, &ss)) { + err = -EINVAL; + goto done; + } + + EntityAuth entity_auth; + if (mon.key_server.get_auth(entity, entity_auth)) { + for (const auto &sys_cap : wanted_caps) { + if (entity_auth.caps.count(sys_cap.first) == 0 || + !entity_auth.caps[sys_cap.first].contents_equal(sys_cap.second)) { + ss << entity << " already has fs capabilities that differ from " + << "those supplied. To generate a new auth key for " << entity + << ", first remove " << entity << " from configuration files, " + << "execute 'ceph auth rm " << entity << "', then execute this " + << "command again."; + err = -EINVAL; + goto done; + } + } + + KeyRing kr; + kr.add(entity, entity_auth.key); + if (f) { + kr.set_caps(entity, entity_auth.caps); + kr.encode_formatted("auth", f.get(), rdata); + } else { + kr.encode_plaintext(rdata); + } + err = 0; + goto done; + } + + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = entity; + auth_inc.auth.key.create(g_ceph_context, CEPH_CRYPTO_AES); + auth_inc.auth.caps = wanted_caps; + + push_cephx_inc(auth_inc); + KeyRing kr; + kr.add(entity, auth_inc.auth.key); + if (f) { + kr.set_caps(entity, wanted_caps); + kr.encode_formatted("auth", f.get(), rdata); + } else { + kr.encode_plaintext(rdata); + } + + rdata.append(ds); + getline(ss, rs); + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, rdata, + get_last_committed() + 1)); + return true; + } else if (prefix == "auth caps" && !entity_name.empty()) { + KeyServerData::Incremental auth_inc; + auth_inc.name = entity; + if (!mon.key_server.get_auth(auth_inc.name, auth_inc.auth)) { + ss << "couldn't find entry " << auth_inc.name; + err = -ENOENT; + goto done; + } + + if (!valid_caps(caps_vec, &ss)) { + err = -EINVAL; + goto done; + } + + map newcaps; + for (vector::iterator it = caps_vec.begin(); + it != caps_vec.end(); it += 2) + encode(*(it+1), newcaps[*it]); + + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.auth.caps = newcaps; + push_cephx_inc(auth_inc); + + ss << "updated caps for " << auth_inc.name; + getline(ss, rs); + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); + return true; + } else if ((prefix == "auth del" || prefix == "auth rm") && + !entity_name.empty()) { + KeyServerData::Incremental auth_inc; + auth_inc.name = entity; + if (!mon.key_server.contains(auth_inc.name)) { + err = 0; + goto done; + } + auth_inc.op = KeyServerData::AUTH_INC_DEL; + push_cephx_inc(auth_inc); + + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); + return true; + } +done: + rdata.append(ds); + getline(ss, rs, '\0'); + mon.reply_command(op, err, rs, rdata, get_last_committed()); + return false; +} + +bool AuthMonitor::prepare_global_id(MonOpRequestRef op) +{ + dout(10) << "AuthMonitor::prepare_global_id" << dendl; + increase_max_global_id(); + return true; +} + +bool AuthMonitor::prepare_used_pending_keys(MonOpRequestRef op) +{ + dout(10) << __func__ << " " << op << dendl; + auto m = op->get_req(); + process_used_pending_keys(m->used_pending_keys); + return true; +} + +bool AuthMonitor::_upgrade_format_to_dumpling() +{ + dout(1) << __func__ << " upgrading from format 0 to 1" << dendl; + ceph_assert(format_version == 0); + + bool changed = false; + map::iterator p; + for (p = mon.key_server.secrets_begin(); + p != mon.key_server.secrets_end(); + ++p) { + // grab mon caps, if any + string mon_caps; + if (p->second.caps.count("mon") == 0) + continue; + try { + auto it = p->second.caps["mon"].cbegin(); + decode(mon_caps, it); + } + catch (const ceph::buffer::error&) { + dout(10) << __func__ << " unable to parse mon cap for " + << p->first << dendl; + continue; + } + + string n = p->first.to_str(); + string new_caps; + + // set daemon profiles + if ((p->first.is_osd() || p->first.is_mds()) && + mon_caps == "allow rwx") { + new_caps = string("allow profile ") + std::string(p->first.get_type_name()); + } + + // update bootstrap keys + if (n == "client.bootstrap-osd") { + new_caps = "allow profile bootstrap-osd"; + } + if (n == "client.bootstrap-mds") { + new_caps = "allow profile bootstrap-mds"; + } + + if (new_caps.length() > 0) { + dout(5) << __func__ << " updating " << p->first << " mon cap from " + << mon_caps << " to " << new_caps << dendl; + + bufferlist bl; + encode(new_caps, bl); + + KeyServerData::Incremental auth_inc; + auth_inc.name = p->first; + auth_inc.auth = p->second; + auth_inc.auth.caps["mon"] = bl; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + push_cephx_inc(auth_inc); + changed = true; + } + } + return changed; +} + +bool AuthMonitor::_upgrade_format_to_luminous() +{ + dout(1) << __func__ << " upgrading from format 1 to 2" << dendl; + ceph_assert(format_version == 1); + + bool changed = false; + map::iterator p; + for (p = mon.key_server.secrets_begin(); + p != mon.key_server.secrets_end(); + ++p) { + string n = p->first.to_str(); + + string newcap; + if (n == "client.admin") { + // admin gets it all + newcap = "allow *"; + } else if (n.find("osd.") == 0 || + n.find("mds.") == 0 || + n.find("mon.") == 0) { + // daemons follow their profile + string type = n.substr(0, 3); + newcap = "allow profile " + type; + } else if (p->second.caps.count("mon")) { + // if there are any mon caps, give them 'r' mgr caps + newcap = "allow r"; + } + + if (newcap.length() > 0) { + dout(5) << " giving " << n << " mgr '" << newcap << "'" << dendl; + bufferlist bl; + encode(newcap, bl); + + EntityAuth auth = p->second; + auth.caps["mgr"] = bl; + + add_entity(p->first, auth); + changed = true; + } + + if (n.find("mgr.") == 0 && + p->second.caps.count("mon")) { + // the kraken ceph-mgr@.service set the mon cap to 'allow *'. + auto blp = p->second.caps["mon"].cbegin(); + string oldcaps; + decode(oldcaps, blp); + if (oldcaps == "allow *") { + dout(5) << " fixing " << n << " mon cap to 'allow profile mgr'" + << dendl; + bufferlist bl; + encode("allow profile mgr", bl); + + EntityAuth auth = p->second; + auth.caps["mon"] = bl; + add_entity(p->first, p->second); + changed = true; + } + } + } + + // add bootstrap key if it does not already exist + // (might have already been get-or-create'd by + // ceph-create-keys) + EntityName bootstrap_mgr_name; + int r = bootstrap_mgr_name.from_str("client.bootstrap-mgr"); + ceph_assert(r); + if (!mon.key_server.contains(bootstrap_mgr_name)) { + + EntityName name = bootstrap_mgr_name; + EntityAuth auth; + encode("allow profile bootstrap-mgr", auth.caps["mon"]); + auth.key.create(g_ceph_context, CEPH_CRYPTO_AES); + add_entity(name, auth); + changed = true; + } + return changed; +} + +bool AuthMonitor::_upgrade_format_to_mimic() +{ + dout(1) << __func__ << " upgrading from format 2 to 3" << dendl; + ceph_assert(format_version == 2); + + list > auth_lst; + _generate_bootstrap_keys(&auth_lst); + + bool changed = false; + for (auto &p : auth_lst) { + if (mon.key_server.contains(p.first)) { + continue; + } + int err = add_entity(p.first, p.second); + ceph_assert(err == 0); + changed = true; + } + + return changed; +} + +void AuthMonitor::upgrade_format() +{ + constexpr unsigned int FORMAT_NONE = 0; + constexpr unsigned int FORMAT_DUMPLING = 1; + constexpr unsigned int FORMAT_LUMINOUS = 2; + constexpr unsigned int FORMAT_MIMIC = 3; + + // when upgrading from the current format to a new format, ensure that + // the new format doesn't break the older format. I.e., if a given format N + // changes or adds something, ensure that when upgrading from N-1 to N+1, we + // still observe the changes for format N if those have not been superseded + // by N+1. + + unsigned int current = FORMAT_MIMIC; + if (!mon.get_quorum_mon_features().contains_all( + ceph::features::mon::FEATURE_LUMINOUS)) { + // pre-luminous quorum + current = FORMAT_DUMPLING; + } else if (!mon.get_quorum_mon_features().contains_all( + ceph::features::mon::FEATURE_MIMIC)) { + // pre-mimic quorum + current = FORMAT_LUMINOUS; + } + if (format_version >= current) { + dout(20) << __func__ << " format " << format_version + << " is current" << dendl; + return; + } + + // perform a rolling upgrade of the new format, if necessary. + // i.e., if we are moving from format NONE to MIMIC, we will first upgrade + // to DUMPLING, then to LUMINOUS, and finally to MIMIC, in several different + // proposals. + + bool changed = false; + if (format_version == FORMAT_NONE) { + changed = _upgrade_format_to_dumpling(); + + } else if (format_version == FORMAT_DUMPLING) { + changed = _upgrade_format_to_luminous(); + } else if (format_version == FORMAT_LUMINOUS) { + changed = _upgrade_format_to_mimic(); + } + + if (changed) { + // note new format + dout(10) << __func__ << " proposing update from format " << format_version + << " -> " << current << dendl; + format_version = current; + propose_pending(); + } +} + +void AuthMonitor::dump_info(Formatter *f) +{ + /*** WARNING: do not include any privileged information here! ***/ + f->open_object_section("auth"); + f->dump_unsigned("first_committed", get_first_committed()); + f->dump_unsigned("last_committed", get_last_committed()); + f->dump_unsigned("num_secrets", mon.key_server.get_num_secrets()); + f->close_section(); +} diff --git a/src/mon/AuthMonitor.h b/src/mon/AuthMonitor.h new file mode 100644 index 000000000..993b18a02 --- /dev/null +++ b/src/mon/AuthMonitor.h @@ -0,0 +1,240 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_AUTHMONITOR_H +#define CEPH_AUTHMONITOR_H + +#include +#include + +#include "global/global_init.h" +#include "include/ceph_features.h" +#include "include/types.h" +#include "mon/PaxosService.h" +#include "mon/MonitorDBStore.h" + +class MAuth; +class KeyRing; +class Monitor; + +#define MIN_GLOBAL_ID 0x1000 + +class AuthMonitor : public PaxosService { +public: + enum IncType { + GLOBAL_ID, + AUTH_DATA, + }; + struct Incremental { + IncType inc_type; + uint64_t max_global_id; + uint32_t auth_type; + ceph::buffer::list auth_data; + + Incremental() : inc_type(GLOBAL_ID), max_global_id(0), auth_type(0) {} + + void encode(ceph::buffer::list& bl, uint64_t features=-1) const { + using ceph::encode; + ENCODE_START(2, 2, bl); + __u32 _type = (__u32)inc_type; + encode(_type, bl); + if (_type == GLOBAL_ID) { + encode(max_global_id, bl); + } else { + encode(auth_type, bl); + encode(auth_data, bl); + } + ENCODE_FINISH(bl); + } + void decode(ceph::buffer::list::const_iterator& bl) { + DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + __u32 _type; + decode(_type, bl); + inc_type = (IncType)_type; + ceph_assert(inc_type >= GLOBAL_ID && inc_type <= AUTH_DATA); + if (_type == GLOBAL_ID) { + decode(max_global_id, bl); + } else { + decode(auth_type, bl); + decode(auth_data, bl); + } + DECODE_FINISH(bl); + } + void dump(ceph::Formatter *f) const { + f->dump_int("type", inc_type); + f->dump_int("max_global_id", max_global_id); + f->dump_int("auth_type", auth_type); + f->dump_int("auth_data_len", auth_data.length()); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new Incremental); + ls.push_back(new Incremental); + ls.back()->inc_type = GLOBAL_ID; + ls.back()->max_global_id = 1234; + ls.push_back(new Incremental); + ls.back()->inc_type = AUTH_DATA; + ls.back()->auth_type = 12; + ls.back()->auth_data.append("foo"); + } + }; + + struct auth_entity_t { + EntityName name; + EntityAuth auth; + }; + + +private: + std::vector pending_auth; + uint64_t max_global_id; + uint64_t last_allocated_id; + + // these are protected by mon->auth_lock + int mon_num = 0, mon_rank = 0; + + bool _upgrade_format_to_dumpling(); + bool _upgrade_format_to_luminous(); + bool _upgrade_format_to_mimic(); + void upgrade_format() override; + + void export_keyring(KeyRing& keyring); + int import_keyring(KeyRing& keyring); + + void push_cephx_inc(KeyServerData::Incremental& auth_inc) { + Incremental inc; + inc.inc_type = AUTH_DATA; + encode(auth_inc, inc.auth_data); + inc.auth_type = CEPH_AUTH_CEPHX; + pending_auth.push_back(inc); + } + + /* validate mon/osd/mds caps; fail on unrecognized service/type */ + bool valid_caps(const std::string& type, const std::string& caps, std::ostream *out); + bool valid_caps(const std::string& type, const ceph::buffer::list& bl, std::ostream *out) { + auto p = bl.begin(); + std::string v; + try { + using ceph::decode; + decode(v, p); + } catch (ceph::buffer::error& e) { + *out << "corrupt capability encoding"; + return false; + } + return valid_caps(type, v, out); + } + bool valid_caps(const std::vector& caps, std::ostream *out); + + void on_active() override; + bool should_propose(double& delay) override; + void get_initial_keyring(KeyRing *keyring); + void create_initial_keys(KeyRing *keyring); + void create_initial() override; + void update_from_paxos(bool *need_bootstrap) override; + void create_pending() override; // prepare a new pending + bool prepare_global_id(MonOpRequestRef op); + bool _should_increase_max_global_id(); ///< called under mon->auth_lock + void increase_max_global_id(); + uint64_t assign_global_id(bool should_increase_max); +public: + uint64_t _assign_global_id(); ///< called under mon->auth_lock + void _set_mon_num_rank(int num, int rank); ///< called under mon->auth_lock + +private: + bool prepare_used_pending_keys(MonOpRequestRef op); + + // propose pending update to peers + void encode_pending(MonitorDBStore::TransactionRef t) override; + void encode_full(MonitorDBStore::TransactionRef t) override; + version_t get_trim_to() const override; + + bool preprocess_query(MonOpRequestRef op) override; // true if processed. + bool prepare_update(MonOpRequestRef op) override; + + bool prep_auth(MonOpRequestRef op, bool paxos_writable); + + bool preprocess_command(MonOpRequestRef op); + bool prepare_command(MonOpRequestRef op); + + bool check_rotate(); + void process_used_pending_keys(const std::map& keys); + + bool entity_is_pending(EntityName& entity); + int exists_and_matches_entity( + const auth_entity_t& entity, + bool has_secret, + std::stringstream& ss); + int exists_and_matches_entity( + const EntityName& name, + const EntityAuth& auth, + const std::map& caps, + bool has_secret, + std::stringstream& ss); + int remove_entity(const EntityName &entity); + int add_entity( + const EntityName& name, + const EntityAuth& auth); + + public: + AuthMonitor(Monitor &mn, Paxos &p, const std::string& service_name) + : PaxosService(mn, p, service_name), + max_global_id(0), + last_allocated_id(0) + {} + + void pre_auth(MAuth *m); + + void tick() override; // check state, take actions + + int validate_osd_destroy( + int32_t id, + const uuid_d& uuid, + EntityName& cephx_entity, + EntityName& lockbox_entity, + std::stringstream& ss); + int do_osd_destroy( + const EntityName& cephx_entity, + const EntityName& lockbox_entity); + + int do_osd_new( + const auth_entity_t& cephx_entity, + const auth_entity_t& lockbox_entity, + bool has_lockbox); + int validate_osd_new( + int32_t id, + const uuid_d& uuid, + const std::string& cephx_secret, + const std::string& lockbox_secret, + auth_entity_t& cephx_entity, + auth_entity_t& lockbox_entity, + std::stringstream& ss); + + void dump_info(ceph::Formatter *f); + + bool is_valid_cephx_key(const std::string& k) { + if (k.empty()) + return false; + + EntityAuth ea; + try { + ea.key.decode_base64(k); + return true; + } catch (ceph::buffer::error& e) { /* fallthrough */ } + return false; + } +}; + + +WRITE_CLASS_ENCODER_FEATURES(AuthMonitor::Incremental) + +#endif diff --git a/src/mon/CMakeLists.txt b/src/mon/CMakeLists.txt new file mode 100644 index 000000000..784b4c3ee --- /dev/null +++ b/src/mon/CMakeLists.txt @@ -0,0 +1,46 @@ +set(lib_mon_srcs + ${CMAKE_SOURCE_DIR}/src/auth/cephx/CephxKeyServer.cc + ${CMAKE_SOURCE_DIR}/src/auth/cephx/CephxServiceHandler.cc + ${CMAKE_SOURCE_DIR}/src/auth/AuthServiceHandler.cc + Paxos.cc + PaxosService.cc + OSDMonitor.cc + MDSMonitor.cc + CommandHandler.cc + FSCommands.cc + MgrMonitor.cc + MgrStatMonitor.cc + Monitor.cc + MonmapMonitor.cc + LogMonitor.cc + AuthMonitor.cc + ConfigMap.cc + ConfigMonitor.cc + Elector.cc + ElectionLogic.cc + ConnectionTracker.cc + HealthMonitor.cc + KVMonitor.cc + ../mds/MDSAuthCaps.cc + ../mgr/mgr_commands.cc + ../osd/OSDCap.cc + ../mgr/MgrCap.cc) + +if(TARGET mgr_cap_obj) + list(APPEND lib_mon_srcs $) +endif() + +if(HAVE_GSSAPI) + list(APPEND lib_mon_srcs + ${CMAKE_SOURCE_DIR}/src/auth/krb/KrbServiceHandler.cpp) +endif() + +add_library(mon STATIC + ${lib_mon_srcs}) +target_link_libraries(mon + kv + heap_profiler + fmt::fmt) +if(WITH_JAEGER) + target_link_libraries(mon jaeger_base) +endif() diff --git a/src/mon/CommandHandler.cc b/src/mon/CommandHandler.cc new file mode 100644 index 000000000..903d35927 --- /dev/null +++ b/src/mon/CommandHandler.cc @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat Ltd + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "CommandHandler.h" + +#include "common/strtol.h" +#include "include/ceph_assert.h" + +#include +#include +#include + +int CommandHandler::parse_bool(std::string_view str, bool* result, std::ostream& ss) +{ + ceph_assert(result != nullptr); + + std::string interr; + int64_t n = strict_strtoll(str.data(), 10, &interr); + + if (str == "false" || str == "no" + || (interr.length() == 0 && n == 0)) { + *result = false; + return 0; + } else if (str == "true" || str == "yes" + || (interr.length() == 0 && n == 1)) { + *result = true; + return 0; + } else { + ss << "value must be false|no|0 or true|yes|1"; + return -EINVAL; + } +} diff --git a/src/mon/CommandHandler.h b/src/mon/CommandHandler.h new file mode 100644 index 000000000..167b4587f --- /dev/null +++ b/src/mon/CommandHandler.h @@ -0,0 +1,35 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat Ltd + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#ifndef COMMAND_HANDLER_H_ +#define COMMAND_HANDLER_H_ + +#include +#include + +class CommandHandler +{ +public: + /** + * Parse true|yes|1 style boolean string from `bool_str` + * `result` must be non-null. + * `ss` will be populated with error message on error. + * + * @return 0 on success, else -EINVAL + */ + int parse_bool(std::string_view str, bool* result, std::ostream& ss); +}; + +#endif diff --git a/src/mon/ConfigMap.cc b/src/mon/ConfigMap.cc new file mode 100644 index 000000000..763b8ce9b --- /dev/null +++ b/src/mon/ConfigMap.cc @@ -0,0 +1,291 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include "ConfigMap.h" +#include "crush/CrushWrapper.h" +#include "common/entity_name.h" + +using namespace std::literals; + +using std::cerr; +using std::cout; +using std::dec; +using std::hex; +using std::list; +using std::map; +using std::make_pair; +using std::ostream; +using std::ostringstream; +using std::pair; +using std::set; +using std::setfill; +using std::string; +using std::stringstream; +using std::to_string; +using std::vector; +using std::unique_ptr; + +using ceph::bufferlist; +using ceph::decode; +using ceph::encode; +using ceph::Formatter; +using ceph::JSONFormatter; +using ceph::mono_clock; +using ceph::mono_time; +using ceph::timespan_str; + +int MaskedOption::get_precision(const CrushWrapper *crush) +{ + // 0 = most precise + if (mask.location_type.size()) { + int r = crush->get_type_id(mask.location_type); + if (r >= 0) { + return r; + } + // bad type name, ignore it + } + int num_types = crush->get_num_type_names(); + if (mask.device_class.size()) { + return num_types; + } + return num_types + 1; +} + +void OptionMask::dump(Formatter *f) const +{ + if (location_type.size()) { + f->dump_string("location_type", location_type); + f->dump_string("location_value", location_value); + } + if (device_class.size()) { + f->dump_string("device_class", device_class); + } +} + +void MaskedOption::dump(Formatter *f) const +{ + f->dump_string("name", opt->name); + f->dump_string("value", raw_value); + f->dump_string("level", Option::level_to_str(opt->level)); + f->dump_bool("can_update_at_runtime", opt->can_update_at_runtime()); + f->dump_string("mask", mask.to_str()); + mask.dump(f); +} + +ostream& operator<<(ostream& out, const MaskedOption& o) +{ + out << o.opt->name; + if (o.mask.location_type.size()) { + out << "@" << o.mask.location_type << '=' << o.mask.location_value; + } + if (o.mask.device_class.size()) { + out << "@class=" << o.mask.device_class; + } + return out; +} + +// ---------- + +void Section::dump(Formatter *f) const +{ + for (auto& i : options) { + f->dump_object(i.first.c_str(), i.second); + } +} + +std::string Section::get_minimal_conf() const +{ + std::string r; + for (auto& i : options) { + if (i.second.opt->has_flag(Option::FLAG_NO_MON_UPDATE) || + i.second.opt->has_flag(Option::FLAG_MINIMAL_CONF)) { + if (i.second.mask.empty()) { + r += "\t"s + i.first + " = " + i.second.raw_value + "\n"; + } else { + r += "\t# masked option excluded: " + i.first + " = " + + i.second.raw_value + "\n"; + } + } + } + return r; +} + + +// ------------ + +void ConfigMap::dump(Formatter *f) const +{ + f->dump_object("global", global); + f->open_object_section("by_type"); + for (auto& i : by_type) { + f->dump_object(i.first.c_str(), i.second); + } + f->close_section(); + f->open_object_section("by_id"); + for (auto& i : by_id) { + f->dump_object(i.first.c_str(), i.second); + } + f->close_section(); +} + +std::map> +ConfigMap::generate_entity_map( + const EntityName& name, + const map& crush_location, + const CrushWrapper *crush, + const std::string& device_class, + std::map> *src) +{ + // global, then by type, then by name prefix component(s), then name. + // name prefix components are .-separated, + // e.g. client.a.b.c -> [global, client, client.a, client.a.b, client.a.b.c] + vector> sections = { make_pair("global", &global) }; + auto p = by_type.find(name.get_type_name()); + if (p != by_type.end()) { + sections.emplace_back(name.get_type_name(), &p->second); + } + vector name_bits; + boost::split(name_bits, name.to_str(), [](char c){ return c == '.'; }); + std::string tname; + for (unsigned p = 0; p < name_bits.size(); ++p) { + if (p) { + tname += '.'; + } + tname += name_bits[p]; + auto q = by_id.find(tname); + if (q != by_id.end()) { + sections.push_back(make_pair(tname, &q->second)); + } + } + std::map> out; + MaskedOption *prev = nullptr; + for (auto s : sections) { + for (auto& i : s.second->options) { + auto& o = i.second; + // match against crush location, class + if (o.mask.device_class.size() && + o.mask.device_class != device_class) { + continue; + } + if (o.mask.location_type.size()) { + auto p = crush_location.find(o.mask.location_type); + if (p == crush_location.end() || + p->second != o.mask.location_value) { + continue; + } + } + if (prev && prev->opt->name != i.first) { + prev = nullptr; + } + if (prev && + prev->get_precision(crush) < o.get_precision(crush)) { + continue; + } + out[i.first] = o.raw_value; + if (src) { + (*src)[i.first] = make_pair(s.first, &o); + } + prev = &o; + } + } + return out; +} + +bool ConfigMap::parse_mask( + const std::string& who, + std::string *section, + OptionMask *mask) +{ + vector split; + boost::split(split, who, [](char c){ return c == '/'; }); + for (unsigned j = 0; j < split.size(); ++j) { + auto& i = split[j]; + if (i == "global") { + *section = "global"; + continue; + } + size_t delim = i.find(':'); + if (delim != std::string::npos) { + string k = i.substr(0, delim); + if (k == "class") { + mask->device_class = i.substr(delim + 1); + } else { + mask->location_type = k; + mask->location_value = i.substr(delim + 1); + } + continue; + } + string type, id; + auto dotpos = i.find('.'); + if (dotpos != std::string::npos) { + type = i.substr(0, dotpos); + id = i.substr(dotpos + 1); + } else { + type = i; + } + if (EntityName::str_to_ceph_entity_type(type) == CEPH_ENTITY_TYPE_ANY) { + return false; + } + *section = i; + } + return true; +} + +void ConfigMap::parse_key( + const std::string& key, + std::string *name, + std::string *who) +{ + auto last_slash = key.rfind('/'); + if (last_slash == std::string::npos) { + *name = key; + } else if (auto mgrpos = key.find("/mgr/"); mgrpos != std::string::npos) { + *name = key.substr(mgrpos + 1); + *who = key.substr(0, mgrpos); + } else { + *name = key.substr(last_slash + 1); + *who = key.substr(0, last_slash); + } +} + + +// -------------- + +void ConfigChangeSet::dump(Formatter *f) const +{ + f->dump_int("version", version); + f->dump_stream("timestamp") << stamp; + f->dump_string("name", name); + f->open_array_section("changes"); + for (auto& i : diff) { + f->open_object_section("change"); + f->dump_string("name", i.first); + if (i.second.first) { + f->dump_string("previous_value", *i.second.first); + } + if (i.second.second) { + f->dump_string("new_value", *i.second.second); + } + f->close_section(); + } + f->close_section(); +} + +void ConfigChangeSet::print(ostream& out) const +{ + out << "--- " << version << " --- " << stamp; + if (name.size()) { + out << " --- " << name; + } + out << " ---\n"; + for (auto& i : diff) { + if (i.second.first) { + out << "- " << i.first << " = " << *i.second.first << "\n"; + } + if (i.second.second) { + out << "+ " << i.first << " = " << *i.second.second << "\n"; + } + } +} diff --git a/src/mon/ConfigMap.h b/src/mon/ConfigMap.h new file mode 100644 index 000000000..a21e77265 --- /dev/null +++ b/src/mon/ConfigMap.h @@ -0,0 +1,154 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include +#include +#include + +#include "include/utime.h" +#include "common/options.h" +#include "common/entity_name.h" + +class CrushWrapper; + +// the precedence is thus: +// +// global +// crush location (coarse to fine, ordered by type id) +// daemon type (e.g., osd) +// device class (osd only) +// crush location (coarse to fine, ordered by type id) +// daemon name (e.g., mds.foo) +// +// Note that this means that if we have +// +// config/host:foo/a = 1 +// config/osd/rack:foo/a = 2 +// +// then we get a = 2. The osd-level config wins, even though rack +// is less precise than host, because the crush limiters are only +// resolved within a section (global, per-daemon, per-instance). + +struct OptionMask { + std::string location_type, location_value; ///< matches crush_location + std::string device_class; ///< matches device class + + bool empty() const { + return location_type.size() == 0 + && location_value.size() == 0 + && device_class.size() == 0; + } + + std::string to_str() const { + std::string r; + if (location_type.size()) { + r += location_type + ":" + location_value; + } + if (device_class.size()) { + if (r.size()) { + r += "/"; + } + r += "class:" + device_class; + } + return r; + } + void dump(ceph::Formatter *f) const; +}; + +struct MaskedOption { + std::string raw_value; ///< raw, unparsed, unvalidated value + const Option *opt; ///< the option + OptionMask mask; + std::unique_ptr unknown_opt; ///< if fabricated for an unknown option + + MaskedOption(const Option *o, bool fab=false) : opt(o) { + if (fab) { + unknown_opt.reset(o); + } + } + MaskedOption(MaskedOption&& o) { + raw_value = std::move(o.raw_value); + opt = o.opt; + mask = std::move(o.mask); + unknown_opt = std::move(o.unknown_opt); + } + const MaskedOption& operator=(const MaskedOption& o) = delete; + const MaskedOption& operator=(MaskedOption&& o) = delete; + + /// return a precision metric (smaller is more precise) + int get_precision(const CrushWrapper *crush); + + friend std::ostream& operator<<(std::ostream& out, const MaskedOption& o); + + void dump(ceph::Formatter *f) const; +}; + +struct Section { + std::multimap options; + + void clear() { + options.clear(); + } + void dump(ceph::Formatter *f) const; + std::string get_minimal_conf() const; +}; + +struct ConfigMap { + Section global; + std::map> by_type; + std::map> by_id; + std::list> stray_options; + + Section *find_section(const std::string& name) { + if (name == "global") { + return &global; + } + auto i = by_type.find(name); + if (i != by_type.end()) { + return &i->second; + } + i = by_id.find(name); + if (i != by_id.end()) { + return &i->second; + } + return nullptr; + } + void clear() { + global.clear(); + by_type.clear(); + by_id.clear(); + stray_options.clear(); + } + void dump(ceph::Formatter *f) const; + std::map> generate_entity_map( + const EntityName& name, + const std::map& crush_location, + const CrushWrapper *crush, + const std::string& device_class, + std::map> *src=0); + + void parse_key( + const std::string& key, + std::string *name, + std::string *who); + static bool parse_mask( + const std::string& in, + std::string *section, + OptionMask *mask); +}; + + +struct ConfigChangeSet { + version_t version; + utime_t stamp; + std::string name; + + // key -> (old value, new value) + std::map,std::optional>> diff; + + void dump(ceph::Formatter *f) const; + void print(std::ostream& out) const; +}; diff --git a/src/mon/ConfigMonitor.cc b/src/mon/ConfigMonitor.cc new file mode 100644 index 000000000..e24ccbc18 --- /dev/null +++ b/src/mon/ConfigMonitor.cc @@ -0,0 +1,1017 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include "mon/Monitor.h" +#include "mon/ConfigMonitor.h" +#include "mon/KVMonitor.h" +#include "mon/MgrMonitor.h" +#include "mon/OSDMonitor.h" +#include "messages/MConfig.h" +#include "messages/MGetConfig.h" +#include "messages/MMonCommand.h" +#include "common/Formatter.h" +#include "common/TextTable.h" +#include "common/cmdparse.h" +#include "include/stringify.h" + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +using namespace TOPNSPC::common; + +using namespace std::literals; + +using std::cerr; +using std::cout; +using std::dec; +using std::hex; +using std::list; +using std::map; +using std::make_pair; +using std::ostream; +using std::ostringstream; +using std::pair; +using std::set; +using std::setfill; +using std::string; +using std::stringstream; +using std::to_string; +using std::vector; +using std::unique_ptr; + +using ceph::bufferlist; +using ceph::decode; +using ceph::encode; +using ceph::Formatter; +using ceph::JSONFormatter; +using ceph::mono_clock; +using ceph::mono_time; +using ceph::timespan_str; +static ostream& _prefix(std::ostream *_dout, const Monitor &mon, + const ConfigMonitor *hmon) { + return *_dout << "mon." << mon.name << "@" << mon.rank + << "(" << mon.get_state_name() << ").config "; +} + +const string KEY_PREFIX("config/"); +const string HISTORY_PREFIX("config-history/"); + +ConfigMonitor::ConfigMonitor(Monitor &m, Paxos &p, const string& service_name) + : PaxosService(m, p, service_name) { +} + +void ConfigMonitor::init() +{ + dout(10) << __func__ << dendl; +} + +void ConfigMonitor::create_initial() +{ + dout(10) << __func__ << dendl; + version = 0; + pending.clear(); +} + +void ConfigMonitor::update_from_paxos(bool *need_bootstrap) +{ + if (version == get_last_committed()) { + return; + } + version = get_last_committed(); + dout(10) << __func__ << " " << version << dendl; + load_config(); + check_all_subs(); +} + +void ConfigMonitor::create_pending() +{ + dout(10) << " " << version << dendl; + pending.clear(); + pending_description.clear(); +} + +void ConfigMonitor::encode_pending(MonitorDBStore::TransactionRef t) +{ + dout(10) << " " << (version+1) << dendl; + put_last_committed(t, version+1); + // NOTE: caller should have done encode_pending_to_kvmon() and + // kvmon->propose_pending() to commit the actual config changes. +} + +void ConfigMonitor::encode_pending_to_kvmon() +{ + // we need to pass our data through KVMonitor so that it is properly + // versioned and shared with subscribers. + for (auto& [key, value] : pending_cleanup) { + if (pending.count(key) == 0) { + derr << __func__ << " repair: adjusting config key '" << key << "'" + << dendl; + pending[key] = value; + } + } + pending_cleanup.clear(); + + // TODO: record changed sections (osd, mds.foo, rack:bar, ...) + + string history = HISTORY_PREFIX + stringify(version+1) + "/"; + { + bufferlist metabl; + ::encode(ceph_clock_now(), metabl); + ::encode(pending_description, metabl); + mon.kvmon()->enqueue_set(history, metabl); + } + for (auto& p : pending) { + string key = KEY_PREFIX + p.first; + auto q = current.find(p.first); + if (q != current.end()) { + if (p.second && *p.second == q->second) { + continue; + } + mon.kvmon()->enqueue_set(history + "-" + p.first, q->second); + } else if (!p.second) { + continue; + } + if (p.second) { + dout(20) << __func__ << " set " << key << dendl; + mon.kvmon()->enqueue_set(key, *p.second); + mon.kvmon()->enqueue_set(history + "+" + p.first, *p.second); + } else { + dout(20) << __func__ << " rm " << key << dendl; + mon.kvmon()->enqueue_rm(key); + } + } +} + +version_t ConfigMonitor::get_trim_to() const +{ + // we don't actually need *any* old states, but keep a few. + if (version > 5) { + return version - 5; + } + return 0; +} + +bool ConfigMonitor::preprocess_query(MonOpRequestRef op) +{ + switch (op->get_req()->get_type()) { + case MSG_MON_COMMAND: + try { + return preprocess_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return true; + } + } + return false; +} + +bool ConfigMonitor::preprocess_command(MonOpRequestRef op) +{ + auto m = op->get_req(); + std::stringstream ss; + int err = 0; + + cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { + string rs = ss.str(); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); + return true; + } + string format = cmd_getval_or(cmdmap, "format", "plain"); + boost::scoped_ptr f(Formatter::create(format)); + + string prefix; + cmd_getval(cmdmap, "prefix", prefix); + + bufferlist odata; + if (prefix == "config help") { + stringstream ss; + string name; + cmd_getval(cmdmap, "key", name); + name = ConfFile::normalize_key_name(name); + const Option *opt = g_conf().find_option(name); + if (!opt) { + opt = mon.mgrmon()->find_module_option(name); + } + if (opt) { + if (f) { + f->dump_object("option", *opt); + } else { + opt->print(&ss); + } + } else { + ss << "configuration option '" << name << "' not recognized"; + err = -ENOENT; + goto reply; + } + if (f) { + f->flush(odata); + } else { + odata.append(ss.str()); + } + } else if (prefix == "config ls") { + ostringstream ss; + if (f) { + f->open_array_section("options"); + } + for (auto& i : ceph_options) { + if (f) { + f->dump_string("option", i.name); + } else { + ss << i.name << "\n"; + } + } + for (auto& i : mon.mgrmon()->get_mgr_module_options()) { + if (f) { + f->dump_string("option", i.first); + } else { + ss << i.first << "\n"; + } + } + if (f) { + f->close_section(); + f->flush(odata); + } else { + odata.append(ss.str()); + } + } else if (prefix == "config dump") { + list> sections = { + make_pair("global", &config_map.global) + }; + for (string type : { "mon", "mgr", "osd", "mds", "client" }) { + auto i = config_map.by_type.find(type); + if (i != config_map.by_type.end()) { + sections.push_back(make_pair(i->first, &i->second)); + } + auto j = config_map.by_id.lower_bound(type); + while (j != config_map.by_id.end() && + j->first.find(type) == 0) { + sections.push_back(make_pair(j->first, &j->second)); + ++j; + } + } + TextTable tbl; + if (!f) { + tbl.define_column("WHO", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("MASK", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("LEVEL", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("OPTION", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("VALUE", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("RO", TextTable::LEFT, TextTable::LEFT); + } else { + f->open_array_section("config"); + } + for (auto s : sections) { + for (auto& i : s.second->options) { + if (!f) { + tbl << s.first; + tbl << i.second.mask.to_str(); + tbl << Option::level_to_str(i.second.opt->level); + tbl << i.first; + tbl << i.second.raw_value; + tbl << (i.second.opt->can_update_at_runtime() ? "" : "*"); + tbl << TextTable::endrow; + } else { + f->open_object_section("option"); + f->dump_string("section", s.first); + i.second.dump(f.get()); + f->close_section(); + } + } + } + if (!f) { + odata.append(stringify(tbl)); + } else { + f->close_section(); + f->flush(odata); + } + } else if (prefix == "config get") { + string who, name; + cmd_getval(cmdmap, "who", who); + + EntityName entity; + if (!entity.from_str(who) && + !entity.from_str(who + ".")) { + ss << "unrecognized entity '" << who << "'"; + err = -EINVAL; + goto reply; + } + + map crush_location; + string device_class; + if (entity.is_osd()) { + mon.osdmon()->osdmap.crush->get_full_location(who, &crush_location); + int id = atoi(entity.get_id().c_str()); + const char *c = mon.osdmon()->osdmap.crush->get_item_class(id); + if (c) { + device_class = c; + } + dout(10) << __func__ << " crush_location " << crush_location + << " class " << device_class << dendl; + } + + std::map> src; + auto config = config_map.generate_entity_map( + entity, + crush_location, + mon.osdmon()->osdmap.crush.get(), + device_class, + &src); + + if (cmd_getval(cmdmap, "key", name)) { + name = ConfFile::normalize_key_name(name); + const Option *opt = g_conf().find_option(name); + if (!opt) { + opt = mon.mgrmon()->find_module_option(name); + } + if (!opt) { + ss << "unrecognized key '" << name << "'"; + err = -ENOENT; + goto reply; + } + if (opt->has_flag(Option::FLAG_NO_MON_UPDATE)) { + // handle special options + if (name == "fsid") { + odata.append(stringify(mon.monmap->get_fsid())); + odata.append("\n"); + goto reply; + } + err = -EINVAL; + ss << name << " is special and cannot be stored by the mon"; + goto reply; + } + // get a single value + auto p = config.find(name); + if (p != config.end()) { + odata.append(p->second); + odata.append("\n"); + goto reply; + } + if (!entity.is_client() && + opt->daemon_value != Option::value_t{}) { + odata.append(Option::to_str(opt->daemon_value)); + } else { + odata.append(Option::to_str(opt->value)); + } + odata.append("\n"); + } else { + // dump all (non-default) values for this entity + TextTable tbl; + if (!f) { + tbl.define_column("WHO", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("MASK", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("LEVEL", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("OPTION", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("VALUE", TextTable::LEFT, TextTable::LEFT); + tbl.define_column("RO", TextTable::LEFT, TextTable::LEFT); + } else { + f->open_object_section("config"); + } + auto p = config.begin(); + auto q = src.begin(); + for (; p != config.end(); ++p, ++q) { + if (name.size() && p->first != name) { + continue; + } + if (!f) { + tbl << q->second.first; + tbl << q->second.second->mask.to_str(); + tbl << Option::level_to_str(q->second.second->opt->level); + tbl << p->first; + tbl << p->second; + tbl << (q->second.second->opt->can_update_at_runtime() ? "" : "*"); + tbl << TextTable::endrow; + } else { + f->open_object_section(p->first.c_str()); + f->dump_string("value", p->second); + f->dump_string("section", q->second.first); + f->dump_object("mask", q->second.second->mask); + f->dump_bool("can_update_at_runtime", + q->second.second->opt->can_update_at_runtime()); + f->close_section(); + } + } + if (!f) { + odata.append(stringify(tbl)); + } else { + f->close_section(); + f->flush(odata); + } + } + } else if (prefix == "config log") { + int64_t num = 10; + cmd_getval(cmdmap, "num", num); + ostringstream ds; + if (f) { + f->open_array_section("changesets"); + } + for (version_t v = version; v > version - std::min(version, (version_t)num); --v) { + ConfigChangeSet ch; + load_changeset(v, &ch); + if (f) { + f->dump_object("changeset", ch); + } else { + ch.print(ds); + } + } + if (f) { + f->close_section(); + f->flush(odata); + } else { + odata.append(ds.str()); + } + } else if (prefix == "config generate-minimal-conf") { + ostringstream conf; + conf << "# minimal ceph.conf for " << mon.monmap->get_fsid() << "\n"; + + // the basics + conf << "[global]\n"; + conf << "\tfsid = " << mon.monmap->get_fsid() << "\n"; + conf << "\tmon_host = "; + for (auto i = mon.monmap->mon_info.begin(); + i != mon.monmap->mon_info.end(); + ++i) { + if (i != mon.monmap->mon_info.begin()) { + conf << " "; + } + if (i->second.public_addrs.size() == 1 && + i->second.public_addrs.front().is_legacy() && + i->second.public_addrs.front().get_port() == CEPH_MON_PORT_LEGACY) { + // if this is a legacy addr on the legacy default port, then + // use the legacy-compatible formatting so that old clients + // can use this config. new code will see the :6789 and correctly + // interpret this as a v1 address. + conf << i->second.public_addrs.get_legacy_str(); + } else { + conf << i->second.public_addrs; + } + } + conf << "\n"; + conf << config_map.global.get_minimal_conf(); + for (auto m : { &config_map.by_type, &config_map.by_id }) { + for (auto& i : *m) { + auto s = i.second.get_minimal_conf(); + if (s.size()) { + conf << "\n[" << i.first << "]\n" << s; + } + } + } + odata.append(conf.str()); + err = 0; + } else { + return false; + } + + reply: + mon.reply_command(op, err, ss.str(), odata, get_last_committed()); + return true; +} + +void ConfigMonitor::handle_get_config(MonOpRequestRef op) +{ + auto m = op->get_req(); + dout(10) << __func__ << " " << m->name << " host " << m->host << dendl; + + const OSDMap& osdmap = mon.osdmon()->osdmap; + map crush_location; + osdmap.crush->get_full_location(m->host, &crush_location); + auto out = config_map.generate_entity_map( + m->name, + crush_location, + osdmap.crush.get(), + m->device_class); + dout(20) << " config is " << out << dendl; + m->get_connection()->send_message(new MConfig{std::move(out)}); +} + +bool ConfigMonitor::prepare_update(MonOpRequestRef op) +{ + Message *m = op->get_req(); + dout(7) << "prepare_update " << *m + << " from " << m->get_orig_source_inst() << dendl; + switch (m->get_type()) { + case MSG_MON_COMMAND: + try { + return prepare_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return true; + } + } + return false; +} + +bool ConfigMonitor::prepare_command(MonOpRequestRef op) +{ + auto m = op->get_req(); + std::stringstream ss; + int err = -EINVAL; + + // make sure kv is writeable. + if (!mon.kvmon()->is_writeable()) { + dout(10) << __func__ << " waiting for kv mon to be writeable" << dendl; + mon.kvmon()->wait_for_writeable(op, new C_RetryMessage(this, op)); + return false; + } + + cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { + string rs = ss.str(); + mon.reply_command(op, -EINVAL, rs, get_last_committed()); + return true; + } + + string prefix; + cmd_getval(cmdmap, "prefix", prefix); + bufferlist odata; + + if (prefix == "config set" || + prefix == "config rm") { + string who; + string name, value; + bool force = false; + cmd_getval(cmdmap, "who", who); + cmd_getval(cmdmap, "name", name); + cmd_getval(cmdmap, "value", value); + cmd_getval(cmdmap, "force", force); + name = ConfFile::normalize_key_name(name); + + if (prefix == "config set" && !force) { + const Option *opt = g_conf().find_option(name); + if (!opt) { + opt = mon.mgrmon()->find_module_option(name); + } + if (!opt) { + ss << "unrecognized config option '" << name << "'"; + err = -EINVAL; + goto reply; + } + + Option::value_t real_value; + string errstr; + err = opt->parse_value(value, &real_value, &errstr, &value); + if (err < 0) { + ss << "error parsing value: " << errstr; + goto reply; + } + + if (opt->has_flag(Option::FLAG_NO_MON_UPDATE)) { + err = -EINVAL; + ss << name << " is special and cannot be stored by the mon"; + goto reply; + } + } + + string section; + OptionMask mask; + if (!ConfigMap::parse_mask(who, §ion, &mask)) { + ss << "unrecognized config target '" << who << "'"; + err = -EINVAL; + goto reply; + } + + string key; + if (section.size()) { + key += section + "/"; + } else { + key += "global/"; + } + string mask_str = mask.to_str(); + if (mask_str.size()) { + key += mask_str + "/"; + } + key += name; + + if (prefix == "config set") { + bufferlist bl; + bl.append(value); + pending[key] = bl; + } else { + pending[key].reset(); + } + goto update; + } else if (prefix == "config reset") { + int64_t revert_to = -1; + cmd_getval(cmdmap, "num", revert_to); + if (revert_to < 0 || + revert_to > (int64_t)version) { + err = -EINVAL; + ss << "must specify a valid historical version to revert to; " + << "see 'ceph config log' for a list of avialable configuration " + << "historical versions"; + goto reply; + } + if (revert_to == (int64_t)version) { + err = 0; + goto reply; + } + for (int64_t v = version; v > revert_to; --v) { + ConfigChangeSet ch; + load_changeset(v, &ch); + for (auto& i : ch.diff) { + if (i.second.first) { + bufferlist bl; + bl.append(*i.second.first); + pending[i.first] = bl; + } else if (i.second.second) { + pending[i.first].reset(); + } + } + } + pending_description = string("reset to ") + stringify(revert_to); + goto update; + } else if (prefix == "config assimilate-conf") { + ConfFile cf; + bufferlist bl = m->get_data(); + err = cf.parse_bufferlist(&bl, &ss); + if (err < 0) { + goto reply; + } + bool updated = false; + ostringstream newconf; + for (auto& [section, s] : cf) { + dout(20) << __func__ << " [" << section << "]" << dendl; + bool did_section = false; + for (auto& [key, val] : s) { + Option::value_t real_value; + string value; + string errstr; + if (key.empty()) { + continue; + } + // a known and worthy option? + const Option *o = g_conf().find_option(key); + if (!o) { + o = mon.mgrmon()->find_module_option(key); + } + if (!o || + (o->flags & Option::FLAG_NO_MON_UPDATE) || + (o->flags & Option::FLAG_CLUSTER_CREATE)) { + goto skip; + } + // normalize + err = o->parse_value(val, &real_value, &errstr, &value); + if (err < 0) { + dout(20) << __func__ << " failed to parse " << key << " = '" + << val << "'" << dendl; + goto skip; + } + // does it conflict with an existing value? + { + const Section *s = config_map.find_section(section); + if (s) { + auto k = s->options.find(key); + if (k != s->options.end()) { + if (value != k->second.raw_value) { + dout(20) << __func__ << " have " << key + << " = " << k->second.raw_value + << " (not " << value << ")" << dendl; + goto skip; + } + dout(20) << __func__ << " already have " << key + << " = " << k->second.raw_value << dendl; + continue; + } + } + } + dout(20) << __func__ << " add " << key << " = " << value + << " (" << val << ")" << dendl; + { + bufferlist bl; + bl.append(value); + pending[section + "/" + key] = bl; + updated = true; + } + continue; + + skip: + dout(20) << __func__ << " skip " << key << " = " << value + << " (" << val << ")" << dendl; + if (!did_section) { + newconf << "\n[" << section << "]\n"; + did_section = true; + } + newconf << "\t" << key << " = " << val << "\n"; + } + } + odata.append(newconf.str()); + if (updated) { + goto update; + } + } else { + ss << "unknown command " << prefix; + err = -EINVAL; + } + +reply: + mon.reply_command(op, err, ss.str(), odata, get_last_committed()); + return false; + +update: + // see if there is an actual change + auto p = pending.begin(); + while (p != pending.end()) { + auto q = current.find(p->first); + if (p->second && q != current.end() && *p->second == q->second) { + // set to same value + p = pending.erase(p); + } else if (!p->second && q == current.end()) { + // erasing non-existent value + p = pending.erase(p); + } else { + ++p; + } + } + if (pending.empty()) { + err = 0; + goto reply; + } + // immediately propose *with* KV mon + encode_pending_to_kvmon(); + paxos.plug(); + mon.kvmon()->propose_pending(); + paxos.unplug(); + force_immediate_propose(); + wait_for_finished_proposal( + op, + new Monitor::C_Command( + mon, op, 0, ss.str(), odata, + get_last_committed() + 1)); + return true; +} + +void ConfigMonitor::tick() +{ + if (!is_active() || !mon.is_leader()) { + return; + } + dout(10) << __func__ << dendl; + bool changed = false; + if (!pending_cleanup.empty()) { + changed = true; + } + if (changed && mon.kvmon()->is_writeable()) { + paxos.plug(); + encode_pending_to_kvmon(); + mon.kvmon()->propose_pending(); + paxos.unplug(); + propose_pending(); + } +} + +void ConfigMonitor::on_active() +{ +} + +void ConfigMonitor::load_config() +{ + std::map renamed_pacific = { + { "mon_osd_blacklist_default_expire", "mon_osd_blocklist_default_expire" }, + { "mon_mds_blacklist_interval", "mon_mds_blocklist_interval" }, + { "mon_mgr_blacklist_interval", "mon_mgr_blocklist_interval" }, + { "rbd_blacklist_on_break_lock", "rbd_blocklist_on_break_lock" }, + { "rbd_blacklist_expire_seconds", "rbd_blocklist_expire_seconds" }, + { "mds_session_blacklist_on_timeout", "mds_session_blocklist_on_timeout" }, + { "mds_session_blacklist_on_evict", "mds_session_blocklist_on_evict" }, + }; + + unsigned num = 0; + KeyValueDB::Iterator it = mon.store->get_iterator(KV_PREFIX); + it->lower_bound(KEY_PREFIX); + config_map.clear(); + current.clear(); + pending_cleanup.clear(); + while (it->valid() && + it->key().compare(0, KEY_PREFIX.size(), KEY_PREFIX) == 0) { + string key = it->key().substr(KEY_PREFIX.size()); + string value = it->value().to_str(); + + current[key] = it->value(); + + string name; + string who; + config_map.parse_key(key, &name, &who); + + // has this option been renamed? + { + auto p = renamed_pacific.find(name); + if (p != renamed_pacific.end()) { + if (mon.monmap->min_mon_release >= ceph_release_t::pacific) { + // schedule a cleanup + pending_cleanup[key].reset(); + pending_cleanup[who + "/" + p->second] = it->value(); + } + // continue loading under the new name + name = p->second; + } + } + + const Option *opt = g_conf().find_option(name); + if (!opt) { + opt = mon.mgrmon()->find_module_option(name); + } + if (!opt) { + dout(10) << __func__ << " unrecognized option '" << name << "'" << dendl; + config_map.stray_options.push_back( + std::unique_ptr