// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /* * Ceph - scalable distributed file system * * Copyright (C) 2014 John Spray * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software * Foundation. See file COPYING. */ // Include this first to get python headers earlier #include "Gil.h" #include "ActivePyModules.h" #include #include "common/errno.h" #include "include/stringify.h" #include "mon/MonMap.h" #include "osd/OSDMap.h" #include "osd/osd_types.h" #include "mgr/MgrContext.h" #include "mgr/TTLCache.h" #include "mgr/mgr_perf_counters.h" #include "DaemonKey.h" #include "DaemonServer.h" #include "mgr/MgrContext.h" #include "PyFormatter.h" // For ::mgr_store_prefix #include "PyModule.h" #include "PyModuleRegistry.h" #include "PyUtil.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mgr #undef dout_prefix #define dout_prefix *_dout << "mgr " << __func__ << " " using std::pair; using std::string; using namespace std::literals; ActivePyModules::ActivePyModules( PyModuleConfig &module_config_, std::map store_data, bool mon_provides_kv_sub, DaemonStateIndex &ds, ClusterState &cs, MonClient &mc, LogChannelRef clog_, LogChannelRef audit_clog_, Objecter &objecter_, Client &client_, Finisher &f, DaemonServer &server, PyModuleRegistry &pmr) : module_config(module_config_), daemon_state(ds), cluster_state(cs), monc(mc), clog(clog_), audit_clog(audit_clog_), objecter(objecter_), client(client_), finisher(f), cmd_finisher(g_ceph_context, "cmd_finisher", "cmdfin"), server(server), py_module_registry(pmr) { store_cache = std::move(store_data); // we can only trust our ConfigMap if the mon cluster has provided // kv sub since our startup. have_local_config_map = mon_provides_kv_sub; _refresh_config_map(); cmd_finisher.start(); } ActivePyModules::~ActivePyModules() = default; void ActivePyModules::dump_server(const std::string &hostname, const DaemonStateCollection &dmc, Formatter *f) { f->dump_string("hostname", hostname); f->open_array_section("services"); std::string ceph_version; for (const auto &[key, state] : dmc) { std::string id; without_gil([&ceph_version, &id, state=state] { std::lock_guard l(state->lock); // TODO: pick the highest version, and make sure that // somewhere else (during health reporting?) we are // indicating to the user if we see mixed versions auto ver_iter = state->metadata.find("ceph_version"); if (ver_iter != state->metadata.end()) { ceph_version = state->metadata.at("ceph_version"); } if (state->metadata.find("id") != state->metadata.end()) { id = state->metadata.at("id"); } }); f->open_object_section("service"); f->dump_string("type", key.type); f->dump_string("id", key.name); f->dump_string("ceph_version", ceph_version); if (!id.empty()) { f->dump_string("name", id); } f->close_section(); } f->close_section(); f->dump_string("ceph_version", ceph_version); } PyObject *ActivePyModules::get_server_python(const std::string &hostname) { const auto dmc = without_gil([&]{ std::lock_guard l(lock); dout(10) << " (" << hostname << ")" << dendl; return daemon_state.get_by_server(hostname); }); PyFormatter f; dump_server(hostname, dmc, &f); return f.get(); } PyObject *ActivePyModules::list_servers_python() { dout(10) << " >" << dendl; without_gil_t no_gil; return daemon_state.with_daemons_by_server([this, &no_gil] (const std::map &all) { no_gil.acquire_gil(); PyFormatter f(false, true); for (const auto &[hostname, daemon_state] : all) { f.open_object_section("server"); dump_server(hostname, daemon_state, &f); f.close_section(); } return f.get(); }); } PyObject *ActivePyModules::get_metadata_python( const std::string &svc_type, const std::string &svc_id) { auto metadata = daemon_state.get(DaemonKey{svc_type, svc_id}); if (metadata == nullptr) { derr << "Requested missing service " << svc_type << "." << svc_id << dendl; Py_RETURN_NONE; } auto l = without_gil([&] { return std::lock_guard(lock); }); PyFormatter f; f.dump_string("hostname", metadata->hostname); for (const auto &[key, val] : metadata->metadata) { f.dump_string(key, val); } return f.get(); } PyObject *ActivePyModules::get_daemon_status_python( const std::string &svc_type, const std::string &svc_id) { auto metadata = daemon_state.get(DaemonKey{svc_type, svc_id}); if (metadata == nullptr) { derr << "Requested missing service " << svc_type << "." << svc_id << dendl; Py_RETURN_NONE; } auto l = without_gil([&] { return std::lock_guard(lock); }); PyFormatter f; for (const auto &[daemon, status] : metadata->service_status) { f.dump_string(daemon, status); } return f.get(); } void ActivePyModules::update_cache_metrics() { auto hit_miss_ratio = ttl_cache.get_hit_miss_ratio(); perfcounter->set(l_mgr_cache_hit, hit_miss_ratio.first); perfcounter->set(l_mgr_cache_miss, hit_miss_ratio.second); } PyObject *ActivePyModules::cacheable_get_python(const std::string &what) { uint64_t ttl_seconds = g_conf().get_val("mgr_ttl_cache_expire_seconds"); if(ttl_seconds > 0) { ttl_cache.set_ttl(ttl_seconds); try{ PyObject* cached = ttl_cache.get(what); update_cache_metrics(); return cached; } catch (std::out_of_range& e) {} } PyObject *obj = get_python(what); if(ttl_seconds && ttl_cache.is_cacheable(what)) { ttl_cache.insert(what, obj); Py_INCREF(obj); } update_cache_metrics(); return obj; } PyObject *ActivePyModules::get_python(const std::string &what) { uint64_t ttl_seconds = g_conf().get_val("mgr_ttl_cache_expire_seconds"); PyFormatter pf; PyJSONFormatter jf; // Use PyJSONFormatter if TTL cache is enabled. Formatter &f = ttl_seconds ? (Formatter&)jf : (Formatter&)pf; if (what == "fs_map") { without_gil_t no_gil; cluster_state.with_fsmap([&](const FSMap &fsmap) { no_gil.acquire_gil(); fsmap.dump(&f); }); } else if (what == "osdmap_crush_map_text") { without_gil_t no_gil; bufferlist rdata; cluster_state.with_osdmap([&](const OSDMap &osd_map){ osd_map.crush->encode(rdata, CEPH_FEATURES_SUPPORTED_DEFAULT); }); std::string crush_text = rdata.to_str(); no_gil.acquire_gil(); return PyUnicode_FromString(crush_text.c_str()); } else if (what.substr(0, 7) == "osd_map") { without_gil_t no_gil; cluster_state.with_osdmap([&](const OSDMap &osd_map){ no_gil.acquire_gil(); if (what == "osd_map") { osd_map.dump(&f, g_ceph_context); } else if (what == "osd_map_tree") { osd_map.print_tree(&f, nullptr); } else if (what == "osd_map_crush") { osd_map.crush->dump(&f); } }); } else if (what == "modified_config_options") { without_gil_t no_gil; auto all_daemons = daemon_state.get_all(); set names; for (auto& [key, daemon] : all_daemons) { std::lock_guard l(daemon->lock); for (auto& [name, valmap] : daemon->config) { names.insert(name); } } no_gil.acquire_gil(); f.open_array_section("options"); for (auto& name : names) { f.dump_string("name", name); } f.close_section(); } else if (what.substr(0, 6) == "config") { // We make a copy of the global config to avoid printing // to py formater (which may drop-take GIL) while holding // the global config lock, which might deadlock with other // thread that is holding the GIL and acquiring the global // config lock. ConfigProxy config{g_conf()}; if (what == "config_options") { config.config_options(&f); } else if (what == "config") { config.show_config(&f); } } else if (what == "mon_map") { without_gil_t no_gil; cluster_state.with_monmap([&](const MonMap &monmap) { no_gil.acquire_gil(); monmap.dump(&f); }); } else if (what == "service_map") { without_gil_t no_gil; cluster_state.with_servicemap([&](const ServiceMap &service_map) { no_gil.acquire_gil(); service_map.dump(&f); }); } else if (what == "osd_metadata") { without_gil_t no_gil; auto dmc = daemon_state.get_by_service("osd"); for (const auto &[key, state] : dmc) { std::lock_guard l(state->lock); with_gil(no_gil, [&f, &name=key.name, state=state] { f.open_object_section(name.c_str()); f.dump_string("hostname", state->hostname); for (const auto &[name, val] : state->metadata) { f.dump_string(name.c_str(), val); } f.close_section(); }); } } else if (what == "mds_metadata") { without_gil_t no_gil; auto dmc = daemon_state.get_by_service("mds"); for (const auto &[key, state] : dmc) { std::lock_guard l(state->lock); with_gil(no_gil, [&f, &name=key.name, state=state] { f.open_object_section(name.c_str()); f.dump_string("hostname", state->hostname); for (const auto &[name, val] : state->metadata) { f.dump_string(name.c_str(), val); } f.close_section(); }); } } else if (what == "pg_summary") { without_gil_t no_gil; cluster_state.with_pgmap( [&f, &no_gil](const PGMap &pg_map) { std::map > osds; std::map > pools; std::map all; for (const auto &i : pg_map.pg_stat) { const auto pool = i.first.m_pool; const std::string state = pg_state_string(i.second.state); // Insert to per-pool map pools[stringify(pool)][state]++; for (const auto &osd_id : i.second.acting) { osds[stringify(osd_id)][state]++; } all[state]++; } no_gil.acquire_gil(); f.open_object_section("by_osd"); for (const auto &i : osds) { f.open_object_section(i.first.c_str()); for (const auto &j : i.second) { f.dump_int(j.first.c_str(), j.second); } f.close_section(); } f.close_section(); f.open_object_section("by_pool"); for (const auto &i : pools) { f.open_object_section(i.first.c_str()); for (const auto &j : i.second) { f.dump_int(j.first.c_str(), j.second); } f.close_section(); } f.close_section(); f.open_object_section("all"); for (const auto &i : all) { f.dump_int(i.first.c_str(), i.second); } f.close_section(); f.open_object_section("pg_stats_sum"); pg_map.pg_sum.dump(&f); f.close_section(); } ); } else if (what == "pg_status") { without_gil_t no_gil; cluster_state.with_pgmap( [&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.print_summary(&f, nullptr); } ); } else if (what == "pg_dump") { without_gil_t no_gil; cluster_state.with_pgmap( [&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump(&f, false); } ); } else if (what == "devices") { without_gil_t no_gil; daemon_state.with_devices2( [&] { with_gil(no_gil, [&] { f.open_array_section("devices"); }); }, [&](const DeviceState &dev) { with_gil(no_gil, [&] { f.dump_object("device", dev); }); }); with_gil(no_gil, [&] { f.close_section(); }); } else if (what.size() > 7 && what.substr(0, 7) == "device ") { without_gil_t no_gil; string devid = what.substr(7); if (!daemon_state.with_device(devid, [&] (const DeviceState& dev) { with_gil_t with_gil{no_gil}; f.dump_object("device", dev); })) { // device not found } } else if (what == "io_rate") { without_gil_t no_gil; cluster_state.with_pgmap( [&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_delta(&f); } ); } else if (what == "df") { without_gil_t no_gil; cluster_state.with_osdmap_and_pgmap( [&]( const OSDMap& osd_map, const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_cluster_stats(nullptr, &f, true); pg_map.dump_pool_stats_full(osd_map, nullptr, &f, true); }); } else if (what == "pg_stats") { without_gil_t no_gil; cluster_state.with_pgmap([&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_pg_stats(&f, false); }); } else if (what == "pool_stats") { without_gil_t no_gil; cluster_state.with_pgmap([&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_pool_stats(&f); }); } else if (what == "pg_ready") { server.dump_pg_ready(&f); } else if (what == "pg_progress") { without_gil_t no_gil; cluster_state.with_pgmap([&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_pg_progress(&f); server.dump_pg_ready(&f); }); } else if (what == "osd_stats") { without_gil_t no_gil; cluster_state.with_pgmap([&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_osd_stats(&f, false); }); } else if (what == "osd_ping_times") { without_gil_t no_gil; cluster_state.with_pgmap([&](const PGMap &pg_map) { no_gil.acquire_gil(); pg_map.dump_osd_ping_times(&f); }); } else if (what == "osd_pool_stats") { without_gil_t no_gil; int64_t poolid = -ENOENT; cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) { no_gil.acquire_gil(); f.open_array_section("pool_stats"); for (auto &p : osdmap.get_pools()) { poolid = p.first; pg_map.dump_pool_stats_and_io_rate(poolid, osdmap, &f, nullptr); } f.close_section(); }); } else if (what == "health") { without_gil_t no_gil; cluster_state.with_health([&](const ceph::bufferlist &health_json) { no_gil.acquire_gil(); f.dump_string("json", health_json.to_str()); }); } else if (what == "mon_status") { without_gil_t no_gil; cluster_state.with_mon_status( [&](const ceph::bufferlist &mon_status_json) { no_gil.acquire_gil(); f.dump_string("json", mon_status_json.to_str()); }); } else if (what == "mgr_map") { without_gil_t no_gil; cluster_state.with_mgrmap([&](const MgrMap &mgr_map) { no_gil.acquire_gil(); mgr_map.dump(&f); }); } else if (what == "mgr_ips") { entity_addrvec_t myaddrs = server.get_myaddrs(); f.open_array_section("ips"); std::set did; for (auto& i : myaddrs.v) { std::string ip = i.ip_only_to_str(); if (auto [where, inserted] = did.insert(ip); inserted) { f.dump_string("ip", ip); } } f.close_section(); } else if (what == "have_local_config_map") { f.dump_bool("have_local_config_map", have_local_config_map); } else if (what == "active_clean_pgs"){ without_gil_t no_gil; cluster_state.with_pgmap( [&](const PGMap &pg_map) { no_gil.acquire_gil(); f.open_array_section("pg_stats"); for (auto &i : pg_map.pg_stat) { const auto state = i.second.state; const auto pgid_raw = i.first; const auto pgid = stringify(pgid_raw.m_pool) + "." + stringify(pgid_raw.m_seed); const auto reported_epoch = i.second.reported_epoch; if (state & PG_STATE_ACTIVE && state & PG_STATE_CLEAN) { f.open_object_section("pg_stat"); f.dump_string("pgid", pgid); f.dump_string("state", pg_state_string(state)); f.dump_unsigned("reported_epoch", reported_epoch); f.close_section(); } } f.close_section(); const auto num_pg = pg_map.num_pg; f.dump_unsigned("total_num_pgs", num_pg); }); } else { derr << "Python module requested unknown data '" << what << "'" << dendl; Py_RETURN_NONE; } if(ttl_seconds) { return jf.get(); } else { return pf.get(); } } void ActivePyModules::start_one(PyModuleRef py_module) { std::lock_guard l(lock); const auto name = py_module->get_name(); auto active_module = std::make_shared(py_module, clog); pending_modules.insert(name); // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. finisher.queue(new LambdaContext([this, active_module, name](int) { int r = active_module->load(this); std::lock_guard l(lock); pending_modules.erase(name); if (r != 0) { derr << "Failed to run module in active mode ('" << name << "')" << dendl; } else { auto em = modules.emplace(name, active_module); ceph_assert(em.second); // actually inserted dout(4) << "Starting thread for " << name << dendl; active_module->thread.create(active_module->get_thread_name()); dout(4) << "Starting active module " << name <<" finisher thread " << active_module->get_fin_thread_name() << dendl; active_module->finisher.start(); } })); } void ActivePyModules::shutdown() { std::lock_guard locker(lock); // Stop per active module finisher thread for (auto& [name, module] : modules) { dout(4) << "Stopping active module " << name << " finisher thread" << dendl; module->finisher.wait_for_empty(); module->finisher.stop(); } // Signal modules to drop out of serve() and/or tear down resources for (auto& [name, module] : modules) { lock.unlock(); dout(10) << "calling module " << name << " shutdown()" << dendl; module->shutdown(); dout(10) << "module " << name << " shutdown() returned" << dendl; lock.lock(); } // For modules implementing serve(), finish the threads where we // were running that. for (auto& [name, module] : modules) { lock.unlock(); dout(10) << "joining module " << name << dendl; module->thread.join(); dout(10) << "joined module " << name << dendl; lock.lock(); } cmd_finisher.wait_for_empty(); cmd_finisher.stop(); modules.clear(); } void ActivePyModules::notify_all(const std::string ¬ify_type, const std::string ¬ify_id) { std::lock_guard l(lock); dout(10) << __func__ << ": notify_all " << notify_type << dendl; for (auto& [name, module] : modules) { if (!py_module_registry.should_notify(name, notify_type)) { continue; } // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. dout(15) << "queuing notify (" << notify_type << ") to " << name << dendl; Finisher& mod_finisher = py_module_registry.get_active_module_finisher(name); // workaround for https://bugs.llvm.org/show_bug.cgi?id=35984 mod_finisher.queue(new LambdaContext([module=module, notify_type, notify_id] (int r){ module->notify(notify_type, notify_id); })); } } void ActivePyModules::notify_all(const LogEntry &log_entry) { std::lock_guard l(lock); dout(10) << __func__ << ": notify_all (clog)" << dendl; for (auto& [name, module] : modules) { if (!py_module_registry.should_notify(name, "clog")) { continue; } // Send all python calls down a Finisher to avoid blocking // C++ code, and avoid any potential lock cycles. // // Note intentional use of non-reference lambda binding on // log_entry: we take a copy because caller's instance is // probably ephemeral. dout(15) << "queuing notify (clog) to " << name << dendl; Finisher& mod_finisher = py_module_registry.get_active_module_finisher(name); // workaround for https://bugs.llvm.org/show_bug.cgi?id=35984 mod_finisher.queue(new LambdaContext([module=module, log_entry](int r){ module->notify_clog(log_entry); })); } } bool ActivePyModules::get_store(const std::string &module_name, const std::string &key, std::string *val) const { without_gil_t no_gil; std::lock_guard l(lock); const std::string global_key = PyModule::mgr_store_prefix + module_name + "/" + key; dout(4) << __func__ << " key: " << global_key << dendl; auto i = store_cache.find(global_key); if (i != store_cache.end()) { *val = i->second; return true; } else { return false; } } PyObject *ActivePyModules::dispatch_remote( const std::string &other_module, const std::string &method, PyObject *args, PyObject *kwargs, std::string *err) { auto mod_iter = modules.find(other_module); ceph_assert(mod_iter != modules.end()); return mod_iter->second->dispatch_remote(method, args, kwargs, err); } bool ActivePyModules::get_config(const std::string &module_name, const std::string &key, std::string *val) const { const std::string global_key = "mgr/" + module_name + "/" + key; dout(20) << " key: " << global_key << dendl; std::lock_guard lock(module_config.lock); auto i = module_config.config.find(global_key); if (i != module_config.config.end()) { *val = i->second; return true; } else { return false; } } PyObject *ActivePyModules::get_typed_config( const std::string &module_name, const std::string &key, const std::string &prefix) const { without_gil_t no_gil; std::string value; std::string final_key; bool found = false; if (prefix.size()) { final_key = prefix + "/" + key; found = get_config(module_name, final_key, &value); } if (!found) { final_key = key; found = get_config(module_name, final_key, &value); } if (found) { PyModuleRef module = py_module_registry.get_module(module_name); no_gil.acquire_gil(); if (!module) { derr << "Module '" << module_name << "' is not available" << dendl; Py_RETURN_NONE; } // removing value to hide sensitive data going into mgr logs // leaving this for debugging purposes // dout(10) << __func__ << " " << final_key << " found: " << value << dendl; dout(10) << __func__ << " " << final_key << " found" << dendl; return module->get_typed_option_value(key, value); } if (prefix.size()) { dout(10) << " [" << prefix << "/]" << key << " not found " << dendl; } else { dout(10) << " " << key << " not found " << dendl; } Py_RETURN_NONE; } PyObject *ActivePyModules::get_store_prefix(const std::string &module_name, const std::string &prefix) const { without_gil_t no_gil; std::lock_guard l(lock); std::lock_guard lock(module_config.lock); no_gil.acquire_gil(); const std::string base_prefix = PyModule::mgr_store_prefix + module_name + "/"; const std::string global_prefix = base_prefix + prefix; dout(4) << __func__ << " prefix: " << global_prefix << dendl; PyFormatter f; for (auto p = store_cache.lower_bound(global_prefix); p != store_cache.end() && p->first.find(global_prefix) == 0; ++p) { f.dump_string(p->first.c_str() + base_prefix.size(), p->second); } return f.get(); } void ActivePyModules::set_store(const std::string &module_name, const std::string &key, const std::optional& val) { const std::string global_key = PyModule::mgr_store_prefix + module_name + "/" + key; Command set_cmd; { std::lock_guard l(lock); // NOTE: this isn't strictly necessary since we'll also get an MKVData // update from the mon due to our subscription *before* our command is acked. if (val) { store_cache[global_key] = *val; } else { store_cache.erase(global_key); } std::ostringstream cmd_json; JSONFormatter jf; jf.open_object_section("cmd"); if (val) { jf.dump_string("prefix", "config-key set"); jf.dump_string("key", global_key); jf.dump_string("val", *val); } else { jf.dump_string("prefix", "config-key del"); jf.dump_string("key", global_key); } jf.close_section(); jf.flush(cmd_json); set_cmd.run(&monc, cmd_json.str()); } set_cmd.wait(); if (set_cmd.r != 0) { // config-key set will fail if mgr's auth key has insufficient // permission to set config keys // FIXME: should this somehow raise an exception back into Python land? dout(0) << "`config-key set " << global_key << " " << val << "` failed: " << cpp_strerror(set_cmd.r) << dendl; dout(0) << "mon returned " << set_cmd.r << ": " << set_cmd.outs << dendl; } } std::pair ActivePyModules::set_config( const std::string &module_name, const std::string &key, const std::optional& val) { return module_config.set_config(&monc, module_name, key, val); } std::map ActivePyModules::get_services() const { std::map result; std::lock_guard l(lock); for (const auto& [name, module] : modules) { std::string svc_str = module->get_uri(); if (!svc_str.empty()) { result[name] = svc_str; } } return result; } void ActivePyModules::update_kv_data( const std::string prefix, bool incremental, const map, std::less<>>& data) { std::lock_guard l(lock); bool do_config = false; if (!incremental) { dout(10) << "full update on " << prefix << dendl; auto p = store_cache.lower_bound(prefix); while (p != store_cache.end() && p->first.find(prefix) == 0) { dout(20) << " rm prior " << p->first << dendl; p = store_cache.erase(p); } } else { dout(10) << "incremental update on " << prefix << dendl; } for (auto& i : data) { if (i.second) { dout(20) << " set " << i.first << " = " << i.second->to_str() << dendl; store_cache[i.first] = i.second->to_str(); } else { dout(20) << " rm " << i.first << dendl; store_cache.erase(i.first); } if (i.first.find("config/") == 0) { do_config = true; } } if (do_config) { _refresh_config_map(); } } void ActivePyModules::_refresh_config_map() { dout(10) << dendl; config_map.clear(); for (auto p = store_cache.lower_bound("config/"); p != store_cache.end() && p->first.find("config/") == 0; ++p) { string key = p->first.substr(7); if (key.find("mgr/") == 0) { // NOTE: for now, we ignore module options. see also ceph_foreign_option_get(). continue; } string value = p->second; string name; string who; config_map.parse_key(key, &name, &who); const Option *opt = g_conf().find_option(name); if (!opt) { config_map.stray_options.push_back( std::unique_ptr