From 17d6a993fc17d533460c5f40f3908c708e057c18 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Thu, 23 May 2024 18:45:17 +0200
Subject: Merging upstream version 18.2.3.

Signed-off-by: Daniel Baumann
---
 src/mds/MetricAggregator.cc | 169 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)

diff --git a/src/mds/MetricAggregator.cc b/src/mds/MetricAggregator.cc
index 6487084fb..9765d4d5b 100644
--- a/src/mds/MetricAggregator.cc
+++ b/src/mds/MetricAggregator.cc
@@ -4,6 +4,9 @@
 #include <boost/range/adaptor/map.hpp>
 #include <boost/range/algorithm/copy.hpp>
 
+#include "common/ceph_context.h"
+#include "common/perf_counters_key.h"
+
 #include "MDSRank.h"
 #include "MetricAggregator.h"
 #include "mgr/MgrClient.h"
@@ -13,8 +16,36 @@
 #undef dout_prefix
 #define dout_prefix *_dout << "mds.metric.aggregator" << " " << __func__
 
+// Performance Counters
+
+enum {
+  l_mds_client_metrics_start = 10000,
+  l_mds_client_metrics_num_clients,
+  l_mds_client_metrics_last
+};
+
+enum {
+  l_mds_per_client_metrics_start = 20000,
+  l_mds_per_client_metrics_cap_hits,
+  l_mds_per_client_metrics_cap_misses,
+  l_mds_per_client_metrics_avg_read_latency,
+  l_mds_per_client_metrics_avg_write_latency,
+  l_mds_per_client_metrics_avg_metadata_latency,
+  l_mds_per_client_metrics_dentry_lease_hits,
+  l_mds_per_client_metrics_dentry_lease_misses,
+  l_mds_per_client_metrics_opened_files,
+  l_mds_per_client_metrics_opened_inodes,
+  l_mds_per_client_metrics_pinned_icaps,
+  l_mds_per_client_metrics_total_inodes,
+  l_mds_per_client_metrics_total_read_ops,
+  l_mds_per_client_metrics_total_read_size,
+  l_mds_per_client_metrics_total_write_ops,
+  l_mds_per_client_metrics_total_write_size,
+  l_mds_per_client_metrics_last
+};
+
 MetricAggregator::MetricAggregator(CephContext *cct, MDSRank *mds, MgrClient *mgrc)
   : Dispatcher(cct),
+    m_cct(cct),
     mds(mds),
     mgrc(mgrc),
     mds_pinger(mds) {
@@ -32,6 +63,15 @@ void MetricAggregator::ping_all_active_ranks() {
 int MetricAggregator::init() {
   dout(10) << dendl;
 
+  std::string labels = ceph::perf_counters::key_create("mds_client_metrics",
+                                                       {{"fs_name", mds->mdsmap->get_fs_name()},
+                                                        {"id", stringify(mds->get_global_id())}});
+  PerfCountersBuilder plb(m_cct, labels, l_mds_client_metrics_start, l_mds_client_metrics_last);
+  plb.add_u64(l_mds_client_metrics_num_clients,
+              "num_clients", "Number of client sessions", "mcli", PerfCountersBuilder::PRIO_CRITICAL);
+  m_perf_counters = plb.create_perf_counters();
+  m_cct->get_perfcounters_collection()->add(m_perf_counters);
+
   pinger = std::thread([this]() {
     std::unique_lock locker(lock);
     while (!stopping) {
@@ -61,6 +101,24 @@ void MetricAggregator::shutdown() {
     std::scoped_lock locker(lock);
     ceph_assert(!stopping);
     stopping = true;
+
+    // dealloc per-client perf counters
+    for (auto [crpair, pc] : client_perf_counters) {
+      PerfCounters *perf_counters = nullptr;
+      std::swap(perf_counters, pc);
+      if (perf_counters != nullptr) {
+        m_cct->get_perfcounters_collection()->remove(perf_counters);
+        delete perf_counters;
+      }
+    }
+    client_perf_counters.clear();
+
+    PerfCounters *perf_counters = nullptr;
+    std::swap(perf_counters, m_perf_counters);
+    if (perf_counters != nullptr) {
+      m_cct->get_perfcounters_collection()->remove(perf_counters);
+      delete perf_counters;
+    }
   }
 
   if (pinger.joinable()) {
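
[Note on the init()/shutdown() hunks above: every PerfCounters instance follows a strict create-register-remove-delete lifecycle, and shutdown() swaps each pointer to a local before freeing it. The standalone sketch below mirrors that lifecycle outside the Ceph tree; Collection and Counter are invented stand-ins for PerfCountersCollection and PerfCounters, not Ceph APIs.]

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>

struct Counter {
  std::string name;
  uint64_t value = 0;
};

struct Collection {
  std::map<std::string, Counter*> registered;
  void add(Counter *c) { registered[c->name] = c; }
  void remove(Counter *c) { registered.erase(c->name); }
};

int main() {
  Collection collection;

  // init(): create and register, as plb.create_perf_counters() + add() do.
  Counter *global = new Counter{"mds_client_metrics"};
  collection.add(global);

  // refresh path: one labeled counter set per (client, rank) pair.
  std::map<std::pair<std::string, int>, Counter*> per_client;
  per_client[{"client.4151", 0}] = new Counter{"mds_client_metrics-a"};
  for (auto &[key, c] : per_client) {
    collection.add(c);
  }

  // shutdown(): swap each pointer to a local, unregister, then delete,
  // so no map entry is ever left dangling between remove and delete.
  for (auto &[key, c] : per_client) {
    Counter *doomed = nullptr;
    std::swap(doomed, c);
    if (doomed != nullptr) {
      collection.remove(doomed);
      delete doomed;
    }
  }
  per_client.clear();

  Counter *doomed = nullptr;
  std::swap(doomed, global);
  if (doomed != nullptr) {
    collection.remove(doomed);
    delete doomed;
  }

  std::cout << "counters still registered: " << collection.registered.size() << "\n";
}

[Unregistering before deleting matters because the collection is what the admin socket reads from; removing first guarantees no reader can reach a freed counter.]
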
rank << " has " << p.size() << " connected" << " client(s)" << dendl; + if (m_perf_counters) { + m_perf_counters->inc(l_mds_client_metrics_num_clients); + } + + std::string labels = ceph::perf_counters::key_create("mds_client_metrics-" + std::string(mds->mdsmap->get_fs_name()), + {{"client", stringify(client.name)}, + {"rank", stringify(rank)}}); + PerfCountersBuilder plb(m_cct, labels, l_mds_per_client_metrics_start, l_mds_per_client_metrics_last); + plb.add_u64(l_mds_per_client_metrics_cap_hits, + "cap_hits", "Capability hits", "hcap", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_cap_misses, + "cap_miss", "Capability misses", "mcap", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_time(l_mds_per_client_metrics_avg_read_latency, + "avg_read_latency", "Average Read Latency", "arlt", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_time(l_mds_per_client_metrics_avg_write_latency, + "avg_write_latency", "Average Write Latency", "awlt", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_time(l_mds_per_client_metrics_avg_metadata_latency, + "avg_metadata_latency", "Average Metadata Latency", "amlt", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_dentry_lease_hits, + "dentry_lease_hits", "Dentry Lease Hits", "hden", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_dentry_lease_misses, + "dentry_lease_miss", "Dentry Lease Misses", "mden", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_opened_files, + "opened_files", "Open Files", "ofil", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_opened_inodes, + "opened_inodes", "Open Inodes", "oino", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_pinned_icaps, + "pinned_icaps", "Pinned Inode Caps", "pino", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_total_inodes, + "total_inodes", "Total Inodes", "tino", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_total_read_ops, + "total_read_ops", "Total Read Operations", "rops", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_total_read_size, + "total_read_size", "Total Read Size", "rsiz", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_total_write_ops, + "total_write_ops", "Total Write Operations", "wops", PerfCountersBuilder::PRIO_CRITICAL); + plb.add_u64(l_mds_per_client_metrics_total_write_size, + "total_write_size", "Total Write Size", "wsiz", PerfCountersBuilder::PRIO_CRITICAL); + client_perf_counters[crpair] = plb.create_perf_counters(); + m_cct->get_perfcounters_collection()->add(client_perf_counters[crpair]); + } + + // update perf counters + PerfCounters *perf_counter_ptr = nullptr; + if (client_perf_counters.contains(crpair)) { + perf_counter_ptr = client_perf_counters[crpair]; + } + + if (perf_counter_ptr) { + // client capability hit ratio + perf_counter_ptr->set(l_mds_per_client_metrics_cap_hits, metrics.cap_hit_metric.hits); + perf_counter_ptr->set(l_mds_per_client_metrics_cap_misses, metrics.cap_hit_metric.misses); + + // some averages + if (metrics.read_latency_metric.updated) { + utime_t ravg(metrics.read_latency_metric.mean.tv.tv_sec * 100, + metrics.read_latency_metric.mean.tv.tv_nsec / 1000000); + perf_counter_ptr->tset(l_mds_per_client_metrics_avg_read_latency, ravg); + } + if (metrics.write_latency_metric.updated) { + utime_t wavg(metrics.write_latency_metric.mean.tv.tv_sec * 100, + 
@@ -260,6 +418,13 @@ void MetricAggregator::remove_metrics_for_rank(const entity_inst_t &client,
     ceph_assert(rm);
     dout(20) << ": rank=" << rank << " has " << p.size() << " connected"
              << " client(s)" << dendl;
+    auto crpair = std::make_pair(client, rank);
+    m_cct->get_perfcounters_collection()->remove(client_perf_counters[crpair]);
+    delete client_perf_counters[crpair];
+    client_perf_counters.erase(crpair);
+  }
+  if (m_perf_counters) {
+    m_perf_counters->dec(l_mds_client_metrics_num_clients);
   }
 
   auto sub_key_func = [client, rank](const MDSPerfMetricSubKeyDescriptor &d,
@@ -315,6 +480,10 @@ void MetricAggregator::handle_mds_metrics(const cref_t<MMDSMetrics> &m) {
           << rank << " with sequence number " << seq << dendl;
 
   std::scoped_lock locker(lock);
+  if (stopping) {
+    dout(10) << ": stopping" << dendl;
+    return;
+  }
   if (!mds_pinger.pong_received(rank, seq)) {
     return;
   }
--
cgit v1.2.3
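
[Note on the final hunk: handle_mds_metrics() now checks the stopping flag under the same lock that shutdown() takes, so a metrics message that arrives during teardown is dropped instead of touching counters that shutdown() has already freed. A minimal sketch of that guard pattern, with an invented Aggregator type standing in for MetricAggregator:]

#include <cstdint>
#include <iostream>
#include <mutex>

class Aggregator {
  std::mutex lock;
  bool stopping = false;

public:
  void shutdown() {
    std::scoped_lock locker(lock);
    stopping = true;   // counters are deallocated after this point
  }

  void handle_metrics(int rank, uint64_t seq) {
    std::scoped_lock locker(lock);
    if (stopping) {    // the guard the patch adds before any counter access
      std::cout << "dropping late metrics from rank " << rank << "\n";
      return;
    }
    std::cout << "processing seq " << seq << " from rank " << rank << "\n";
  }
};

int main() {
  Aggregator agg;
  agg.handle_metrics(0, 1);  // processed
  agg.shutdown();
  agg.handle_metrics(0, 2);  // dropped: arrived after shutdown began
}

[Because both paths serialize on the same mutex, the flag check and the counter teardown can never interleave; this is the ordering the patch depends on.]
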