path: root/src/mds/MetricAggregator.cc
Diffstat (limited to 'src/mds/MetricAggregator.cc')
-rw-r--r--  src/mds/MetricAggregator.cc  169
1 file changed, 169 insertions, 0 deletions
diff --git a/src/mds/MetricAggregator.cc b/src/mds/MetricAggregator.cc
index 6487084fb..9765d4d5b 100644
--- a/src/mds/MetricAggregator.cc
+++ b/src/mds/MetricAggregator.cc
@@ -4,6 +4,9 @@
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
+#include "common/ceph_context.h"
+#include "common/perf_counters_key.h"
+
#include "MDSRank.h"
#include "MetricAggregator.h"
#include "mgr/MgrClient.h"
@@ -13,8 +16,36 @@
#undef dout_prefix
#define dout_prefix *_dout << "mds.metric.aggregator" << " " << __func__
+// Performance Counters
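+// rank-wide client metrics (one labeled instance per MDS rank)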
+enum {
+ l_mds_client_metrics_start = 10000,
+ l_mds_client_metrics_num_clients,
+ l_mds_client_metrics_last
+};
+
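+// per-client metrics (one labeled instance per client session per rank)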
+enum {
+ l_mds_per_client_metrics_start = 20000,
+ l_mds_per_client_metrics_cap_hits,
+ l_mds_per_client_metrics_cap_misses,
+ l_mds_per_client_metrics_avg_read_latency,
+ l_mds_per_client_metrics_avg_write_latency,
+ l_mds_per_client_metrics_avg_metadata_latency,
+ l_mds_per_client_metrics_dentry_lease_hits,
+ l_mds_per_client_metrics_dentry_lease_misses,
+ l_mds_per_client_metrics_opened_files,
+ l_mds_per_client_metrics_opened_inodes,
+ l_mds_per_client_metrics_pinned_icaps,
+ l_mds_per_client_metrics_total_inodes,
+ l_mds_per_client_metrics_total_read_ops,
+ l_mds_per_client_metrics_total_read_size,
+ l_mds_per_client_metrics_total_write_ops,
+ l_mds_per_client_metrics_total_write_size,
+ l_mds_per_client_metrics_last
+};
+
MetricAggregator::MetricAggregator(CephContext *cct, MDSRank *mds, MgrClient *mgrc)
: Dispatcher(cct),
+ m_cct(cct),
mds(mds),
mgrc(mgrc),
mds_pinger(mds) {
@@ -32,6 +63,15 @@ void MetricAggregator::ping_all_active_ranks() {
int MetricAggregator::init() {
dout(10) << dendl;
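+ // register the rank-wide labeled counters, keyed by fs_name and MDS global id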
+ std::string labels = ceph::perf_counters::key_create("mds_client_metrics",
+ {{"fs_name", mds->mdsmap->get_fs_name()},
+ {"id", stringify(mds->get_global_id())}});
+ PerfCountersBuilder plb(m_cct, labels, l_mds_client_metrics_start, l_mds_client_metrics_last);
+ plb.add_u64(l_mds_client_metrics_num_clients,
+ "num_clients", "Numer of client sessions", "mcli", PerfCountersBuilder::PRIO_CRITICAL);
+ m_perf_counters = plb.create_perf_counters();
+ m_cct->get_perfcounters_collection()->add(m_perf_counters);
+
pinger = std::thread([this]() {
std::unique_lock locker(lock);
while (!stopping) {
@@ -61,6 +101,24 @@ void MetricAggregator::shutdown() {
std::scoped_lock locker(lock);
ceph_assert(!stopping);
stopping = true;
+
+ // dealloc per-client perf counter
+ for (auto [crpair, pc] : client_perf_counters) {
+ PerfCounters *perf_counters = nullptr;
+ std::swap(perf_counters, pc);
+ if (perf_counters != nullptr) {
+ m_cct->get_perfcounters_collection()->remove(perf_counters);
+ delete perf_counters;
+ }
+ }
+ client_perf_counters.clear();
+
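+ // dealloc the rank-wide perf counters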
+ PerfCounters *perf_counters = nullptr;
+ std::swap(perf_counters, m_perf_counters);
+ if (perf_counters != nullptr) {
+ m_cct->get_perfcounters_collection()->remove(perf_counters);
+ delete perf_counters;
+ }
}
if (pinger.joinable()) {
@@ -97,10 +155,110 @@ void MetricAggregator::refresh_metrics_for_rank(const entity_inst_t &client,
<< metrics << dendl;
auto &p = clients_by_rank.at(rank);
+ auto crpair = std::make_pair(client, rank);
bool ins = p.insert(client).second;
if (ins) {
dout(20) << ": rank=" << rank << " has " << p.size() << " connected"
<< " client(s)" << dendl;
+ if (m_perf_counters) {
+ m_perf_counters->inc(l_mds_client_metrics_num_clients);
+ }
+
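+ // first time this client is seen on this rank: register its labeled counters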
+ std::string labels = ceph::perf_counters::key_create("mds_client_metrics-" + std::string(mds->mdsmap->get_fs_name()),
+ {{"client", stringify(client.name)},
+ {"rank", stringify(rank)}});
+ PerfCountersBuilder plb(m_cct, labels, l_mds_per_client_metrics_start, l_mds_per_client_metrics_last);
+ plb.add_u64(l_mds_per_client_metrics_cap_hits,
+ "cap_hits", "Capability hits", "hcap", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_cap_misses,
+ "cap_miss", "Capability misses", "mcap", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_time(l_mds_per_client_metrics_avg_read_latency,
+ "avg_read_latency", "Average Read Latency", "arlt", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_time(l_mds_per_client_metrics_avg_write_latency,
+ "avg_write_latency", "Average Write Latency", "awlt", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_time(l_mds_per_client_metrics_avg_metadata_latency,
+ "avg_metadata_latency", "Average Metadata Latency", "amlt", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_dentry_lease_hits,
+ "dentry_lease_hits", "Dentry Lease Hits", "hden", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_dentry_lease_misses,
+ "dentry_lease_miss", "Dentry Lease Misses", "mden", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_opened_files,
+ "opened_files", "Open Files", "ofil", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_opened_inodes,
+ "opened_inodes", "Open Inodes", "oino", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_pinned_icaps,
+ "pinned_icaps", "Pinned Inode Caps", "pino", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_total_inodes,
+ "total_inodes", "Total Inodes", "tino", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_total_read_ops,
+ "total_read_ops", "Total Read Operations", "rops", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_total_read_size,
+ "total_read_size", "Total Read Size", "rsiz", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_total_write_ops,
+ "total_write_ops", "Total Write Operations", "wops", PerfCountersBuilder::PRIO_CRITICAL);
+ plb.add_u64(l_mds_per_client_metrics_total_write_size,
+ "total_write_size", "Total Write Size", "wsiz", PerfCountersBuilder::PRIO_CRITICAL);
+ client_perf_counters[crpair] = plb.create_perf_counters();
+ m_cct->get_perfcounters_collection()->add(client_perf_counters[crpair]);
+ }
+
+ // update perf counters
+ PerfCounters *perf_counter_ptr = nullptr;
+ if (client_perf_counters.contains(crpair)) {
+ perf_counter_ptr = client_perf_counters[crpair];
+ }
+
+ if (perf_counter_ptr) {
+ // client capability hit ratio
+ perf_counter_ptr->set(l_mds_per_client_metrics_cap_hits, metrics.cap_hit_metric.hits);
+ perf_counter_ptr->set(l_mds_per_client_metrics_cap_misses, metrics.cap_hit_metric.misses);
+
+ // some averages
+ if (metrics.read_latency_metric.updated) {
+ utime_t ravg(metrics.read_latency_metric.mean.tv.tv_sec,
+ metrics.read_latency_metric.mean.tv.tv_nsec);
+ perf_counter_ptr->tset(l_mds_per_client_metrics_avg_read_latency, ravg);
+ }
+ if (metrics.write_latency_metric.updated) {
+ utime_t wavg(metrics.write_latency_metric.mean.tv.tv_sec,
+ metrics.write_latency_metric.mean.tv.tv_nsec);
+ perf_counter_ptr->tset(l_mds_per_client_metrics_avg_write_latency, wavg);
+ }
+ if (metrics.metadata_latency_metric.updated) {
+ utime_t mavg(metrics.metadata_latency_metric.mean.tv.tv_sec,
+ metrics.metadata_latency_metric.mean.tv.tv_nsec);
+ perf_counter_ptr->tset(l_mds_per_client_metrics_avg_metadata_latency, mavg);
+ }
+
+ // dentry leases
+ if (metrics.dentry_lease_metric.updated) {
+ perf_counter_ptr->set(l_mds_per_client_metrics_dentry_lease_hits, metrics.dentry_lease_metric.hits);
+ perf_counter_ptr->set(l_mds_per_client_metrics_dentry_lease_misses, metrics.dentry_lease_metric.misses);
+ }
+
+ // file+inode opens, pinned inode caps
+ if (metrics.opened_files_metric.updated) {
+ perf_counter_ptr->set(l_mds_per_client_metrics_opened_files, metrics.opened_files_metric.opened_files);
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_inodes, metrics.opened_files_metric.total_inodes);
+ }
+ if (metrics.opened_inodes_metric.updated) {
+ perf_counter_ptr->set(l_mds_per_client_metrics_opened_inodes, metrics.opened_inodes_metric.opened_inodes);
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_inodes, metrics.opened_inodes_metric.total_inodes);
+ }
+ if (metrics.pinned_icaps_metric.updated) {
+ perf_counter_ptr->set(l_mds_per_client_metrics_pinned_icaps, metrics.pinned_icaps_metric.pinned_icaps);
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_inodes, metrics.pinned_icaps_metric.total_inodes);
+ }
+
+ // read+write io metrics
+ if (metrics.read_io_sizes_metric.updated) {
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_read_ops, metrics.read_io_sizes_metric.total_ops);
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_read_size, metrics.read_io_sizes_metric.total_size);
+ }
+ if (metrics.write_io_sizes_metric.updated) {
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_write_ops, metrics.write_io_sizes_metric.total_ops);
+ perf_counter_ptr->set(l_mds_per_client_metrics_total_write_size, metrics.write_io_sizes_metric.total_size);
+ }
}
auto update_counter_func = [&metrics](const MDSPerformanceCounterDescriptor &d,
@@ -260,6 +418,13 @@ void MetricAggregator::remove_metrics_for_rank(const entity_inst_t &client,
ceph_assert(rm);
dout(20) << ": rank=" << rank << " has " << p.size() << " connected"
<< " client(s)" << dendl;
+ auto crpair = std::make_pair(client, rank);
+ m_cct->get_perfcounters_collection()->remove(client_perf_counters[crpair]);
+ delete client_perf_counters[crpair];
+ client_perf_counters.erase(crpair);
+ }
+ if (m_perf_counters) {
+ m_perf_counters->dec(l_mds_client_metrics_num_clients);
}
auto sub_key_func = [client, rank](const MDSPerfMetricSubKeyDescriptor &d,
@@ -315,6 +480,10 @@ void MetricAggregator::handle_mds_metrics(const cref_t<MMDSMetrics> &m) {
<< rank << " with sequence number " << seq << dendl;
std::scoped_lock locker(lock);
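+ // drop metrics that arrive after shutdown has started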
+ if (stopping) {
+ dout(10) << ": stopping" << dendl;
+ return;
+ }
if (!mds_pinger.pong_received(rank, seq)) {
return;
}