diff options
Diffstat (limited to 'collectors/python.d.plugin/ceph')
-rw-r--r-- | collectors/python.d.plugin/ceph/README.md | 20 | ||||
-rw-r--r-- | collectors/python.d.plugin/ceph/ceph.chart.py | 56 | ||||
-rw-r--r-- | collectors/python.d.plugin/ceph/ceph.conf | 4 |
3 files changed, 62 insertions, 18 deletions
diff --git a/collectors/python.d.plugin/ceph/README.md b/collectors/python.d.plugin/ceph/README.md index f5b36e149..5d671f2aa 100644 --- a/collectors/python.d.plugin/ceph/README.md +++ b/collectors/python.d.plugin/ceph/README.md @@ -1,8 +1,12 @@ -# ceph +<!-- +title: "CEPH monitoring with Netdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/ceph/README.md +sidebar_label: "CEPH" +--> -This module monitors the ceph cluster usage and consumption data of a server. +# CEPH monitoring with Netdata -It produces: +Monitors the ceph cluster usage and consumption data of a server, and produces: - Cluster statistics (usage, available, latency, objects, read/write rate) - OSD usage @@ -12,7 +16,7 @@ It produces: - Pool read/write rate - number of objects per pool -**Requirements:** +## Requirements - `rados` python module - Granting read permissions to ceph group from keyring file @@ -23,6 +27,14 @@ It produces: ## Configuration +Edit the `python.d/ceph.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/ceph.conf +``` + Sample: ```yaml diff --git a/collectors/python.d.plugin/ceph/ceph.chart.py b/collectors/python.d.plugin/ceph/ceph.chart.py index fe9b2b9ab..494eef45d 100644 --- a/collectors/python.d.plugin/ceph/ceph.chart.py +++ b/collectors/python.d.plugin/ceph/ceph.chart.py @@ -5,6 +5,7 @@ try: import rados + CEPH = True except ImportError: CEPH = False @@ -30,6 +31,7 @@ ORDER = [ 'pool_read_operations', 'pool_write_operations', 'osd_usage', + 'osd_size', 'osd_apply_latency', 'osd_commit_latency' ] @@ -100,6 +102,10 @@ CHARTS = { 'options': [None, 'Ceph OSDs', 'KiB', 'osd', 'ceph.osd_usage', 'line'], 'lines': [] }, + 'osd_size': { + 'options': [None, 'Ceph OSDs size', 'KiB', 'osd', 'ceph.osd_size', 'line'], + 'lines': [] + }, 'osd_apply_latency': { 'options': [None, 'Ceph OSDs apply latency', 'milliseconds', 'osd', 'ceph.apply_latency', 'line'], 'lines': [] @@ -119,6 +125,7 @@ class Service(SimpleService): self.definitions = CHARTS self.config_file = self.configuration.get('config_file') self.keyring_file = self.configuration.get('keyring_file') + self.rados_id = self.configuration.get('rados_id', 'admin') def check(self): """ @@ -147,7 +154,8 @@ class Service(SimpleService): return False try: self.cluster = rados.Rados(conffile=self.config_file, - conf=dict(keyring=self.keyring_file)) + conf=dict(keyring=self.keyring_file), + rados_id=self.rados_id) self.cluster.connect() except rados.Error as error: self.error(error) @@ -161,7 +169,7 @@ class Service(SimpleService): :return: None """ # Pool lines - for pool in sorted(self._get_df()['pools'], key=lambda x:sorted(x.keys())): + for pool in sorted(self._get_df()['pools'], key=lambda x: sorted(x.keys())): self.definitions['pool_usage']['lines'].append([pool['name'], pool['name'], 'absolute']) @@ -169,23 +177,26 @@ class Service(SimpleService): pool['name'], 'absolute']) self.definitions['pool_read_bytes']['lines'].append(['read_{0}'.format(pool['name']), - pool['name'], - 'absolute', 1, 1024]) - self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']), pool['name'], 'absolute', 1, 1024]) + self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']), + pool['name'], + 'absolute', 1, 1024]) self.definitions['pool_read_operations']['lines'].append(['read_operations_{0}'.format(pool['name']), - pool['name'], - 'absolute']) - self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']), pool['name'], 'absolute']) + self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']), + pool['name'], + 'absolute']) # OSD lines - for osd in sorted(self._get_osd_df()['nodes'], key=lambda x:sorted(x.keys())): + for osd in sorted(self._get_osd_df()['nodes'], key=lambda x: sorted(x.keys())): self.definitions['osd_usage']['lines'].append([osd['name'], osd['name'], 'absolute']) + self.definitions['osd_size']['lines'].append(['size_{0}'.format(osd['name']), + osd['name'], + 'absolute']) self.definitions['osd_apply_latency']['lines'].append(['apply_latency_{0}'.format(osd['name']), osd['name'], 'absolute']) @@ -203,8 +214,10 @@ class Service(SimpleService): df = self._get_df() osd_df = self._get_osd_df() osd_perf = self._get_osd_perf() + osd_perf_infos = get_osd_perf_infos(osd_perf) pool_stats = self._get_osd_pool_stats() - data.update(self._get_general(osd_perf, pool_stats)) + + data.update(self._get_general(osd_perf_infos, pool_stats)) for pool in df['pools']: data.update(self._get_pool_usage(pool)) data.update(self._get_pool_objects(pool)) @@ -212,14 +225,15 @@ class Service(SimpleService): data.update(self._get_pool_rw(pool_io)) for osd in osd_df['nodes']: data.update(self._get_osd_usage(osd)) - for osd_apply_commit in osd_perf['osd_perf_infos']: + data.update(self._get_osd_size(osd)) + for osd_apply_commit in osd_perf_infos: data.update(self._get_osd_latency(osd_apply_commit)) return data except (ValueError, AttributeError) as error: self.error(error) return None - def _get_general(self, osd_perf, pool_stats): + def _get_general(self, osd_perf_infos, pool_stats): """ Get ceph's general usage :return: dict @@ -237,7 +251,7 @@ class Service(SimpleService): write_bytes_sec += pool_rw_io_b['client_io_rate'].get('write_bytes_sec', 0) read_op_per_sec += pool_rw_io_b['client_io_rate'].get('read_op_per_sec', 0) write_op_per_sec += pool_rw_io_b['client_io_rate'].get('write_op_per_sec', 0) - for perf in osd_perf['osd_perf_infos']: + for perf in osd_perf_infos: apply_latency += perf['perf_stats']['apply_latency_ms'] commit_latency += perf['perf_stats']['commit_latency_ms'] @@ -291,6 +305,14 @@ class Service(SimpleService): return {osd['name']: float(osd['kb_used'])} @staticmethod + def _get_osd_size(osd): + """ + Process raw data into osd dict information to get osd size (kb) + :return: A osd dict with osd name's key and size bytes' value + """ + return {'size_{0}'.format(osd['name']): float(osd['kb'])} + + @staticmethod def _get_osd_latency(osd): """ Get ceph osd apply and commit latency @@ -342,3 +364,11 @@ class Service(SimpleService): 'prefix': 'osd pool stats', 'format': 'json' }), '')[1].decode('utf-8')) + + +def get_osd_perf_infos(osd_perf): + # https://github.com/netdata/netdata/issues/8247 + # module uses 'osd_perf_infos' data, its been moved under 'osdstats` since Ceph v14.2 + if 'osd_perf_infos' in osd_perf: + return osd_perf['osd_perf_infos'] + return osd_perf['osdstats']['osd_perf_infos'] diff --git a/collectors/python.d.plugin/ceph/ceph.conf b/collectors/python.d.plugin/ceph/ceph.conf index 4caabbf6d..81788e866 100644 --- a/collectors/python.d.plugin/ceph/ceph.conf +++ b/collectors/python.d.plugin/ceph/ceph.conf @@ -64,10 +64,12 @@ # config_file: 'config_file' # Ceph config file. # keyring_file: 'keyring_file' # Ceph keyring file. netdata user must be added into ceph group # # and keyring file must be read group permission. +# rados_id: 'rados username' # ID used to connect to ceph cluster. Allows +# # creating a read only key for pulling data v.s. admin # ---------------------------------------------------------------------- # AUTO-DETECTION JOBS # only one of them will run (they have the same name) # config_file: '/etc/ceph/ceph.conf' keyring_file: '/etc/ceph/ceph.client.admin.keyring' - +rados_id: 'admin' |