summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/ceph')
-rw-r--r--collectors/python.d.plugin/ceph/README.md20
-rw-r--r--collectors/python.d.plugin/ceph/ceph.chart.py56
-rw-r--r--collectors/python.d.plugin/ceph/ceph.conf4
3 files changed, 62 insertions, 18 deletions
diff --git a/collectors/python.d.plugin/ceph/README.md b/collectors/python.d.plugin/ceph/README.md
index f5b36e149..5d671f2aa 100644
--- a/collectors/python.d.plugin/ceph/README.md
+++ b/collectors/python.d.plugin/ceph/README.md
@@ -1,8 +1,12 @@
-# ceph
+<!--
+title: "CEPH monitoring with Netdata"
+custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/ceph/README.md
+sidebar_label: "CEPH"
+-->
-This module monitors the ceph cluster usage and consumption data of a server.
+# CEPH monitoring with Netdata
-It produces:
+Monitors the ceph cluster usage and consumption data of a server, and produces:
- Cluster statistics (usage, available, latency, objects, read/write rate)
- OSD usage
@@ -12,7 +16,7 @@ It produces:
- Pool read/write rate
- number of objects per pool
-**Requirements:**
+## Requirements
- `rados` python module
- Granting read permissions to ceph group from keyring file
@@ -23,6 +27,14 @@ It produces:
## Configuration
+Edit the `python.d/ceph.conf` configuration file using `edit-config` from the Netdata [config
+directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`.
+
+```bash
+cd /etc/netdata # Replace this path with your Netdata config directory, if different
+sudo ./edit-config python.d/ceph.conf
+```
+
Sample:
```yaml
diff --git a/collectors/python.d.plugin/ceph/ceph.chart.py b/collectors/python.d.plugin/ceph/ceph.chart.py
index fe9b2b9ab..494eef45d 100644
--- a/collectors/python.d.plugin/ceph/ceph.chart.py
+++ b/collectors/python.d.plugin/ceph/ceph.chart.py
@@ -5,6 +5,7 @@
try:
import rados
+
CEPH = True
except ImportError:
CEPH = False
@@ -30,6 +31,7 @@ ORDER = [
'pool_read_operations',
'pool_write_operations',
'osd_usage',
+ 'osd_size',
'osd_apply_latency',
'osd_commit_latency'
]
@@ -100,6 +102,10 @@ CHARTS = {
'options': [None, 'Ceph OSDs', 'KiB', 'osd', 'ceph.osd_usage', 'line'],
'lines': []
},
+ 'osd_size': {
+ 'options': [None, 'Ceph OSDs size', 'KiB', 'osd', 'ceph.osd_size', 'line'],
+ 'lines': []
+ },
'osd_apply_latency': {
'options': [None, 'Ceph OSDs apply latency', 'milliseconds', 'osd', 'ceph.apply_latency', 'line'],
'lines': []
@@ -119,6 +125,7 @@ class Service(SimpleService):
self.definitions = CHARTS
self.config_file = self.configuration.get('config_file')
self.keyring_file = self.configuration.get('keyring_file')
+ self.rados_id = self.configuration.get('rados_id', 'admin')
def check(self):
"""
@@ -147,7 +154,8 @@ class Service(SimpleService):
return False
try:
self.cluster = rados.Rados(conffile=self.config_file,
- conf=dict(keyring=self.keyring_file))
+ conf=dict(keyring=self.keyring_file),
+ rados_id=self.rados_id)
self.cluster.connect()
except rados.Error as error:
self.error(error)
@@ -161,7 +169,7 @@ class Service(SimpleService):
:return: None
"""
# Pool lines
- for pool in sorted(self._get_df()['pools'], key=lambda x:sorted(x.keys())):
+ for pool in sorted(self._get_df()['pools'], key=lambda x: sorted(x.keys())):
self.definitions['pool_usage']['lines'].append([pool['name'],
pool['name'],
'absolute'])
@@ -169,23 +177,26 @@ class Service(SimpleService):
pool['name'],
'absolute'])
self.definitions['pool_read_bytes']['lines'].append(['read_{0}'.format(pool['name']),
- pool['name'],
- 'absolute', 1, 1024])
- self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']),
pool['name'],
'absolute', 1, 1024])
+ self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']),
+ pool['name'],
+ 'absolute', 1, 1024])
self.definitions['pool_read_operations']['lines'].append(['read_operations_{0}'.format(pool['name']),
- pool['name'],
- 'absolute'])
- self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']),
pool['name'],
'absolute'])
+ self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']),
+ pool['name'],
+ 'absolute'])
# OSD lines
- for osd in sorted(self._get_osd_df()['nodes'], key=lambda x:sorted(x.keys())):
+ for osd in sorted(self._get_osd_df()['nodes'], key=lambda x: sorted(x.keys())):
self.definitions['osd_usage']['lines'].append([osd['name'],
osd['name'],
'absolute'])
+ self.definitions['osd_size']['lines'].append(['size_{0}'.format(osd['name']),
+ osd['name'],
+ 'absolute'])
self.definitions['osd_apply_latency']['lines'].append(['apply_latency_{0}'.format(osd['name']),
osd['name'],
'absolute'])
@@ -203,8 +214,10 @@ class Service(SimpleService):
df = self._get_df()
osd_df = self._get_osd_df()
osd_perf = self._get_osd_perf()
+ osd_perf_infos = get_osd_perf_infos(osd_perf)
pool_stats = self._get_osd_pool_stats()
- data.update(self._get_general(osd_perf, pool_stats))
+
+ data.update(self._get_general(osd_perf_infos, pool_stats))
for pool in df['pools']:
data.update(self._get_pool_usage(pool))
data.update(self._get_pool_objects(pool))
@@ -212,14 +225,15 @@ class Service(SimpleService):
data.update(self._get_pool_rw(pool_io))
for osd in osd_df['nodes']:
data.update(self._get_osd_usage(osd))
- for osd_apply_commit in osd_perf['osd_perf_infos']:
+ data.update(self._get_osd_size(osd))
+ for osd_apply_commit in osd_perf_infos:
data.update(self._get_osd_latency(osd_apply_commit))
return data
except (ValueError, AttributeError) as error:
self.error(error)
return None
- def _get_general(self, osd_perf, pool_stats):
+ def _get_general(self, osd_perf_infos, pool_stats):
"""
Get ceph's general usage
:return: dict
@@ -237,7 +251,7 @@ class Service(SimpleService):
write_bytes_sec += pool_rw_io_b['client_io_rate'].get('write_bytes_sec', 0)
read_op_per_sec += pool_rw_io_b['client_io_rate'].get('read_op_per_sec', 0)
write_op_per_sec += pool_rw_io_b['client_io_rate'].get('write_op_per_sec', 0)
- for perf in osd_perf['osd_perf_infos']:
+ for perf in osd_perf_infos:
apply_latency += perf['perf_stats']['apply_latency_ms']
commit_latency += perf['perf_stats']['commit_latency_ms']
@@ -291,6 +305,14 @@ class Service(SimpleService):
return {osd['name']: float(osd['kb_used'])}
@staticmethod
+ def _get_osd_size(osd):
+ """
Process raw data into an osd dict to get the osd total size (KiB)
:return: An osd dict keyed by the osd name, with the total size in KiB as the value
+ """
+ return {'size_{0}'.format(osd['name']): float(osd['kb'])}
+
+ @staticmethod
def _get_osd_latency(osd):
"""
Get ceph osd apply and commit latency
@@ -342,3 +364,11 @@ class Service(SimpleService):
'prefix': 'osd pool stats',
'format': 'json'
}), '')[1].decode('utf-8'))
+
+
+def get_osd_perf_infos(osd_perf):
+ # https://github.com/netdata/netdata/issues/8247
+# module uses 'osd_perf_infos' data; it has been moved under 'osdstats' since Ceph v14.2
+ if 'osd_perf_infos' in osd_perf:
+ return osd_perf['osd_perf_infos']
+ return osd_perf['osdstats']['osd_perf_infos']
diff --git a/collectors/python.d.plugin/ceph/ceph.conf b/collectors/python.d.plugin/ceph/ceph.conf
index 4caabbf6d..81788e866 100644
--- a/collectors/python.d.plugin/ceph/ceph.conf
+++ b/collectors/python.d.plugin/ceph/ceph.conf
@@ -64,10 +64,12 @@
# config_file: 'config_file' # Ceph config file.
# keyring_file: 'keyring_file' # Ceph keyring file. netdata user must be added into ceph group
# # and keyring file must be read group permission.
+# rados_id: 'rados username' # ID used to connect to the ceph cluster. Allows
+# # using a read-only key for pulling data instead of the admin key
# ----------------------------------------------------------------------
# AUTO-DETECTION JOBS
# only one of them will run (they have the same name)
#
config_file: '/etc/ceph/ceph.conf'
keyring_file: '/etc/ceph/ceph.client.admin.keyring'
-
+rados_id: 'admin'