summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/mdstat
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/mdstat')
-rw-r--r--collectors/python.d.plugin/mdstat/Makefile.inc13
-rw-r--r--collectors/python.d.plugin/mdstat/README.md26
-rw-r--r--collectors/python.d.plugin/mdstat/mdstat.chart.py205
-rw-r--r--collectors/python.d.plugin/mdstat/mdstat.conf32
4 files changed, 276 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/mdstat/Makefile.inc b/collectors/python.d.plugin/mdstat/Makefile.inc
new file mode 100644
index 000000000..5125a271b
--- /dev/null
+++ b/collectors/python.d.plugin/mdstat/Makefile.inc
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# THIS IS NOT A COMPLETE Makefile
+# IT IS INCLUDED BY ITS PARENT'S Makefile.am
+# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
+
+# install these files
+dist_python_DATA += mdstat/mdstat.chart.py
+dist_pythonconfig_DATA += mdstat/mdstat.conf
+
+# do not install these files, but include them in the distribution
+dist_noinst_DATA += mdstat/README.md mdstat/Makefile.inc
+
diff --git a/collectors/python.d.plugin/mdstat/README.md b/collectors/python.d.plugin/mdstat/README.md
new file mode 100644
index 000000000..1ff8f7dab
--- /dev/null
+++ b/collectors/python.d.plugin/mdstat/README.md
@@ -0,0 +1,26 @@
+# mdstat
+
+This module monitors md arrays by parsing `/proc/mdstat`.
+
+It produces:
+
+1. **Health** Number of failed disks in every array (aggregate chart).
+
+2. **Disks stats**
+ * total (number of devices the array would ideally have)
+ * inuse (number of devices currently in use)
+
+3. **Current status**
+ * resync in percent
+ * recovery in percent
+ * reshape in percent
+ * check in percent
+
+4. **Operation status** (if resync/recovery/reshape/check is active)
+ * finish in seconds (approximate time until the operation completes)
+ * speed in KB/s
+
+### configuration
+No configuration is needed.
+
+---
diff --git a/collectors/python.d.plugin/mdstat/mdstat.chart.py b/collectors/python.d.plugin/mdstat/mdstat.chart.py
new file mode 100644
index 000000000..b7306b6a7
--- /dev/null
+++ b/collectors/python.d.plugin/mdstat/mdstat.chart.py
@@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+# Description: mdstat netdata python.d module
+# Author: Ilya Mashchenko (l2isbad)
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import re
+
+from collections import defaultdict
+
+from bases.FrameworkServices.SimpleService import SimpleService
+
+# File listing the md arrays; read by Service.get_mds() on every collection.
+MDSTAT = '/proc/mdstat'
+# Per-array mismatch counter file; {0} is replaced with the array name.
+MISMATCH_CNT = '/sys/block/{0}/md/mismatch_cnt'
+
+# Static chart order; per-array charts are added at runtime by
+# Service.add_new_md_charts().
+ORDER = ['mdstat_health']
+
+# Static chart definitions. 'mdstat_health' starts without dimensions: one
+# '<array>_health' dimension is added for every array that is discovered.
+CHARTS = {
+ 'mdstat_health': {
+ 'options': [None, 'Faulty Devices In MD', 'failed disks', 'health', 'md.health', 'line'],
+ 'lines': []
+ }
+}
+
+# Both patterns are applied to one array's rows joined into a single string
+# by find_mds(); that string always starts with a space, which is why the
+# patterns begin with one.
+
+# Captures the array name and the '[total/inuse]' device counters of an
+# active array, e.g. ' md0 : active ... [2/2]'.
+RE_DISKS = re.compile(r' (?P<array>[a-zA-Z_0-9]+) : active .+\['
+ r'(?P<total_disks>[0-9]+)/'
+ r'(?P<inuse_disks>[0-9]+)\]')
+
+# Captures a running operation of an active array: the operation name
+# (lowercase word before ' ='), its progress number, the 'finish=<n>min'
+# estimate and the integer part of 'speed=<n>'.
+RE_STATUS = re.compile(r' (?P<array>[a-zA-Z_0-9]+) : active .+ '
+ r'(?P<operation>[a-z]+) =[ ]{1,2}'
+ r'(?P<operation_status>[0-9.]+).+finish='
+ r'(?P<finish_in>([0-9.]+))min speed='
+ r'(?P<speed>[0-9]+)')
+
+
+def md_charts(name):
+    """
+    Build the chart definitions that belong to a single md array.
+
+    :param name: array name; used as the chart family and as the prefix of
+                 every chart id and dimension id
+    :return: tuple (order, charts) - the list of chart ids and a dict that
+             maps each chart id to its 'options' and 'lines'
+    """
+    def prefixed(suffix):
+        return '{0}_{1}'.format(name, suffix)
+
+    def dimension(suffix, label, divisor=None):
+        # Scaled dimensions carry two extra items, presumably the
+        # (multiplier, divisor) pair of the charting framework - TODO confirm.
+        line = [prefixed(suffix), label, 'absolute']
+        if divisor is not None:
+            line.extend([1, divisor])
+        return line
+
+    disks = prefixed('disks')
+    operation = prefixed('operation')
+    mismatch = prefixed('mismatch_cnt')
+    finish = prefixed('finish')
+    speed = prefixed('speed')
+
+    charts = dict()
+
+    charts[disks] = {
+        'options': [None, 'Disks Stats', 'disks', name, 'md.disks', 'stacked'],
+        'lines': [
+            dimension('total_disks', 'total'),
+            dimension('inuse_disks', 'inuse'),
+        ],
+    }
+
+    charts[operation] = {
+        'options': [None, 'Current Status', 'percent', name, 'md.status', 'line'],
+        'lines': [
+            dimension(kind, kind, 100)
+            for kind in ('resync', 'recovery', 'reshape', 'check')
+        ],
+    }
+
+    charts[mismatch] = {
+        'options': [None, 'Mismatch Count', 'unsynchronized blocks', name, 'md.mismatch_cnt', 'line'],
+        'lines': [
+            dimension('mismatch_cnt', 'count'),
+        ],
+    }
+
+    # NOTE(review): this chart and the speed chart below share the context
+    # 'md.rate' - confirm whether that is intentional.
+    charts[finish] = {
+        'options': [None, 'Approximate Time Until Finish', 'seconds', name, 'md.rate', 'line'],
+        'lines': [
+            dimension('finish_in', 'finish in', 1000),
+        ],
+    }
+
+    charts[speed] = {
+        'options': [None, 'Operation Speed', 'KB/s', name, 'md.rate', 'line'],
+        'lines': [
+            dimension('speed', 'speed', 1000),
+        ],
+    }
+
+    return [disks, operation, mismatch, finish, speed], charts
+
+
+class MD:
+    """
+    One active md array as parsed from /proc/mdstat.
+
+    :param raw_data: dict of regex groups produced by find_mds(); it holds
+                     'array', 'total_disks' and 'inuse_disks', plus
+                     'operation', 'operation_status', 'finish_in' and
+                     'speed' when RE_STATUS matched as well
+    """
+    def __init__(self, raw_data):
+        self.name = raw_data['array']
+        self.d = raw_data
+
+    def data(self):
+        """
+        Build the metrics of this array.
+
+        :return: dict mapping '<array>_<metric>' to its value
+        """
+        rv = {
+            'total_disks': self.d['total_disks'],
+            'inuse_disks': self.d['inuse_disks'],
+            # Number of devices missing from the array.
+            'health': int(self.d['total_disks']) - int(self.d['inuse_disks']),
+            'resync': 0,
+            'recovery': 0,
+            'reshape': 0,
+            'check': 0,
+            'finish_in': 0,
+            'speed': 0,
+        }
+
+        # read_lines() returns a list of lines, not a number, so the counter
+        # has to be taken from the first line and converted explicitly.
+        lines = read_lines(MISMATCH_CNT.format(self.name))
+        if lines:
+            try:
+                rv['mismatch_cnt'] = int(lines[0].strip())
+            except ValueError:
+                # Best effort: leave the metric out when the file content
+                # is not a plain integer.
+                pass
+
+        if self.d.get('operation'):
+            # Values are pre-multiplied to keep the fractional part; the
+            # factors match the 100 / 1000 scaling items of the dimensions
+            # declared in md_charts().
+            rv[self.d['operation']] = float(self.d['operation_status']) * 100
+            # 'finish_in' is parsed in minutes; * 60 converts it to seconds.
+            rv['finish_in'] = float(self.d['finish_in']) * 1000 * 60
+            rv['speed'] = float(self.d['speed']) * 1000
+
+        return dict(('{0}_{1}'.format(self.name, k), v) for k, v in rv.items())
+
+
+class Service(SimpleService):
+    """
+    Collector service for the md arrays listed in /proc/mdstat.
+    """
+    def __init__(self, configuration=None, name=None):
+        SimpleService.__init__(self, configuration=configuration, name=name)
+        self.order = ORDER
+        self.definitions = CHARTS
+        # Names of the arrays that already have their charts created.
+        self.mds = list()
+
+    @staticmethod
+    def get_mds():
+        """
+        Read MDSTAT and parse the active arrays out of it.
+        :return: list of MD objects, or None if the file could not be read
+                 or is empty
+        """
+        lines = read_lines(MDSTAT)
+        return find_mds(lines) if lines else None
+
+    def get_data(self):
+        """
+        Collect the metrics of every active array; charts for arrays seen
+        for the first time are created before their data is added.
+        :return: dict, or None if no array was found
+        """
+        arrays = self.get_mds()
+        if not arrays:
+            return None
+
+        collected = dict()
+        for array in arrays:
+            if array.name not in self.mds:
+                self.mds.append(array.name)
+                self.add_new_md_charts(array.name)
+            collected.update(array.data())
+        return collected
+
+    def check(self):
+        """
+        Report whether at least one array can be parsed from MDSTAT.
+        :return: bool
+        """
+        if self.get_mds():
+            return True
+
+        self.error('Failed to read data from {0} or there is no active arrays'.format(MDSTAT))
+        return False
+
+    def add_new_md_charts(self, name):
+        """
+        Add the health dimension and all per-array charts for one array.
+        :param name: array name
+        """
+        order, charts = md_charts(name)
+
+        self.charts['mdstat_health'].add_dimension(['{0}_health'.format(name), name])
+
+        for chart_name in order:
+            definition = charts[chart_name]
+            params = [chart_name] + definition['options']
+            dims = definition['lines']
+
+            new_chart = self.charts.add_chart(params)
+            for dim in dims:
+                new_chart.add_dimension(dim)
+
+
+def find_mds(raw_data):
+    """
+    Split the rows into blank-line separated chunks and build an MD object
+    for every chunk that RE_DISKS matches.
+
+    :param raw_data: iterable of text rows
+    :return: list of MD objects sorted by array name
+    """
+    # Join the rows of every chunk into one string; each row is prefixed
+    # with a space, so every chunk starts with one.
+    chunks = list()
+    current = ''
+    for line in raw_data:
+        line = line.strip()
+        if line:
+            current = ' '.join([current, line])
+        elif current:
+            chunks.append(current)
+            current = ''
+    if current:
+        chunks.append(current)
+
+    found = list()
+    for chunk in chunks:
+        disks = RE_DISKS.search(chunk)
+        if disks is None:
+            continue
+
+        fields = disks.groupdict()
+
+        # Operation details are optional: present only while RE_STATUS matches.
+        status = RE_STATUS.search(chunk)
+        if status is not None:
+            fields.update(status.groupdict())
+
+        found.append(MD(fields))
+
+    found.sort(key=lambda md: md.name)
+    return found
+
+
+def read_lines(path):
+    """
+    Read a whole text file.
+
+    :param path: file to read
+    :return: list of lines (line endings kept), or None on IOError/OSError
+    """
+    try:
+        with open(path) as handle:
+            lines = handle.readlines()
+    except (IOError, OSError):
+        return None
+    return lines
diff --git a/collectors/python.d.plugin/mdstat/mdstat.conf b/collectors/python.d.plugin/mdstat/mdstat.conf
new file mode 100644
index 000000000..66a2f153c
--- /dev/null
+++ b/collectors/python.d.plugin/mdstat/mdstat.conf
@@ -0,0 +1,32 @@
+# netdata python.d.plugin configuration for mdstat
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0