summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/smartd_log/smartd_log.chart.py')
-rw-r--r--collectors/python.d.plugin/smartd_log/smartd_log.chart.py353
1 files changed, 353 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
new file mode 100644
index 000000000..21dbccecc
--- /dev/null
+++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
@@ -0,0 +1,353 @@
+# -*- coding: utf-8 -*-
+# Description: smart netdata python.d module
+# Author: l2isbad, vorph1
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import os
+import re
+
+from collections import namedtuple
+from time import time
+
+from bases.collection import read_last_line
+from bases.FrameworkServices.SimpleService import SimpleService
+
+# charts order (can be overridden if you want less charts, or different order)
+ORDER = ['1', '4', '5', '7', '9', '12', '193', '194', '197', '198', '200']
+
+SMART_ATTR = {
+ '1': 'Read Error Rate',
+ '2': 'Throughput Performance',
+ '3': 'Spin-Up Time',
+ '4': 'Start/Stop Count',
+ '5': 'Reallocated Sectors Count',
+ '6': 'Read Channel Margin',
+ '7': 'Seek Error Rate',
+ '8': 'Seek Time Performance',
+ '9': 'Power-On Hours Count',
+ '10': 'Spin-up Retries',
+ '11': 'Calibration Retries',
+ '12': 'Power Cycle Count',
+ '13': 'Soft Read Error Rate',
+ '100': 'Erase/Program Cycles',
+ '103': 'Translation Table Rebuild',
+ '108': 'Unknown (108)',
+ '170': 'Reserved Block Count',
+ '171': 'Program Fail Count',
+ '172': 'Erase Fail Count',
+ '173': 'Wear Leveller Worst Case Erase Count',
+ '174': 'Unexpected Power Loss',
+ '175': 'Program Fail Count',
+ '176': 'Erase Fail Count',
+ '177': 'Wear Leveling Count',
+ '178': 'Used Reserved Block Count',
+ '179': 'Used Reserved Block Count',
+ '180': 'Unused Reserved Block Count',
+ '181': 'Program Fail Count',
+ '182': 'Erase Fail Count',
+ '183': 'SATA Downshifts',
+ '184': 'End-to-End error',
+ '185': 'Head Stability',
+ '186': 'Induced Op-Vibration Detection',
+ '187': 'Reported Uncorrectable Errors',
+ '188': 'Command Timeout',
+ '189': 'High Fly Writes',
+ '190': 'Temperature',
+ '191': 'G-Sense Errors',
+ '192': 'Power-Off Retract Cycles',
+ '193': 'Load/Unload Cycles',
+ '194': 'Temperature',
+ '195': 'Hardware ECC Recovered',
+ '196': 'Reallocation Events',
+ '197': 'Current Pending Sectors',
+ '198': 'Off-line Uncorrectable',
+ '199': 'UDMA CRC Error Rate',
+ '200': 'Write Error Rate',
+ '201': 'Soft Read Errors',
+ '202': 'Data Address Mark Errors',
+ '203': 'Run Out Cancel',
+ '204': 'Soft ECC Corrections',
+ '205': 'Thermal Asperity Rate',
+ '206': 'Flying Height',
+ '207': 'Spin High Current',
+ '209': 'Offline Seek Performance',
+ '220': 'Disk Shift',
+ '221': 'G-Sense Error Rate',
+ '222': 'Loaded Hours',
+ '223': 'Load/Unload Retries',
+ '224': 'Load Friction',
+ '225': 'Load/Unload Cycles',
+ '226': 'Load-in Time',
+ '227': 'Torque Amplification Count',
+ '228': 'Power-Off Retracts',
+ '230': 'GMR Head Amplitude',
+ '231': 'Temperature',
+ '232': 'Available Reserved Space',
+ '233': 'Media Wearout Indicator',
+ '240': 'Head Flying Hours',
+ '241': 'Total LBAs Written',
+ '242': 'Total LBAs Read',
+ '250': 'Read Error Retry Rate'
+}
+
+LIMIT = namedtuple('LIMIT', ['min', 'max'])
+
+LIMITS = {
+ '194': LIMIT(0, 200)
+}
+
+RESCAN_INTERVAL = 60
+
+REGEX = re.compile(
+ '(\d+);' # attribute
+ '(\d+);' # normalized value
+ '(\d+)', # raw value
+ re.X
+)
+
+
+def chart_template(chart_name):
+ units, attr_id = chart_name.split('_')[-2:]
+ title = '{value_type} {description}'.format(value_type=units.capitalize(),
+ description=SMART_ATTR[attr_id])
+ family = SMART_ATTR[attr_id].lower()
+
+ return {
+ chart_name: {
+ 'options': [None, title, units, family, 'smartd_log.' + chart_name, 'line'],
+ 'lines': []
+ }
+ }
+
+
+def handle_os_error(method):
+ def on_call(*args):
+ try:
+ return method(*args)
+ except OSError:
+ return None
+ return on_call
+
+
+class SmartAttribute(object):
+ def __init__(self, idx, normalized, raw):
+ self.id = idx
+ self.normalized = normalized
+ self._raw = raw
+
+ @property
+ def raw(self):
+ if self.id in LIMITS:
+ limit = LIMITS[self.id]
+ if limit.min <= int(self._raw) <= limit.max:
+ return self._raw
+ return None
+ return self._raw
+
+ @raw.setter
+ def raw(self, value):
+ self._raw = value
+
+
+class DiskLogFile:
+ def __init__(self, path):
+ self.path = path
+ self.size = os.path.getsize(path)
+
+ @handle_os_error
+ def is_changed(self):
+ new_size = os.path.getsize(self.path)
+ old_size, self.size = self.size, new_size
+
+ return new_size != old_size and new_size
+
+ @staticmethod
+ @handle_os_error
+ def is_valid(log_file, exclude):
+ return all([log_file.endswith('.csv'),
+ not [p for p in exclude if p in log_file],
+ os.access(log_file, os.R_OK),
+ os.path.getsize(log_file)])
+
+
+class Disk:
+ def __init__(self, full_path, age):
+ self.log_file = DiskLogFile(full_path)
+ self.name = os.path.basename(full_path).split('.')[-3]
+ self.age = int(age)
+ self.status = True
+ self.attributes = dict()
+
+ self.get_attributes()
+
+ def __eq__(self, other):
+ if isinstance(other, Disk):
+ return self.name == other.name
+ return self.name == other
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(repr(self))
+
+ @handle_os_error
+ def is_active(self):
+ return (time() - os.path.getmtime(self.log_file.path)) / 60 < self.age
+
+ @handle_os_error
+ def get_attributes(self):
+ last_line = read_last_line(self.log_file.path)
+ self.attributes = dict((attr, SmartAttribute(attr, normalized, raw)) for attr, normalized, raw
+ in REGEX.findall(last_line))
+ return True
+
+ def data(self):
+ data = dict()
+ for attr in self.attributes.values():
+ data['_'.join([self.name, 'normalized', attr.id])] = attr.normalized
+ if attr.raw is not None:
+ data['_'.join([self.name, 'raw', attr.id])] = attr.raw
+ return data
+
+
+class Service(SimpleService):
+ def __init__(self, configuration=None, name=None):
+ SimpleService.__init__(self, configuration=configuration, name=name)
+ self.log_path = self.configuration.get('log_path', '/var/log/smartd')
+ self.raw = self.configuration.get('raw_values', True)
+ self.exclude = self.configuration.get('exclude_disks', str()).split()
+ self.age = self.configuration.get('age', 30)
+
+ self.runs = 0
+ self.disks = list()
+ self.order = list()
+ self.definitions = dict()
+
+ def check(self):
+ self.disks = self.scan()
+
+ if not self.disks:
+ return None
+
+ user_defined_sa = self.configuration.get('smart_attributes')
+
+ if user_defined_sa:
+ order = user_defined_sa.split() or ORDER
+ else:
+ order = ORDER
+
+ self.create_charts(order)
+
+ return True
+
+ def get_data(self):
+ self.runs += 1
+
+ if self.runs % RESCAN_INTERVAL == 0:
+ self.cleanup_and_rescan()
+
+ data = dict()
+
+ for disk in self.disks:
+
+ if not disk.status:
+ continue
+
+ changed = disk.log_file.is_changed()
+
+ # True = changed, False = unchanged, None = Exception
+ if changed is None:
+ disk.status = False
+ continue
+
+ if changed:
+ success = disk.get_attributes()
+ if not success:
+ disk.status = False
+ continue
+
+ data.update(disk.data())
+
+ return data or None
+
+ def create_charts(self, order):
+ for attr in order:
+ raw_name, normalized_name = 'attr_id_raw_' + attr, 'attr_id_normalized_' + attr
+ raw, normalized = chart_template(raw_name), chart_template(normalized_name)
+ self.order.extend([normalized_name, raw_name])
+ self.definitions.update(raw)
+ self.definitions.update(normalized)
+
+ for disk in self.disks:
+ if attr not in disk.attributes:
+ self.debug("'{disk}' has no attribute '{attr_id}'".format(disk=disk.name,
+ attr_id=attr))
+ continue
+ normalized[normalized_name]['lines'].append(['_'.join([disk.name, 'normalized', attr]), disk.name])
+
+ if not self.raw:
+ continue
+
+ if disk.attributes[attr].raw is not None:
+ raw[raw_name]['lines'].append(['_'.join([disk.name, 'raw', attr]), disk.name])
+ continue
+ self.debug("'{disk}' attribute '{attr_id}' value not in {limits}".format(disk=disk.name,
+ attr_id=attr,
+ limits=LIMITS[attr]))
+
+ def cleanup_and_rescan(self):
+ self.cleanup()
+ new_disks = self.scan(only_new=True)
+
+ for disk in new_disks:
+ valid = False
+
+ for chart in self.charts:
+ value_type, idx = chart.id.split('_')[2:]
+
+ if idx in disk.attributes:
+ valid = True
+ dimension_id = '_'.join([disk.name, value_type, idx])
+
+ if dimension_id in chart:
+ chart.hide_dimension(dimension_id=dimension_id, reverse=True)
+ else:
+ chart.add_dimension([dimension_id, disk.name])
+ if valid:
+ self.disks.append(disk)
+
+ def cleanup(self):
+ for disk in self.disks:
+
+ if not disk.is_active():
+ disk.status = False
+ if not disk.status:
+ for chart in self.charts:
+ dimension_id = '_'.join([disk.name, chart.id[8:]])
+ chart.hide_dimension(dimension_id=dimension_id)
+
+ self.disks = [disk for disk in self.disks if disk.status]
+
+ def scan(self, only_new=None):
+ new_disks = list()
+ for f in os.listdir(self.log_path):
+ full_path = os.path.join(self.log_path, f)
+
+ if DiskLogFile.is_valid(full_path, self.exclude):
+ disk = Disk(full_path, self.age)
+
+ active = disk.is_active()
+ if active is None:
+ continue
+ if active:
+ if not only_new:
+ new_disks.append(disk)
+ else:
+ if disk not in self.disks:
+ new_disks.append(disk)
+ else:
+ if not only_new:
+ self.debug("'{disk}' not updated in the last {age} minutes, "
+ "skipping it.".format(disk=disk.name, age=self.age))
+ return new_disks