diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:22:31 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:22:31 +0000 |
commit | 8d4f58e49b9dc7d3545651023a36729de773ad86 (patch) | |
tree | 7bc7be4a8e9e298daa1349348400aa2a653866f2 /collectors/python.d.plugin/adaptec_raid | |
parent | Initial commit. (diff) | |
download | netdata-upstream.tar.xz netdata-upstream.zip |
Adding upstream version 1.12.0. (refs: upstream/1.12.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/adaptec_raid')
4 files changed, 361 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/adaptec_raid/Makefile.inc b/collectors/python.d.plugin/adaptec_raid/Makefile.inc new file mode 100644 index 0000000..716cdb2 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += adaptec_raid/adaptec_raid.chart.py +dist_pythonconfig_DATA += adaptec_raid/adaptec_raid.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += adaptec_raid/README.md adaptec_raid/Makefile.inc + diff --git a/collectors/python.d.plugin/adaptec_raid/README.md b/collectors/python.d.plugin/adaptec_raid/README.md new file mode 100644 index 0000000..682280f --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/README.md @@ -0,0 +1,48 @@ +# adaptec raid + +Module collects logical and physical devices health metrics. + +**Requirements:** +* `arcconf` program +* `sudo` program +* `netdata` user needs to be able to sudo the `arcconf` program without password + +To grab stats it executes: + * `sudo -n arcconf GETCONFIG 1 LD` + * `sudo -n arcconf GETCONFIG 1 PD` + + +It produces: + +1. **Logical Device Status** + +2. **Physical Device State** + +3. **Physical Device S.M.A.R.T warnings** + +4. **Physical Device Temperature** + +### prerequisite +This module uses `arcconf` which can only be executed by root. It uses +`sudo` and assumes that it is configured such that the `netdata` user can +execute `arcconf` as root without password. + +Add to `sudoers`: + + netdata ALL=(root) NOPASSWD: /path/to/arcconf + +### configuration + + **adaptec_raid** is disabled by default. Should be explicitly enabled in `python.d.conf`. 
+ +```yaml +adaptec_raid: yes +``` + +#### Screenshot: + +![image](https://user-images.githubusercontent.com/22274335/47278133-6d306680-d601-11e8-87c2-cc9c0f42d686.png) + +--- + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Fpython.d.plugin%2Fadaptec_raid%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py new file mode 100644 index 0000000..1fb1e43 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +# Description: adaptec_raid netdata python.d module +# Author: Ilya Mashchenko (l2isbad) +# SPDX-License-Identifier: GPL-3.0-or-later + + +import re + +from copy import deepcopy + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + + +disabled_by_default = True + +update_every = 5 + +ORDER = [ + 'ld_status', + 'pd_state', + 'pd_smart_warnings', + 'pd_temperature', +] + +CHARTS = { + 'ld_status': { + 'options': [None, 'Status Is Not OK', 'bool', 'logical devices', 'adapter_raid.ld_status', 'line'], + 'lines': [] + }, + 'pd_state': { + 'options': [None, 'State Is Not OK', 'bool', 'physical devices', 'adapter_raid.pd_state', 'line'], + 'lines': [] + }, + 'pd_smart_warnings': { + 'options': [None, 'S.M.A.R.T warnings', 'count', 'physical devices', + 'adapter_raid.smart_warnings', 'line'], + 'lines': [] + }, + 'pd_temperature': { + 'options': [None, 'Temperature', 'celsius', 'physical devices', 'adapter_raid.temperature', 'line'], + 'lines': [] + }, +} + +SUDO = 'sudo' +ARCCONF = 'arcconf' + +BAD_LD_STATUS = ( + 'Degraded', + 'Failed', +) + +GOOD_PD_STATUS = ( + 'Online', +) + +RE_LD = re.compile( + r'Logical device 
number\s+([0-9]+).*?' + r'Status of logical device\s+: ([a-zA-Z]+)' +) + + +def find_lds(d): + d = ' '.join(v.strip() for v in d) + return [LD(*v) for v in RE_LD.findall(d)] + + +def find_pds(d): + pds = list() + pd = PD() + + for row in d: + row = row.strip() + if row.startswith('Device #'): + pd = PD() + pd.id = row.split('#')[-1] + elif not pd.id: + continue + + if row.startswith('State'): + v = row.split()[-1] + pd.state = v + elif row.startswith('S.M.A.R.T. warnings'): + v = row.split()[-1] + pd.smart_warnings = v + elif row.startswith('Temperature'): + v = row.split(':')[-1].split()[0] + pd.temperature = v + elif row.startswith('NCQ status'): + if pd.id and pd.state and pd.smart_warnings: + pds.append(pd) + pd = PD() + + return pds + + +class LD: + def __init__(self, ld_id, status): + self.id = ld_id + self.status = status + + def data(self): + return { + 'ld_{0}_status'.format(self.id): int(self.status in BAD_LD_STATUS) + } + + +class PD: + def __init__(self): + self.id = None + self.state = None + self.smart_warnings = None + self.temperature = None + + def data(self): + data = { + 'pd_{0}_state'.format(self.id): int(self.state not in GOOD_PD_STATUS), + 'pd_{0}_smart_warnings'.format(self.id): self.smart_warnings, + } + if self.temperature and self.temperature.isdigit(): + data['pd_{0}_temperature'.format(self.id)] = self.temperature + + return data + + +class Arcconf: + def __init__(self, arcconf): + self.arcconf = arcconf + + def ld_info(self): + return [self.arcconf, 'GETCONFIG', '1', 'LD'] + + def pd_info(self): + return [self.arcconf, 'GETCONFIG', '1', 'PD'] + + +# TODO: hardcoded sudo... 
+class SudoArcconf: + def __init__(self, arcconf, sudo): + self.arcconf = Arcconf(arcconf) + self.sudo = sudo + + def ld_info(self): + return [self.sudo, '-n'] + self.arcconf.ld_info() + + def pd_info(self): + return [self.sudo, '-n'] + self.arcconf.pd_info() + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.use_sudo = self.configuration.get('use_sudo', True) + self.arcconf = None + + def execute(self, command, stderr=False): + return self._get_raw_data(command=command, stderr=stderr) + + def check(self): + arcconf = find_binary(ARCCONF) + if not arcconf: + self.error('can\'t locate "{0}" binary'.format(ARCCONF)) + return False + + sudo = find_binary(SUDO) + if self.use_sudo: + if not sudo: + self.error('can\'t locate "{0}" binary'.format(SUDO)) + return False + err = self.execute([sudo, '-n', '-v'], True) + if err: + self.error(' '.join(err)) + return False + + if self.use_sudo: + self.arcconf = SudoArcconf(arcconf, sudo) + else: + self.arcconf = Arcconf(arcconf) + + lds = self.get_lds() + if not lds: + return False + + self.debug('discovered logical devices ids: {0}'.format([ld.id for ld in lds])) + + pds = self.get_pds() + if not pds: + return False + + self.debug('discovered physical devices ids: {0}'.format([pd.id for pd in pds])) + + self.update_charts(lds, pds) + return True + + def get_data(self): + data = dict() + + for ld in self.get_lds(): + data.update(ld.data()) + + for pd in self.get_pds(): + data.update(pd.data()) + + return data + + def get_lds(self): + raw_lds = self.execute(self.arcconf.ld_info()) + if not raw_lds: + return None + + lds = find_lds(raw_lds) + if not lds: + self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.ld_info()))) + self.debug('output: {0}'.format(raw_lds)) + return None + return lds + + def get_pds(self): + raw_pds = 
self.execute(self.arcconf.pd_info()) + if not raw_pds: + return None + + pds = find_pds(raw_pds) + if not pds: + self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.pd_info()))) + self.debug('output: {0}'.format(raw_pds)) + return None + return pds + + def update_charts(self, lds, pds): + charts = self.definitions + for ld in lds: + dim = ['ld_{0}_status'.format(ld.id), 'ld {0}'.format(ld.id)] + charts['ld_status']['lines'].append(dim) + + for pd in pds: + dim = ['pd_{0}_state'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_state']['lines'].append(dim) + + dim = ['pd_{0}_smart_warnings'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_smart_warnings']['lines'].append(dim) + + dim = ['pd_{0}_temperature'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_temperature']['lines'].append(dim) diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf new file mode 100644 index 0000000..fa462ec --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf @@ -0,0 +1,53 @@ +# netdata python.d.plugin configuration for adaptec raid +# +# This file is in YaML format. Generally the format is: +# +# name: value +# + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. 
+# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# ---------------------------------------------------------------------- |