diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 11:08:07 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 11:08:07 +0000 |
commit | c69cb8cc094cc916adbc516b09e944cd3d137c01 (patch) | |
tree | f2878ec41fb6d0e3613906c6722fc02b934eeb80 /collectors/python.d.plugin/adaptec_raid | |
parent | Initial commit. (diff) | |
download | netdata-upstream/1.29.3.tar.xz netdata-upstream/1.29.3.zip |
Adding upstream version 1.29.3.upstream/1.29.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
4 files changed, 391 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/adaptec_raid/Makefile.inc b/collectors/python.d.plugin/adaptec_raid/Makefile.inc new file mode 100644 index 0000000..716cdb2 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += adaptec_raid/adaptec_raid.chart.py +dist_pythonconfig_DATA += adaptec_raid/adaptec_raid.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += adaptec_raid/README.md adaptec_raid/Makefile.inc + diff --git a/collectors/python.d.plugin/adaptec_raid/README.md b/collectors/python.d.plugin/adaptec_raid/README.md new file mode 100644 index 0000000..b14e8f9 --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/README.md @@ -0,0 +1,80 @@ +<!-- +title: "Adaptec RAID controller monitoring with Netdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/adaptec_raid/README.md +sidebar_label: "Adaptec RAID" +--> + +# Adaptec RAID controller monitoring with Netdata + +Collects logical and physical devices metrics using `arcconf` command-line utility. + +Executed commands: + +- `sudo -n arcconf GETCONFIG 1 LD` +- `sudo -n arcconf GETCONFIG 1 PD` + +## Requirements + +The module uses `arcconf`, which can only be executed by `root`. It uses +`sudo` and assumes that it is configured such that the `netdata` user can execute `arcconf` as root without a password. + +- Add to your `/etc/sudoers` file: + +`which arcconf` shows the full path to the binary. + +```bash +netdata ALL=(root) NOPASSWD: /path/to/arcconf +``` + +- Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + +The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `arcconf` using `sudo`. + + +As the `root` user, do the following: + +```cmd +mkdir /etc/systemd/system/netdata.service.d +echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf +systemctl daemon-reload +systemctl restart netdata.service +``` + +## Charts + +- Logical Device Status +- Physical Device State +- Physical Device S.M.A.R.T warnings +- Physical Device Temperature + +## Enable the collector + +The `adaptec_raid` collector is disabled by default. To enable it, use `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` +file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `adaptec_raid` setting to `yes`. Save the file and restart the Netdata Agent +with `sudo systemctl restart netdata`, or the appropriate method for your system. + +## Configuration + +Edit the `python.d/adaptec_raid.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/adaptec_raid.conf +``` + +![image](https://user-images.githubusercontent.com/22274335/47278133-6d306680-d601-11e8-87c2-cc9c0f42d686.png) + +--- + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Fpython.d.plugin%2Fadaptec_raid%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py new file mode 100644 index 0000000..564c2ce --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- +# Description: adaptec_raid netdata python.d module +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +import re +from copy import deepcopy + +from bases.FrameworkServices.ExecutableService import ExecutableService +from bases.collection import find_binary + +disabled_by_default = True + +update_every = 5 + +ORDER = [ + 'ld_status', + 'pd_state', + 'pd_smart_warnings', + 'pd_temperature', +] + +CHARTS = { + 'ld_status': { + 'options': [None, 'Status Is Not OK', 'bool', 'logical devices', 'adapter_raid.ld_status', 'line'], + 'lines': [] + }, + 'pd_state': { + 'options': [None, 'State Is Not OK', 'bool', 'physical devices', 'adapter_raid.pd_state', 'line'], + 'lines': [] + }, + 'pd_smart_warnings': { + 'options': [None, 'S.M.A.R.T warnings', 'count', 'physical devices', + 'adapter_raid.smart_warnings', 'line'], + 'lines': [] + }, + 'pd_temperature': { + 'options': [None, 'Temperature', 'celsius', 'physical devices', 'adapter_raid.temperature', 'line'], + 'lines': [] + }, +} + +SUDO = 'sudo' +ARCCONF = 'arcconf' + +BAD_LD_STATUS = ( + 'Degraded', + 'Failed', +) + +GOOD_PD_STATUS = ( + 'Online', +) + +RE_LD = re.compile( + r'Logical [dD]evice number\s+([0-9]+).*?' + r'Status of [lL]ogical [dD]evice\s+: ([a-zA-Z]+)' +) + + +def find_lds(d): + d = ' '.join(v.strip() for v in d) + return [LD(*v) for v in RE_LD.findall(d)] + + +def find_pds(d): + pds = list() + pd = PD() + + for row in d: + row = row.strip() + if row.startswith('Device #'): + pd = PD() + pd.id = row.split('#')[-1] + elif not pd.id: + continue + + if row.startswith('State'): + v = row.split()[-1] + pd.state = v + elif row.startswith('S.M.A.R.T. warnings'): + v = row.split()[-1] + pd.smart_warnings = v + elif row.startswith('Temperature'): + v = row.split(':')[-1].split()[0] + pd.temperature = v + elif row.startswith('NCQ status'): + if pd.id and pd.state and pd.smart_warnings: + pds.append(pd) + pd = PD() + + return pds + + +class LD: + def __init__(self, ld_id, status): + self.id = ld_id + self.status = status + + def data(self): + return { + 'ld_{0}_status'.format(self.id): int(self.status in BAD_LD_STATUS) + } + + +class PD: + def __init__(self): + self.id = None + self.state = None + self.smart_warnings = None + self.temperature = None + + def data(self): + data = { + 'pd_{0}_state'.format(self.id): int(self.state not in GOOD_PD_STATUS), + 'pd_{0}_smart_warnings'.format(self.id): self.smart_warnings, + } + if self.temperature and self.temperature.isdigit(): + data['pd_{0}_temperature'.format(self.id)] = self.temperature + + return data + + +class Arcconf: + def __init__(self, arcconf): + self.arcconf = arcconf + + def ld_info(self): + return [self.arcconf, 'GETCONFIG', '1', 'LD'] + + def pd_info(self): + return [self.arcconf, 'GETCONFIG', '1', 'PD'] + + +# TODO: hardcoded sudo... +class SudoArcconf: + def __init__(self, arcconf, sudo): + self.arcconf = Arcconf(arcconf) + self.sudo = sudo + + def ld_info(self): + return [self.sudo, '-n'] + self.arcconf.ld_info() + + def pd_info(self): + return [self.sudo, '-n'] + self.arcconf.pd_info() + + +class Service(ExecutableService): + def __init__(self, configuration=None, name=None): + ExecutableService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = deepcopy(CHARTS) + self.use_sudo = self.configuration.get('use_sudo', True) + self.arcconf = None + + def execute(self, command, stderr=False): + return self._get_raw_data(command=command, stderr=stderr) + + def check(self): + arcconf = find_binary(ARCCONF) + if not arcconf: + self.error('can\'t locate "{0}" binary'.format(ARCCONF)) + return False + + sudo = find_binary(SUDO) + if self.use_sudo: + if not sudo: + self.error('can\'t locate "{0}" binary'.format(SUDO)) + return False + err = self.execute([sudo, '-n', '-v'], True) + if err: + self.error(' '.join(err)) + return False + + if self.use_sudo: + self.arcconf = SudoArcconf(arcconf, sudo) + else: + self.arcconf = Arcconf(arcconf) + + lds = self.get_lds() + if not lds: + return False + + self.debug('discovered logical devices ids: {0}'.format([ld.id for ld in lds])) + + pds = self.get_pds() + if not pds: + return False + + self.debug('discovered physical devices ids: {0}'.format([pd.id for pd in pds])) + + self.update_charts(lds, pds) + return True + + def get_data(self): + data = dict() + + for ld in self.get_lds(): + data.update(ld.data()) + + for pd in self.get_pds(): + data.update(pd.data()) + + return data + + def get_lds(self): + raw_lds = self.execute(self.arcconf.ld_info()) + if not raw_lds: + return None + + lds = find_lds(raw_lds) + if not lds: + self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.ld_info()))) + self.debug('output: {0}'.format(raw_lds)) + return None + return lds + + def get_pds(self): + raw_pds = self.execute(self.arcconf.pd_info()) + if not raw_pds: + return None + + pds = find_pds(raw_pds) + if not pds: + self.error('failed to parse "{0}" output'.format(' '.join(self.arcconf.pd_info()))) + self.debug('output: {0}'.format(raw_pds)) + return None + return pds + + def update_charts(self, lds, pds): + charts = self.definitions + for ld in lds: + dim = ['ld_{0}_status'.format(ld.id), 'ld {0}'.format(ld.id)] + charts['ld_status']['lines'].append(dim) + + for pd in pds: + dim = ['pd_{0}_state'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_state']['lines'].append(dim) + + dim = ['pd_{0}_smart_warnings'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_smart_warnings']['lines'].append(dim) + + dim = ['pd_{0}_temperature'.format(pd.id), 'pd {0}'.format(pd.id)] + charts['pd_temperature']['lines'].append(dim) diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf new file mode 100644 index 0000000..fa462ec --- /dev/null +++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.conf @@ -0,0 +1,53 @@ +# netdata python.d.plugin configuration for adaptec raid +# +# This file is in YaML format. Generally the format is: +# +# name: value +# + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# ---------------------------------------------------------------------- |