diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 14:31:17 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 14:31:17 +0000 |
commit | 8020f71afd34d7696d7933659df2d763ab05542f (patch) | |
tree | 2fdf1b5447ffd8bdd61e702ca183e814afdcb4fc /collectors/python.d.plugin/hpssa | |
parent | Initial commit. (diff) | |
download | netdata-8020f71afd34d7696d7933659df2d763ab05542f.tar.xz netdata-8020f71afd34d7696d7933659df2d763ab05542f.zip |
Adding upstream version 1.37.1. (refs: upstream/1.37.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/hpssa')
-rw-r--r-- | collectors/python.d.plugin/hpssa/Makefile.inc | 13 | ||||
-rw-r--r-- | collectors/python.d.plugin/hpssa/README.md | 83 | ||||
-rw-r--r-- | collectors/python.d.plugin/hpssa/hpssa.chart.py | 396 | ||||
-rw-r--r-- | collectors/python.d.plugin/hpssa/hpssa.conf | 61 |
4 files changed, 553 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/hpssa/Makefile.inc b/collectors/python.d.plugin/hpssa/Makefile.inc new file mode 100644 index 0000000..1c04aa4 --- /dev/null +++ b/collectors/python.d.plugin/hpssa/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += hpssa/hpssa.chart.py +dist_pythonconfig_DATA += hpssa/hpssa.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += hpssa/README.md hpssa/Makefile.inc + diff --git a/collectors/python.d.plugin/hpssa/README.md b/collectors/python.d.plugin/hpssa/README.md new file mode 100644 index 0000000..c1d2182 --- /dev/null +++ b/collectors/python.d.plugin/hpssa/README.md @@ -0,0 +1,83 @@ +<!-- +title: "HP Smart Storage Arrays monitoring with Netdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/hpssa/README.md +sidebar_label: "HP Smart Storage Arrays" +--> + +# HP Smart Storage Arrays monitoring with Netdata + +Monitors controller, cache module, logical and physical drive state and temperature using `ssacli` tool. + +Executed commands: + +- `sudo -n ssacli ctrl all show config detail` + +## Requirements: + +This module uses `ssacli`, which can only be executed by root. It uses +`sudo` and assumes that it is configured such that the `netdata` user can execute `ssacli` as root without a password. + +- Add to your `/etc/sudoers` file: + +`which ssacli` shows the full path to the binary. + +```bash +netdata ALL=(root) NOPASSWD: /path/to/ssacli +``` + +- Reset Netdata's systemd + unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux + distributions with systemd) + +The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. 
Resetting is not optimal, but a next-best solution given the inability to execute `ssacli` using `sudo`. + +As the `root` user, do the following: + +```cmd +mkdir /etc/systemd/system/netdata.service.d +echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf +systemctl daemon-reload +systemctl restart netdata.service +``` + +## Charts + +- Controller status +- Controller temperature +- Logical drive status +- Physical drive status +- Physical drive temperature + +## Enable the collector + +The `hpssa` collector is disabled by default. To enable it, use `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` +file. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d.conf +``` + +Change the value of the `hpssa` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl +restart netdata`, or the [appropriate method](/docs/configure/start-stop-restart.md) for your system. + +## Configuration + +Edit the `python.d/hpssa.conf` configuration file using `edit-config` from the +Netdata [config directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/hpssa.conf +``` + +If `ssacli` cannot be found in the `PATH`, configure it in `hpssa.conf`. + +```yaml +ssacli_path: /usr/sbin/ssacli +``` + +Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate +method](/docs/configure/start-stop-restart.md) for your system. 
# diff --git a/collectors/python.d.plugin/hpssa/hpssa.chart.py b/collectors/python.d.plugin/hpssa/hpssa.chart.py (new file mode 100644, index 0000000..4da73dc, @@ -0,0 +1,396 @@)
# -*- coding: utf-8 -*-
# Description: hpssa netdata python.d module
# Author: Peter Gnodde (gnoddep)
# SPDX-License-Identifier: GPL-3.0-or-later

import os
import re
from copy import deepcopy

from bases.FrameworkServices.ExecutableService import ExecutableService
from bases.collection import find_binary

disabled_by_default = True
update_every = 5

# Chart ids, in the order they are handed to the framework via Service.order.
ORDER = [
    'ctrl_status',
    'ctrl_temperature',
    'ld_status',
    'pd_status',
    'pd_temperature',
]

# Chart definitions. Every chart starts without dimensions ('lines' is empty);
# Service.get_data() adds a dimension the first time it has a value for it.
CHARTS = {
    'ctrl_status': {
        'options': [
            None,
            'Status 1 is OK, Status 0 is not OK',
            'Status',
            'Controller',
            'hpssa.ctrl_status',
            'line'
        ],
        'lines': []
    },
    'ctrl_temperature': {
        'options': [
            None,
            'Temperature',
            'Celsius',
            'Controller',
            'hpssa.ctrl_temperature',
            'line'
        ],
        'lines': []
    },
    'ld_status': {
        'options': [
            None,
            'Status 1 is OK, Status 0 is not OK',
            'Status',
            'Logical drives',
            'hpssa.ld_status',
            'line'
        ],
        'lines': []
    },
    'pd_status': {
        'options': [
            None,
            'Status 1 is OK, Status 0 is not OK',
            'Status',
            'Physical drives',
            'hpssa.pd_status',
            'line'
        ],
        'lines': []
    },
    'pd_temperature': {
        'options': [
            None,
            'Temperature',
            'Celsius',
            'Physical drives',
            'hpssa.pd_temperature',
            'line'
        ],
        'lines': []
    }
}

# All patterns below are applied with .match() to lines that HPSSA has already
# stripped, so every alternative is implicitly anchored at the start of the line.

# "<adapter type> in Slot <n>": first line of a controller section.
adapter_regex = re.compile(r'^(?P<adapter_type>.+) in Slot (?P<slot>\d+)')

# Section headers whose key/value content is skipped.
# NOTE: '|' binds weaker than the anchors, so '$' belongs to the last
# alternative only ("Mirror Group <n>:"); the other alternatives are prefix
# matches.
ignored_sections_regex = re.compile(
    r'''
    ^
        Physical[ ]Drives
        | None[ ]attached
        | (?:Expander|Enclosure|SEP|Port[ ]Name:)[ ].+
        | .+[ ]at[ ]Port[ ]\S+,[ ]Box[ ]\d+,[ ].+
        | Mirror[ ]Group[ ]\d+:
    $
    ''',
    re.X
)
mirror_group_regex = re.compile(r'^Mirror Group \d+:$')
disk_partition_regex = re.compile(r'^Disk Partition Information$')
array_regex = re.compile(r'^Array: (?P<id>[A-Z]+)$')

# Header of a logical or a physical drive. Exactly one of the two named groups
# is set on a match. As above, '$' applies to the physicaldrive alternative only.
drive_regex = re.compile(
    r'''
    ^
        Logical[ ]Drive:[ ](?P<logical_drive_id>\d+)
        | physicaldrive[ ](?P<fqn>[^:]+:\d+:\d+)
    $
    ''',
    re.X
)
key_value_regex = re.compile(r'^(?P<key>[^:]+): ?(?P<value>.*)$')
ld_status_regex = re.compile(r'^Status: (?P<status>[^,]+)(?:, (?P<percentage>[0-9.]+)% complete)?$')
error_match = re.compile(r'Error:')


class HPSSAException(Exception):
    """Raised when the command output reports an error or cannot be parsed."""


class HPSSA(object):
    """Line-oriented parser for the output of the ssacli command built by Service.

    The instance is its own iterator over the stripped, non-empty input lines.
    Every ``parse_*`` method pulls lines from that single shared cursor
    (``current_line``). When a method meets a line that belongs to its caller
    it calls :meth:`rewind` and stops, so the caller reads that line again.

    After construction ``adapters`` holds one dict per controller section found.
    """

    def __init__(self, lines):
        """Store the usable lines and parse them immediately.

        :param lines: iterable of raw output lines. A falsy value (``None`` or
            an empty sequence) is treated as "no output" and yields no adapters.
        :raises HPSSAException: if an adapter section contains an error line or
            a line that cannot be interpreted.
        """
        # `lines or ()` keeps a missing command result from raising TypeError.
        stripped = (line.strip() for line in lines or ())
        self.lines = [line for line in stripped if line]
        self.current_line = 0
        self.adapters = []
        self.parse()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the line under the cursor and advance the cursor by one."""
        if self.current_line == len(self.lines):
            raise StopIteration

        line = self.lines[self.current_line]
        self.current_line += 1

        return line

    def next(self):
        """
        This is for Python 2.7 compatibility
        """
        return self.__next__()

    def rewind(self):
        """Move the cursor back one line (never below 0) so it is read again."""
        self.current_line = max(self.current_line - 1, 0)

    @staticmethod
    def match_any(line, *regexes):
        """Return True if *line* matches (from its start) any of *regexes*."""
        return any(regex.match(line) for regex in regexes)

    @staticmethod
    def _to_int(value, default=None):
        """Return ``int(value)``, or *default* when *value* is not an integer literal."""
        try:
            return int(value)
        except ValueError:
            return default

    def parse(self):
        """Scan all lines and append one parsed adapter per adapter header line."""
        for line in self:
            match = adapter_regex.match(line)
            if match:
                self.adapters.append(self.parse_adapter(**match.groupdict()))

    def parse_adapter(self, slot, adapter_type):
        """Parse one controller section; the cursor is just past its header line.

        :param slot: slot number as captured from the header (digits as a string).
        :param adapter_type: adapter description captured from the header.
        :returns: dict with the keys ``slot``, ``type``, ``controller``,
            ``cache``, ``battery``, ``logical_drives`` and ``physical_drives``.
        :raises HPSSAException: on an error line or on a line that is neither a
            known section header nor a ``key: value`` pair.
        """
        adapter = {
            'slot': int(slot),
            'type': adapter_type,

            'controller': {
                'status': None,
                'temperature': None,
            },
            'cache': {
                'present': False,
                'status': None,
                'temperature': None,
            },
            'battery': {
                'status': None,
                'count': 0,
            },

            'logical_drives': [],
            'physical_drives': [],
        }

        for line in self:
            if error_match.match(line):
                raise HPSSAException('Error: {}'.format(line))
            elif adapter_regex.match(line):
                # Header of the next adapter: hand it back to parse().
                self.rewind()
                break
            elif array_regex.match(line):
                self.parse_array(adapter)
            elif line == 'Unassigned' or line == 'HBA Drives':
                self.parse_physical_drives(adapter)
            elif ignored_sections_regex.match(line):
                self.parse_ignored_section()
            else:
                match = key_value_regex.match(line)
                if match:
                    key, value = match.group('key', 'value')
                    if key == 'Controller Status':
                        adapter['controller']['status'] = value == 'OK'
                    elif key == 'Controller Temperature (C)':
                        # An unparsable reading is kept as None ("unknown").
                        adapter['controller']['temperature'] = self._to_int(value)
                    elif key == 'Cache Board Present':
                        adapter['cache']['present'] = value == 'True'
                    elif key == 'Cache Status':
                        adapter['cache']['status'] = value == 'OK'
                    elif key == 'Cache Module Temperature (C)':
                        adapter['cache']['temperature'] = self._to_int(value)
                    elif key == 'Battery/Capacitor Count':
                        # Must stay an int: Service.get_data() compares it with 0.
                        adapter['battery']['count'] = self._to_int(value, 0)
                    elif key == 'Battery/Capacitor Status':
                        adapter['battery']['status'] = value == 'OK'
                else:
                    raise HPSSAException('Cannot parse line: {}'.format(line))

        return adapter

    def parse_array(self, adapter):
        """Parse the drives of one array section into *adapter*.

        Stops (after rewinding) at the next adapter, array or ignored-section
        header, or at the first line that is neither a drive header nor a
        ``key: value`` pair.
        """
        for line in self:
            if HPSSA.match_any(line, adapter_regex, array_regex, ignored_sections_regex):
                self.rewind()
                break

            match = drive_regex.match(line)
            if match:
                data = match.groupdict()
                if data['logical_drive_id']:
                    self.parse_logical_drive(adapter, int(data['logical_drive_id']))
                else:
                    self.parse_physical_drive(adapter, data['fqn'])
            elif not key_value_regex.match(line):
                self.rewind()
                break

    def parse_physical_drives(self, adapter):
        """Parse consecutive physical-drive entries until a non-drive line is met."""
        for line in self:
            match = drive_regex.match(line)
            if match:
                self.parse_physical_drive(adapter, match.group('fqn'))
            else:
                self.rewind()
                break

    def parse_logical_drive(self, adapter, logical_drive_id):
        """Parse one logical drive and append it to ``adapter['logical_drives']``.

        The appended dict has ``id``, ``status`` (True when the status text is
        'OK') and ``status_complete`` (the reported percentage divided by 100,
        or None when no percentage is present).
        """
        ld = {
            'id': logical_drive_id,
            'status': None,
            'status_complete': None,
        }

        for line in self:
            if HPSSA.match_any(line, mirror_group_regex, disk_partition_regex):
                self.parse_ignored_section()
                continue

            match = ld_status_regex.match(line)
            if match:
                ld['status'] = match.group('status') == 'OK'

                if match.group('percentage'):
                    ld['status_complete'] = float(match.group('percentage')) / 100
            elif HPSSA.match_any(line, adapter_regex, array_regex, drive_regex, ignored_sections_regex) \
                    or not key_value_regex.match(line):
                self.rewind()
                break

        adapter['logical_drives'].append(ld)

    def parse_physical_drive(self, adapter, fqn):
        """Parse one physical drive and append it to ``adapter['physical_drives']``.

        :param fqn: drive identifier captured from the ``physicaldrive`` header.
        """
        pd = {
            'fqn': fqn,
            'status': None,
            'temperature': None,
        }

        for line in self:
            if HPSSA.match_any(line, adapter_regex, array_regex, drive_regex, ignored_sections_regex):
                self.rewind()
                break

            match = key_value_regex.match(line)
            if match:
                key, value = match.group('key', 'value')
                if key == 'Status':
                    pd['status'] = value == 'OK'
                elif key == 'Current Temperature (C)':
                    # An unparsable reading is kept as None ("unknown").
                    pd['temperature'] = self._to_int(value)
            else:
                self.rewind()
                break

        adapter['physical_drives'].append(pd)

    def parse_ignored_section(self):
        """Skip ``key: value`` lines until a section header or a non key/value line."""
        for line in self:
            if HPSSA.match_any(line, adapter_regex, array_regex, drive_regex, ignored_sections_regex) \
                    or not key_value_regex.match(line):
                self.rewind()
                break


class Service(ExecutableService):
    """python.d service that runs ssacli and charts adapter and drive state."""

    def __init__(self, configuration=None, name=None):
        super(Service, self).__init__(configuration=configuration, name=name)
        self.order = ORDER
        # Per-instance copy: the module-level CHARTS stays pristine.
        self.definitions = deepcopy(CHARTS)
        self.ssacli_path = self.configuration.get('ssacli_path', 'ssacli')
        self.use_sudo = self.configuration.get('use_sudo', True)
        # Filled in by check().
        self.cmd = []

    def get_adapters(self):
        """Run ``self.cmd`` and return the parsed adapters.

        :returns: list of adapter dicts (see HPSSA.parse_adapter); an empty list
            when nothing was found or when parsing failed. Failures are logged
            with ``self.error``.
        """
        try:
            # NOTE(review): _get_raw_data comes from ExecutableService; check()
            # already treats its result as possibly falsy, and HPSSA accepts
            # that as "no output".
            adapters = HPSSA(self._get_raw_data(command=self.cmd)).adapters
            if not adapters:
                # If no adapters are returned, run the command again but capture stderr
                err = self._get_raw_data(command=self.cmd, stderr=True)
                if err:
                    raise HPSSAException('Error executing cmd {}: {}'.format(' '.join(self.cmd), '\n'.join(err)))
            return adapters
        except HPSSAException as ex:
            self.error(ex)
            return []

    def check(self):
        """Locate the binaries, build ``self.cmd`` and verify adapters are found.

        :returns: True when at least one adapter was discovered, else False.
        """
        if not os.path.isfile(self.ssacli_path):
            ssacli_path = find_binary(self.ssacli_path)
            if ssacli_path:
                self.ssacli_path = ssacli_path
            else:
                self.error('Cannot locate "{}" binary'.format(self.ssacli_path))
                return False

        # Build the command from scratch on every call so that a repeated
        # check() cannot append the ssacli arguments a second time.
        cmd = []

        if self.use_sudo:
            sudo = find_binary('sudo')
            if not sudo:
                self.error('Cannot locate "{}" binary'.format('sudo'))
                return False

            # The first line of `sudo -n -l <cmd>` output is compared with the
            # resolved path of the binary.
            allowed = self._get_raw_data(command=[sudo, '-n', '-l', self.ssacli_path])
            if not allowed or allowed[0].strip() != os.path.realpath(self.ssacli_path):
                self.error('Not allowed to run sudo for command {}'.format(self.ssacli_path))
                return False

            cmd = [sudo, '-n']

        cmd.extend([self.ssacli_path, 'ctrl', 'all', 'show', 'config', 'detail'])
        self.cmd = cmd
        self.info('Command: {}'.format(self.cmd))

        adapters = self.get_adapters()

        self.info('Discovered adapters: {}'.format([adapter['type'] for adapter in adapters]))
        if not adapters:
            self.error('No adapters discovered')
            return False

        return True

    def get_data(self):
        """Collect one sample.

        :returns: dict mapping dimension id to value. Values that are None are
            left out; a dimension is added to its chart the first time a value
            for it is seen.
        """
        netdata = {}

        for adapter in self.get_adapters():
            status_key = '{}_status'.format(adapter['slot'])
            temperature_key = '{}_temperature'.format(adapter['slot'])
            ld_key = 'ld_{}_'.format(adapter['slot'])

            data = {
                'ctrl_status': {
                    'ctrl_' + status_key: adapter['controller']['status'],
                    'cache_' + status_key: adapter['cache']['present'] and adapter['cache']['status'],
                    'battery_' + status_key:
                        adapter['battery']['status'] if adapter['battery']['count'] > 0 else None
                },

                'ctrl_temperature': {
                    'ctrl_' + temperature_key: adapter['controller']['temperature'],
                    'cache_' + temperature_key: adapter['cache']['temperature'],
                },

                'ld_status': {
                    ld_key + '{}_status'.format(ld['id']): ld['status'] for ld in adapter['logical_drives']
                },

                'pd_status': {},
                'pd_temperature': {},
            }

            for pd in adapter['physical_drives']:
                pd_key = 'pd_{}_{}'.format(adapter['slot'], pd['fqn'])
                data['pd_status'][pd_key + '_status'] = pd['status']
                data['pd_temperature'][pd_key + '_temperature'] = pd['temperature']

            for chart, dimension_data in data.items():
                for dimension_id, value in dimension_data.items():
                    if value is None:
                        continue

                    if dimension_id not in self.charts[chart]:
                        self.charts[chart].add_dimension([dimension_id])

                    netdata[dimension_id] = value

        return netdata

# ----------------------------------------------------------------------
# The commit page continues with the next file of the diff; its first part is
# carried over below unchanged (it is all comments):
# diff --git a/collectors/python.d.plugin/hpssa/hpssa.conf b/collectors/python.d.plugin/hpssa/hpssa.conf (new file mode 100644, index 0000000..cc50c98, @@ -0,0 +1,61 @@)
#
# netdata python.d.plugin configuration for hpssa
#
# This file is in YaML format. Generally the format is:
#
# name: value
#

# ----------------------------------------------------------------------
# Global Variables
# These variables set the defaults for all JOBs, however each JOB
# may define its own, overriding the defaults.

# update_every sets the default data collection frequency.
# If unset, the python.d.plugin default is used.
# update_every: 5

# priority controls the order of charts at the netdata dashboard.
# Lower numbers move the charts towards the top of the page.
# If unset, the default for python.d.plugin is used.
# priority: 60000

# penalty indicates whether to apply penalty to update_every in case of failures.
# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes.
# penalty: yes

# autodetection_retry sets the job re-check interval in seconds.
# The job is not deleted if check fails.
# Attempts to start the job are made once every autodetection_retry.
# This feature is disabled by default.
# autodetection_retry: 0

# ----------------------------------------------------------------------
# JOBS (data collection sources)
#
# The default JOBS share the same *name*. JOBS with the same name
# are mutually exclusive. Only one of them will be allowed running at
# any time. This allows autodetection to try several alternatives and
# pick the one that works.
#
# Any number of jobs is supported.
#
# All python.d.plugin JOBS (for all its modules) support a set of
# predefined parameters.
These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 5 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, hpssa also supports the following: +# +# ssacli_path: /usr/sbin/ssacli # The path to the ssacli executable +# use_sudo: True # Whether to use sudo or not +# ---------------------------------------------------------------------- + +# ssacli_path: /usr/sbin/ssacli +# use_sudo: True |