summaryrefslogtreecommitdiffstats
path: root/src/collectors/python.d.plugin/smartd_log
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-05 12:08:03 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-05 12:08:18 +0000
commit5da14042f70711ea5cf66e034699730335462f66 (patch)
tree0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/collectors/python.d.plugin/smartd_log
parentReleasing debian version 1.44.3-2. (diff)
downloadnetdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz
netdata-5da14042f70711ea5cf66e034699730335462f66.zip
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/collectors/python.d.plugin/smartd_log')
l---------src/collectors/python.d.plugin/smartd_log/README.md1
-rw-r--r--src/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md223
-rw-r--r--src/collectors/python.d.plugin/smartd_log/metadata.yaml429
-rw-r--r--src/collectors/python.d.plugin/smartd_log/smartd_log.chart.py790
-rw-r--r--src/collectors/python.d.plugin/smartd_log/smartd_log.conf76
5 files changed, 1519 insertions, 0 deletions
diff --git a/src/collectors/python.d.plugin/smartd_log/README.md b/src/collectors/python.d.plugin/smartd_log/README.md
new file mode 120000
index 000000000..63aad6c85
--- /dev/null
+++ b/src/collectors/python.d.plugin/smartd_log/README.md
@@ -0,0 +1 @@
+integrations/s.m.a.r.t..md \ No newline at end of file
diff --git a/src/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md b/src/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md
new file mode 100644
index 000000000..73a96ae55
--- /dev/null
+++ b/src/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md
@@ -0,0 +1,223 @@
+<!--startmeta
+custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/smartd_log/README.md"
+meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/smartd_log/metadata.yaml"
+sidebar_label: "S.M.A.R.T."
+learn_status: "Published"
+learn_rel_path: "Collecting Metrics/Hardware Devices and Sensors"
+most_popular: False
+message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE"
+endmeta-->
+
+# S.M.A.R.T.
+
+
+<img src="https://netdata.cloud/img/smart.png" width="150"/>
+
+
+Plugin: python.d.plugin
+Module: smartd_log
+
+<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" />
+
+## Overview
+
+This collector monitors HDD/SSD S.M.A.R.T. metrics about drive health and performance.
+
+
+It reads `smartd` log files to collect the metrics.
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Upon satisfying the prerequisites, the collector will auto-detect metrics if written in either `/var/log/smartd/` or `/var/lib/smartmontools/`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+The metrics listed below are split in terms of availability on device type, SCSI or ATA.
+
+### Per S.M.A.R.T. instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit | SCSI | ATA |
+|:------|:----------|:----|:---:|:---:|
+| smartd_log.read_error_rate | a dimension per device | value | | • |
+| smartd_log.seek_error_rate | a dimension per device | value | | • |
+| smartd_log.soft_read_error_rate | a dimension per device | errors | | • |
+| smartd_log.write_error_rate | a dimension per device | value | | • |
+| smartd_log.read_total_err_corrected | a dimension per device | errors | • | |
+| smartd_log.read_total_unc_errors | a dimension per device | errors | • | |
+| smartd_log.write_total_err_corrected | a dimension per device | errors | • | |
+| smartd_log.write_total_unc_errors | a dimension per device | errors | • | |
+| smartd_log.verify_total_err_corrected | a dimension per device | errors | • | |
+| smartd_log.verify_total_unc_errors | a dimension per device | errors | • | |
+| smartd_log.sata_interface_downshift | a dimension per device | events | | • |
+| smartd_log.udma_crc_error_count | a dimension per device | errors | | • |
+| smartd_log.throughput_performance | a dimension per device | value | | • |
+| smartd_log.seek_time_performance | a dimension per device | value | | • |
+| smartd_log.start_stop_count | a dimension per device | events | | • |
+| smartd_log.power_on_hours_count | a dimension per device | hours | | • |
+| smartd_log.power_cycle_count | a dimension per device | events | | • |
+| smartd_log.unexpected_power_loss | a dimension per device | events | | • |
+| smartd_log.spin_up_time | a dimension per device | ms | | • |
+| smartd_log.spin_up_retries | a dimension per device | retries | | • |
+| smartd_log.calibration_retries | a dimension per device | retries | | • |
+| smartd_log.airflow_temperature_celsius | a dimension per device | celsius | | • |
+| smartd_log.temperature_celsius | a dimension per device | celsius | • | • |
+| smartd_log.reallocated_sectors_count | a dimension per device | sectors | | • |
+| smartd_log.reserved_block_count | a dimension per device | percentage | | • |
+| smartd_log.program_fail_count | a dimension per device | errors | | • |
+| smartd_log.erase_fail_count | a dimension per device | failures | | • |
+| smartd_log.wear_leveller_worst_case_erase_count | a dimension per device | erases | | • |
+| smartd_log.unused_reserved_nand_blocks | a dimension per device | blocks | | • |
+| smartd_log.reallocation_event_count | a dimension per device | events | | • |
+| smartd_log.current_pending_sector_count | a dimension per device | sectors | | • |
+| smartd_log.offline_uncorrectable_sector_count | a dimension per device | sectors | | • |
+| smartd_log.percent_lifetime_used | a dimension per device | percentage | | • |
+| smartd_log.media_wearout_indicator | a dimension per device | percentage | | • |
+| smartd_log.nand_writes_1gib | a dimension per device | GiB | | • |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Configure `smartd` to write attribute information to files.
+
+`smartd` must be running with `-A` option to write `smartd` attribute information to files.
+
+For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`:
+
+```
+# dump smartd attrs info every 600 seconds
+smartd_opts="-A /var/log/smartd/ -i 600"
+```
+
+You may need to create the smartd directory before smartd will write to it:
+
+```sh
+mkdir -p /var/log/smartd
+```
+
+Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See also <https://linux.die.net/man/8/smartd> for more info on the `-A`/`--attributelog=PREFIX` option.
+
+`smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/smartd_log.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/smartd_log.conf
+```
+#### Options
+
+This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior.
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+<details><summary>Config options</summary>
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| log_path | path to smartd log files. | /var/log/smartd | yes |
+| exclude_disks | Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it. | | no |
+| age | Time in minutes since the last dump to file. | 30 | no |
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+</details>
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+```yaml
+custom:
+ name: smartd_log
+ log_path: '/var/log/smartd/'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `smartd_log` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin smartd_log debug trace
+ ```
+
+
diff --git a/src/collectors/python.d.plugin/smartd_log/metadata.yaml b/src/collectors/python.d.plugin/smartd_log/metadata.yaml
new file mode 100644
index 000000000..d11949691
--- /dev/null
+++ b/src/collectors/python.d.plugin/smartd_log/metadata.yaml
@@ -0,0 +1,429 @@
+plugin_name: python.d.plugin
+modules:
+ - meta:
+ plugin_name: python.d.plugin
+ module_name: smartd_log
+ monitored_instance:
+ name: S.M.A.R.T.
+ link: "https://linux.die.net/man/8/smartd"
+ categories:
+ - data-collection.hardware-devices-and-sensors
+ icon_filename: "smart.png"
+ related_resources:
+ integrations:
+ list: []
+ info_provided_to_referring_integrations:
+ description: ""
+ keywords:
+ - smart
+ - S.M.A.R.T.
+ - SCSI devices
+ - ATA devices
+ most_popular: false
+ overview:
+ data_collection:
+ metrics_description: |
+ This collector monitors HDD/SSD S.M.A.R.T. metrics about drive health and performance.
+ method_description: |
+ It reads `smartd` log files to collect the metrics.
+ supported_platforms:
+ include: []
+ exclude: []
+ multi_instance: false
+ additional_permissions:
+ description: ""
+ default_behavior:
+ auto_detection:
+ description: Upon satisfying the prerequisites, the collector will auto-detect metrics if written in either `/var/log/smartd/` or `/var/lib/smartmontools/`.
+ limits:
+ description: ""
+ performance_impact:
+ description: ""
+ setup:
+ prerequisites:
+ list:
+ - title: Configure `smartd` to write attribute information to files.
+ description: |
+ `smartd` must be running with `-A` option to write `smartd` attribute information to files.
+
+ For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`:
+
+ ```
+ # dump smartd attrs info every 600 seconds
+ smartd_opts="-A /var/log/smartd/ -i 600"
+ ```
+
+ You may need to create the smartd directory before smartd will write to it:
+
+ ```sh
+ mkdir -p /var/log/smartd
+ ```
+
+ Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See also <https://linux.die.net/man/8/smartd> for more info on the `-A`/`--attributelog=PREFIX` option.
+
+ `smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files.
+ configuration:
+ file:
+ name: "python.d/smartd_log.conf"
+ options:
+ description: |
+ This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior.
+
+ There are 2 sections:
+
+ * Global variables
+ * One or more JOBS that can define multiple different instances to monitor.
+
+ The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+ Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+ Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+ folding:
+ title: "Config options"
+ enabled: true
+ list:
+ - name: log_path
+ description: path to smartd log files.
+ default_value: /var/log/smartd
+ required: true
+ - name: exclude_disks
+ description: Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it.
+ default_value: ""
+ required: false
+ - name: age
+ description: Time in minutes since the last dump to file.
+ default_value: 30
+ required: false
+ - name: update_every
+ description: Sets the default data collection frequency.
+ default_value: 1
+ required: false
+ - name: priority
+ description: Controls the order of charts at the netdata dashboard.
+ default_value: 60000
+ required: false
+ - name: autodetection_retry
+ description: Sets the job re-check interval in seconds.
+ default_value: 0
+ required: false
+ - name: penalty
+ description: Indicates whether to apply penalty to update_every in case of failures.
+ default_value: yes
+ required: false
+ - name: name
+ description: >
+ Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works.
+ default_value: ""
+ required: false
+ examples:
+ folding:
+ enabled: true
+ title: "Config"
+ list:
+ - name: Basic
+ description: A basic configuration example.
+ folding:
+ enabled: false
+ config: |
+ custom:
+ name: smartd_log
+ log_path: '/var/log/smartd/'
+ troubleshooting:
+ problems:
+ list: []
+ alerts: []
+ metrics:
+ folding:
+ title: Metrics
+ enabled: false
+ description: "The metrics listed below are split in terms of availability on device type, SCSI or ATA."
+ availability:
+ - "SCSI"
+ - "ATA"
+ scopes:
+ - name: global
+ description: "These metrics refer to the entire monitored application."
+ labels: []
+ metrics:
+ - name: smartd_log.read_error_rate
+ description: Read Error Rate
+ availability:
+ - ATA
+ unit: "value"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.seek_error_rate
+ description: Seek Error Rate
+ availability:
+ - ATA
+ unit: "value"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.soft_read_error_rate
+ description: Soft Read Error Rate
+ availability:
+ - ATA
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.write_error_rate
+ description: Write Error Rate
+ availability:
+ - ATA
+ unit: "value"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.read_total_err_corrected
+ description: Read Error Corrected
+ availability:
+ - SCSI
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.read_total_unc_errors
+ description: Read Error Uncorrected
+ availability:
+ - SCSI
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.write_total_err_corrected
+ description: Write Error Corrected
+ availability:
+ - SCSI
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.write_total_unc_errors
+ description: Write Error Uncorrected
+ availability:
+ - SCSI
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.verify_total_err_corrected
+ description: Verify Error Corrected
+ availability:
+ - SCSI
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.verify_total_unc_errors
+ description: Verify Error Uncorrected
+ availability:
+ - SCSI
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.sata_interface_downshift
+ description: SATA Interface Downshift
+ availability:
+ - ATA
+ unit: "events"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.udma_crc_error_count
+ description: UDMA CRC Error Count
+ availability:
+ - ATA
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.throughput_performance
+ description: Throughput Performance
+ availability:
+ - ATA
+ unit: "value"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.seek_time_performance
+ description: Seek Time Performance
+ availability:
+ - ATA
+ unit: "value"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.start_stop_count
+ description: Start/Stop Count
+ availability:
+ - ATA
+ unit: "events"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.power_on_hours_count
+ description: Power-On Hours Count
+ availability:
+ - ATA
+ unit: "hours"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.power_cycle_count
+ description: Power Cycle Count
+ availability:
+ - ATA
+ unit: "events"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.unexpected_power_loss
+ description: Unexpected Power Loss
+ availability:
+ - ATA
+ unit: "events"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.spin_up_time
+ description: Spin-Up Time
+ availability:
+ - ATA
+ unit: "ms"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.spin_up_retries
+ description: Spin-up Retries
+ availability:
+ - ATA
+ unit: "retries"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.calibration_retries
+ description: Calibration Retries
+ availability:
+ - ATA
+ unit: "retries"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.airflow_temperature_celsius
+ description: Airflow Temperature Celsius
+ availability:
+ - ATA
+ unit: "celsius"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.temperature_celsius
+ description: Temperature
+ availability:
+ - SCSI
+ - ATA
+ unit: "celsius"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.reallocated_sectors_count
+ description: Reallocated Sectors Count
+ availability:
+ - ATA
+ unit: "sectors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.reserved_block_count
+ description: Reserved Block Count
+ availability:
+ - ATA
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.program_fail_count
+ description: Program Fail Count
+ availability:
+ - ATA
+ unit: "errors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.erase_fail_count
+ description: Erase Fail Count
+ availability:
+ - ATA
+ unit: "failures"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.wear_leveller_worst_case_erase_count
+ description: Wear Leveller Worst Case Erase Count
+ availability:
+ - ATA
+ unit: "erases"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.unused_reserved_nand_blocks
+ description: Unused Reserved NAND Blocks
+ availability:
+ - ATA
+ unit: "blocks"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.reallocation_event_count
+ description: Reallocation Event Count
+ availability:
+ - ATA
+ unit: "events"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.current_pending_sector_count
+ description: Current Pending Sector Count
+ availability:
+ - ATA
+ unit: "sectors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.offline_uncorrectable_sector_count
+ description: Offline Uncorrectable Sector Count
+ availability:
+ - ATA
+ unit: "sectors"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.percent_lifetime_used
+ description: Percent Lifetime Used
+ availability:
+ - ATA
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.media_wearout_indicator
+ description: Media Wearout Indicator
+ availability:
+ - ATA
+ unit: "percentage"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
+ - name: smartd_log.nand_writes_1gib
+ description: NAND Writes
+ availability:
+ - ATA
+ unit: "GiB"
+ chart_type: line
+ dimensions:
+ - name: a dimension per device
diff --git a/src/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/src/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
new file mode 100644
index 000000000..a896164df
--- /dev/null
+++ b/src/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
@@ -0,0 +1,790 @@
+# -*- coding: utf-8 -*-
+# Description: smart netdata python.d module
+# Author: ilyam8, vorph1
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import os
+import re
+from copy import deepcopy
+from time import time
+
+from bases.FrameworkServices.SimpleService import SimpleService
+from bases.collection import read_last_line
+
+INCREMENTAL = 'incremental' # value used for the 'algo' key of CHARTS entries
+ABSOLUTE = 'absolute' # value used for the 'algo' key of CHARTS entries
+
+ATA = 'ata' # device-type tag; usage is outside this view
+SCSI = 'scsi' # device-type tag; usage is outside this view
+CSV = '.csv' # extension of the smartd attribute log files (the README refers to smartd `.csv` files)
+
+DEF_RESCAN_INTERVAL = 60 # NOTE(review): presumably the log directory rescan interval; unit not visible here - confirm
+DEF_AGE = 30 # matches the documented default of the `age` option (minutes since the last dump to file)
+DEF_PATH = '/var/log/smartd' # matches the documented default of the `log_path` option
+
+ATTR1 = '1' # ATTR<N>: numeric attribute ids kept as strings; each is mapped to a chart in CHARTS
+ATTR2 = '2'
+ATTR3 = '3'
+ATTR4 = '4'
+ATTR5 = '5'
+ATTR7 = '7'
+ATTR8 = '8'
+ATTR9 = '9'
+ATTR10 = '10'
+ATTR11 = '11'
+ATTR12 = '12'
+ATTR13 = '13'
+ATTR170 = '170'
+ATTR171 = '171'
+ATTR172 = '172'
+ATTR173 = '173'
+ATTR174 = '174'
+ATTR177 = '177'
+ATTR180 = '180'
+ATTR183 = '183'
+ATTR190 = '190'
+ATTR194 = '194'
+ATTR196 = '196'
+ATTR197 = '197'
+ATTR198 = '198'
+ATTR199 = '199'
+ATTR202 = '202'
+ATTR206 = '206'
+ATTR233 = '233'
+ATTR241 = '241'
+ATTR242 = '242'
+ATTR249 = '249'
+ATTR_READ_ERR_COR = 'read-total-err-corrected' # ATTR_<NAME>: textual attribute names; each is mapped to a chart in CHARTS
+ATTR_READ_ERR_UNC = 'read-total-unc-errors'
+ATTR_WRITE_ERR_COR = 'write-total-err-corrected'
+ATTR_WRITE_ERR_UNC = 'write-total-unc-errors'
+ATTR_VERIFY_ERR_COR = 'verify-total-err-corrected'
+ATTR_VERIFY_ERR_UNC = 'verify-total-unc-errors'
+ATTR_TEMPERATURE = 'temperature'
+
+RE_ATA = re.compile(
+ '(\d+);' # attribute
+ '(\d+);' # normalized value
+ '(\d+)', # raw value
+ re.X
+)
+
+RE_SCSI = re.compile(
+ '([a-z-]+);' # attribute
+ '([0-9.]+)', # raw value
+ re.X
+)
+
+ORDER = [
+ # errors
+ 'read_error_rate',
+ 'seek_error_rate',
+ 'soft_read_error_rate',
+ 'write_error_rate',
+ 'read_total_err_corrected',
+ 'read_total_unc_errors',
+ 'write_total_err_corrected',
+ 'write_total_unc_errors',
+ 'verify_total_err_corrected',
+ 'verify_total_unc_errors',
+ # external failure
+ 'sata_interface_downshift',
+ 'udma_crc_error_count',
+ # performance
+ 'throughput_performance',
+ 'seek_time_performance',
+ # power
+ 'start_stop_count',
+ 'power_on_hours_count',
+ 'power_cycle_count',
+ 'unexpected_power_loss',
+ # spin
+ 'spin_up_time',
+ 'spin_up_retries',
+ 'calibration_retries',
+ # temperature
+ 'airflow_temperature_celsius',
+ 'temperature_celsius',
+ # wear
+ 'reallocated_sectors_count',
+ 'reserved_block_count',
+ 'program_fail_count',
+ 'erase_fail_count',
+ 'wear_leveller_worst_case_erase_count',
+ 'unused_reserved_nand_blocks',
+ 'reallocation_event_count',
+ 'current_pending_sector_count',
+ 'offline_uncorrectable_sector_count',
+ 'percent_lifetime_used',
+ 'media_wearout_indicator',
+ 'total_lbas_written',
+ 'total_lbas_read',
+]
+
+CHARTS = { # chart id -> chart definition template
+ 'read_error_rate': {
+ 'options': [None, 'Read Error Rate', 'value', 'errors', 'smartd_log.read_error_rate', 'line'], # NOTE(review): presumably [name, title, units, family, context, chart type] - confirm against the python.d framework
+ 'lines': [], # empty in every template; NOTE(review): presumably filled per device at runtime - confirm
+ 'attrs': [ATTR1], # attribute id(s)/name(s) mapped to this chart
+ 'algo': ABSOLUTE, # either INCREMENTAL or ABSOLUTE
+ },
+ 'seek_error_rate': {
+ 'options': [None, 'Seek Error Rate', 'value', 'errors', 'smartd_log.seek_error_rate', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR7],
+ 'algo': ABSOLUTE,
+ },
+ 'soft_read_error_rate': {
+ 'options': [None, 'Soft Read Error Rate', 'errors', 'errors', 'smartd_log.soft_read_error_rate', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR13],
+ 'algo': INCREMENTAL,
+ },
+ 'write_error_rate': {
+ 'options': [None, 'Write Error Rate', 'value', 'errors', 'smartd_log.write_error_rate', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR206],
+ 'algo': ABSOLUTE,
+ },
+ 'read_total_err_corrected': {
+ 'options': [None, 'Read Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR_READ_ERR_COR],
+ 'algo': INCREMENTAL,
+ },
+ 'read_total_unc_errors': {
+ 'options': [None, 'Read Error Uncorrected', 'errors', 'errors', 'smartd_log.read_total_unc_errors', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR_READ_ERR_UNC],
+ 'algo': INCREMENTAL,
+ },
+ 'write_total_err_corrected': {
+ 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR_WRITE_ERR_COR],
+ 'algo': INCREMENTAL,
+ },
+ 'write_total_unc_errors': {
+ 'options': [None, 'Write Error Uncorrected', 'errors', 'errors', 'smartd_log.write_total_unc_errors', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR_WRITE_ERR_UNC],
+ 'algo': INCREMENTAL,
+ },
+ 'verify_total_err_corrected': {
+ 'options': [None, 'Verify Error Corrected', 'errors', 'errors', 'smartd_log.verify_total_err_corrected',
+ 'line'],
+ 'lines': [],
+ 'attrs': [ATTR_VERIFY_ERR_COR],
+ 'algo': INCREMENTAL,
+ },
+ 'verify_total_unc_errors': {
+ 'options': [None, 'Verify Error Uncorrected', 'errors', 'errors', 'smartd_log.verify_total_unc_errors', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR_VERIFY_ERR_UNC],
+ 'algo': INCREMENTAL,
+ },
+ 'sata_interface_downshift': {
+ 'options': [None, 'SATA Interface Downshift', 'events', 'external failure',
+ 'smartd_log.sata_interface_downshift', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR183],
+ 'algo': INCREMENTAL,
+ },
+ 'udma_crc_error_count': {
+ 'options': [None, 'UDMA CRC Error Count', 'errors', 'external failure', 'smartd_log.udma_crc_error_count',
+ 'line'],
+ 'lines': [],
+ 'attrs': [ATTR199],
+ 'algo': INCREMENTAL,
+ },
+ 'throughput_performance': {
+ 'options': [None, 'Throughput Performance', 'value', 'performance', 'smartd_log.throughput_performance',
+ 'line'],
+ 'lines': [],
+ 'attrs': [ATTR2],
+ 'algo': ABSOLUTE,
+ },
+ 'seek_time_performance': {
+ 'options': [None, 'Seek Time Performance', 'value', 'performance', 'smartd_log.seek_time_performance', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR8],
+ 'algo': ABSOLUTE,
+ },
+ 'start_stop_count': {
+ 'options': [None, 'Start/Stop Count', 'events', 'power', 'smartd_log.start_stop_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR4],
+ 'algo': ABSOLUTE,
+ },
+ 'power_on_hours_count': {
+ 'options': [None, 'Power-On Hours Count', 'hours', 'power', 'smartd_log.power_on_hours_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR9],
+ 'algo': ABSOLUTE,
+ },
+ 'power_cycle_count': {
+ 'options': [None, 'Power Cycle Count', 'events', 'power', 'smartd_log.power_cycle_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR12],
+ 'algo': ABSOLUTE,
+ },
+ 'unexpected_power_loss': {
+ 'options': [None, 'Unexpected Power Loss', 'events', 'power', 'smartd_log.unexpected_power_loss', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR174],
+ 'algo': ABSOLUTE,
+ },
+ 'spin_up_time': {
+ 'options': [None, 'Spin-Up Time', 'ms', 'spin', 'smartd_log.spin_up_time', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR3],
+ 'algo': ABSOLUTE,
+ },
+ 'spin_up_retries': {
+ 'options': [None, 'Spin-up Retries', 'retries', 'spin', 'smartd_log.spin_up_retries', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR10],
+ 'algo': INCREMENTAL,
+ },
+ 'calibration_retries': {
+ 'options': [None, 'Calibration Retries', 'retries', 'spin', 'smartd_log.calibration_retries', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR11],
+ 'algo': INCREMENTAL,
+ },
+ 'airflow_temperature_celsius': {
+ 'options': [None, 'Airflow Temperature Celsius', 'celsius', 'temperature',
+ 'smartd_log.airflow_temperature_celsius', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR190],
+ 'algo': ABSOLUTE,
+ },
+ 'temperature_celsius': {
+ 'options': [None, 'Temperature', 'celsius', 'temperature', 'smartd_log.temperature_celsius', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR194, ATTR_TEMPERATURE], # numeric attribute 194 and the textual 'temperature' attribute share this chart
+ 'algo': ABSOLUTE,
+ },
+ 'reallocated_sectors_count': {
+ 'options': [None, 'Reallocated Sectors Count', 'sectors', 'wear', 'smartd_log.reallocated_sectors_count',
+ 'line'],
+ 'lines': [],
+ 'attrs': [ATTR5],
+ 'algo': ABSOLUTE,
+ },
+ 'reserved_block_count': {
+ 'options': [None, 'Reserved Block Count', 'percentage', 'wear', 'smartd_log.reserved_block_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR170],
+ 'algo': ABSOLUTE,
+ },
+ 'program_fail_count': {
+ 'options': [None, 'Program Fail Count', 'errors', 'wear', 'smartd_log.program_fail_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR171],
+ 'algo': INCREMENTAL,
+ },
+ 'erase_fail_count': {
+ 'options': [None, 'Erase Fail Count', 'failures', 'wear', 'smartd_log.erase_fail_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR172],
+ 'algo': INCREMENTAL,
+ },
+ 'wear_leveller_worst_case_erase_count': {
+ 'options': [None, 'Wear Leveller Worst Case Erase Count', 'erases', 'wear',
+ 'smartd_log.wear_leveller_worst_case_erase_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR173],
+ 'algo': ABSOLUTE,
+ },
+ 'unused_reserved_nand_blocks': {
+ 'options': [None, 'Unused Reserved NAND Blocks', 'blocks', 'wear', 'smartd_log.unused_reserved_nand_blocks',
+ 'line'],
+ 'lines': [],
+ 'attrs': [ATTR180],
+ 'algo': ABSOLUTE,
+ },
+ 'reallocation_event_count': {
+ 'options': [None, 'Reallocation Event Count', 'events', 'wear', 'smartd_log.reallocation_event_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR196],
+ 'algo': INCREMENTAL,
+ },
+ 'current_pending_sector_count': {
+ 'options': [None, 'Current Pending Sector Count', 'sectors', 'wear', 'smartd_log.current_pending_sector_count',
+ 'line'],
+ 'lines': [],
+ 'attrs': [ATTR197],
+ 'algo': ABSOLUTE,
+ },
+ 'offline_uncorrectable_sector_count': {
+ 'options': [None, 'Offline Uncorrectable Sector Count', 'sectors', 'wear',
+ 'smartd_log.offline_uncorrectable_sector_count', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR198],
+ 'algo': ABSOLUTE,
+
+ },
+ 'percent_lifetime_used': {
+ 'options': [None, 'Percent Lifetime Used', 'percentage', 'wear', 'smartd_log.percent_lifetime_used', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR202],
+ 'algo': ABSOLUTE,
+ },
+ 'media_wearout_indicator': {
+ 'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR233, ATTR177], # two attribute ids are mapped to this chart
+ 'algo': ABSOLUTE,
+ },
+ 'nand_writes_1gib': {
+ 'options': [None, 'NAND Writes', 'GiB', 'wear', 'smartd_log.nand_writes_1gib', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR249],
+ 'algo': ABSOLUTE,
+ },
+ 'total_lbas_written': {
+ 'options': [None, 'Total LBAs Written', 'sectors', 'wear', 'smartd_log.total_lbas_written', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR241],
+ 'algo': ABSOLUTE,
+ },
+ 'total_lbas_read': {
+ 'options': [None, 'Total LBAs Read', 'sectors', 'wear', 'smartd_log.total_lbas_read', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR242],
+ 'algo': ABSOLUTE,
+ },
+}
+
+# NOTE: 'parse_temp' decodes ATA 194 raw value. Not heavily tested. Written by @Ferroin
+# C code:
+# https://github.com/smartmontools/smartmontools/blob/master/smartmontools/atacmds.cpp#L2051
+#
+# Calling 'parse_temp' on the raw value will return a 4-tuple, containing
+# * temperature
+# * minimum
+# * maximum
+# * over-temperature count
+# substituting None for values it can't decode.
+#
+# Example:
+# >>> parse_temp(42952491042)
+# (34, 10, 43, None)
+#
+#
+# def check_temp_word(i):
+#     if i <= 0x7F:
+#         return 0x11
+#     elif i <= 0xFF:
+#         return 0x01
+#     elif 0xFF80 <= i:
+#         return 0x10
+#     return 0x00
+#
+#
+# def check_temp_range(t, b0, b1):
+#     if b0 > b1:
+#         t0, t1 = b1, b0
+#     else:
+#         t0, t1 = b0, b1
+#
+#     if all([
+#         -60 <= t0,
+#         t0 <= t,
+#         t <= t1,
+#         t1 <= 120,
+#         not (t0 == -1 and t1 <= 0)
+#     ]):
+#         return t0, t1
+#     return None, None
+#
+#
+# def parse_temp(raw):
+#     byte = list()
+#     word = list()
+#     for i in range(0, 6):
+#         byte.append(0xFF & (raw >> (i * 8)))
+#     for i in range(0, 3):
+#         word.append(0xFFFF & (raw >> (i * 16)))
+#
+#     ctwd = check_temp_word(word[0])
+#
+#     if not word[2]:
+#         if ctwd and not word[1]:
+#             # byte[0] is temp, no other data
+#             return byte[0], None, None, None
+#
+#         if ctwd and all(check_temp_range(byte[0], byte[2], byte[3])):
+#             # byte[0] is temp, byte[2] is max or min, byte[3] is min or max
+#             trange = check_temp_range(byte[0], byte[2], byte[3])
+#             return byte[0], trange[0], trange[1], None
+#
+#         if ctwd and all(check_temp_range(byte[0], byte[1], byte[2])):
+#             # byte[0] is temp, byte[1] is max or min, byte[2] is min or max
+#             trange = check_temp_range(byte[0], byte[1], byte[2])
+#             return byte[0], trange[0], trange[1], None
+#
+#         return None, None, None, None
+#
+#     if ctwd:
+#         if all(
+#             [
+#                 ctwd & check_temp_word(word[1]) & check_temp_word(word[2]) != 0x00,
+#                 all(check_temp_range(byte[0], byte[2], byte[4])),
+#             ]
+#         ):
+#             # byte[0] is temp, byte[2] is max or min, byte[4] is min or max
+#             trange = check_temp_range(byte[0], byte[2], byte[4])
+#             return byte[0], trange[0], trange[1], None
+#         else:
+#             trange = check_temp_range(byte[0], byte[2], byte[3])
+#             if word[2] < 0x7FFF and all(trange) and trange[1] >= 40:
+#                 # byte[0] is temp, byte[2] is max or min, byte[3] is min or max, word[2] is overtemp count
+#                 return byte[0], trange[0], trange[1], word[2]
+#     # no data
+#     return None, None, None, None
+
+
+# Reverse lookup built from CHARTS: every entry of a chart's 'attrs' list maps
+# to the key of that chart. If an entry is listed by several charts, the chart
+# iterated last wins.
+CHARTED_ATTRS = dict(
+    (attr_id, chart_id)
+    for chart_id, chart in CHARTS.items()
+    for attr_id in chart['attrs']
+)
+
+
+class BaseAtaSmartAttribute:
+    """Holds one ATA SMART attribute record; subclasses choose which reading to report."""
+
+    def __init__(self, name, normalized_value, raw_value):
+        # The three fields are stored exactly as passed in; no conversion happens here.
+        self.name, self.normalized_value, self.raw_value = name, normalized_value, raw_value
+
+    def value(self):
+        """Return the reading to report for this attribute. Subclasses must override."""
+        raise NotImplementedError
+
+
+class AtaRaw(BaseAtaSmartAttribute):
+    """ATA attribute reported by its raw value."""
+
+    def value(self):
+        """Return the stored raw value unchanged (no type conversion)."""
+        return self.raw_value
+
+
+class AtaNormalized(BaseAtaSmartAttribute):
+    """ATA attribute reported by its normalized value."""
+
+    def value(self):
+        """Return the stored normalized value unchanged (no type conversion)."""
+        return self.normalized_value
+
+
+class Ata3(BaseAtaSmartAttribute):
+    """Raw-value attribute whose large raw numbers are reduced to their low 12 bits."""
+
+    def value(self):
+        """Return the raw value as int, masked with 0xFFF when it exceeds 1e6."""
+        # https://github.com/netdata/netdata/issues/5919
+        #
+        # 3;151;38684000679;
+        # 423 (Average 447)
+        # 38684000679 & 0xFFF -> 423
+        # (38684000679 & 0xFFF0000) >> 16 -> 447
+        raw = int(self.raw_value)
+        return raw & 0xFFF if raw > 1e6 else raw
+
+
+class Ata9(BaseAtaSmartAttribute):
+    """Raw-value attribute whose large raw numbers are reduced to their low 16 bits."""
+
+    def value(self):
+        """Return the raw value as int, masked with 0xFFFF when it exceeds 1e6."""
+        raw = int(self.raw_value)
+        return raw & 0xFFFF if raw > 1e6 else raw
+
+
+class Ata190(BaseAtaSmartAttribute):
+    """Attribute reported as the distance of its normalized value from 100."""
+
+    def value(self):
+        """Return 100 minus the normalized value (converted to int)."""
+        normalized = int(self.normalized_value)
+        return 100 - normalized
+
+
+class Ata194(BaseAtaSmartAttribute):
+    """Temperature-style attribute decoded from either a packed or a plain raw value."""
+
+    # https://github.com/netdata/netdata/issues/3041
+    # https://github.com/netdata/netdata/issues/5919
+    #
+    # The low byte is the current temperature, the third lowest is the maximum, and the fifth lowest is the minimum
+    def value(self):
+        """Return the low byte of a raw value above 1e6, else min(normalized, raw)."""
+        raw = int(self.raw_value)
+        if raw > 1e6:
+            # Packed form: only the lowest byte is reported.
+            return raw & 0xFF
+        return min(int(self.normalized_value), raw)
+
+
+class BaseSCSISmartAttribute:
+    """Holds one SCSI attribute record (name and raw value); subclasses define value()."""
+
+    def __init__(self, name, raw_value):
+        # Both fields are stored exactly as passed in.
+        self.name, self.raw_value = name, raw_value
+
+    def value(self):
+        """Return the reading to report for this attribute. Subclasses must override."""
+        raise NotImplementedError
+
+
+class SCSIRaw(BaseSCSISmartAttribute):
+    """SCSI attribute reported by its raw value."""
+
+    def value(self):
+        """Return the stored raw value unchanged (no type conversion)."""
+        return self.raw_value
+
+
+def ata_attribute_factory(value):
+    """Build the attribute object for one parsed ATA record.
+
+    `value` is unpacked positionally into the chosen class, so it has to
+    provide (name, normalized_value, raw_value). Its first item selects
+    the class.
+    """
+    attr_id = value[0]
+
+    # Attributes that have a dedicated class.
+    if attr_id == ATTR3:
+        return Ata3(*value)
+    if attr_id == ATTR9:
+        return Ata9(*value)
+    if attr_id == ATTR190:
+        return Ata190(*value)
+    if attr_id == ATTR194:
+        return Ata194(*value)
+
+    # Attributes reported by their normalized value.
+    if attr_id in (ATTR1, ATTR7, ATTR177, ATTR202, ATTR206, ATTR233):
+        return AtaNormalized(*value)
+
+    # Everything else is reported by its raw value.
+    return AtaRaw(*value)
+
+
+def scsi_attribute_factory(value):
+    """Build a SCSIRaw from one parsed SCSI record, unpacked as (name, raw_value)."""
+    attribute = SCSIRaw(*value)
+    return attribute
+
+
+def attribute_factory(value):
+    """Build an attribute object from one parsed record.
+
+    Records whose first item consists only of digits go to the ATA factory;
+    all others go to the SCSI factory.
+    """
+    factory = ata_attribute_factory if value[0].isdigit() else scsi_attribute_factory
+    return factory(value)
+
+
+def handle_error(*errors):
+    """Decorator factory that turns the listed exceptions into a None result.
+
+    :param errors: exception classes to intercept.
+    :return: a decorator; the decorated callable returns its normal result,
+        or None when one of `errors` is raised. Any other exception propagates.
+    """
+    def on_method(method):
+        # Keyword arguments are forwarded as well, so decorated callables can
+        # be invoked exactly like the undecorated ones.
+        def on_call(*args, **kwargs):
+            try:
+                return method(*args, **kwargs)
+            except errors:
+                return None
+
+        return on_call
+
+    return on_method
+
+
+class DiskLogFile:
+    """A log file path together with the file size recorded at the last read.
+
+    The three query methods return None instead of raising when the
+    underlying call raises OSError.
+    """
+
+    def __init__(self, full_path):
+        self.path = full_path
+        # Baseline size that is_changed() compares against. Not guarded: an
+        # OSError here propagates to the caller.
+        self.size = os.path.getsize(full_path)
+
+    @handle_error(OSError)
+    def is_changed(self):
+        """Return True when the current file size differs from the recorded one."""
+        recorded_size = self.size
+        return recorded_size != os.path.getsize(self.path)
+
+    @handle_error(OSError)
+    def is_active(self, current_time, limit):
+        """Return True when the file was modified less than `limit` minutes before `current_time`."""
+        idle_minutes = (current_time - os.path.getmtime(self.path)) / 60
+        return idle_minutes < limit
+
+    @handle_error(OSError)
+    def read(self):
+        """Record the current size, then return the last line of the file."""
+        self.size = os.path.getsize(self.path)
+        last_line = read_last_line(self.path)
+        return last_line
+
+
+class BaseDisk:
+    """One monitored disk: its name, its log file and the attributes parsed last.
+
+    Equality is based on `raw_name`, and a disk also compares equal to a plain
+    value equal to its `raw_name`. Subclasses provide `parser`.
+    """
+
+    def __init__(self, name, log_file):
+        self.raw_name = name
+        # Display name: runs of underscores collapsed into a single one.
+        self.name = re.sub(r'_+', '_', name)
+        self.log_file = log_file
+        self.attrs = list()
+        self.alive = True
+        self.charted = False
+
+    def __eq__(self, other):
+        if isinstance(other, BaseDisk):
+            return self.raw_name == other.raw_name
+        return self.raw_name == other
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        # Must agree with __eq__: disks with the same raw_name (and the raw_name
+        # itself) compare equal, so they have to hash equally. The previous
+        # hash(repr(self)) was identity based and broke that contract.
+        return hash(self.raw_name)
+
+    def parser(self, data):
+        """Extract attribute records from one log line. Subclasses must override."""
+        raise NotImplementedError
+
+    @handle_error(TypeError)
+    def populate_attrs(self):
+        """Re-read the log file and rebuild `attrs` from its last line.
+
+        :return: number of attributes parsed, or None when a TypeError is
+            raised on the way (presumably when `log_file.read()` returned None
+            and the parser was handed None; depends on the parser).
+        """
+        self.attrs = list()
+        line = self.log_file.read()
+        for value in self.parser(line):
+            self.attrs.append(attribute_factory(value))
+
+        return len(self.attrs)
+
+    def data(self):
+        """Return a dict mapping '<disk name>_<attribute name>' to each attribute's value."""
+        return dict(
+            ('{0}_{1}'.format(self.name, attr.name), attr.value())
+            for attr in self.attrs
+        )
+
+
+class ATADisk(BaseDisk):
+    """Disk whose log lines are parsed with the RE_ATA pattern."""
+
+    def parser(self, data):
+        """Return every RE_ATA match found in `data`."""
+        matches = RE_ATA.findall(data)
+        return matches
+
+
+class SCSIDisk(BaseDisk):
+    """Disk whose log lines are parsed with the RE_SCSI pattern."""
+
+    def parser(self, data):
+        """Return every RE_SCSI match found in `data`."""
+        matches = RE_SCSI.findall(data)
+        return matches
+
+
+class Service(SimpleService):
+    """Collects SMART attribute values from the log files found in a directory.
+
+    Configuration keys read here:
+      * 'log_path'      - directory that is scanned for log files
+      * 'age'           - minutes a log file may stay unmodified before it is ignored
+      * 'exclude_disks' - whitespace separated substrings; a disk whose name
+                          contains one of them is skipped
+    """
+
+    def __init__(self, configuration=None, name=None):
+        SimpleService.__init__(self, configuration=configuration, name=name)
+        self.order = ORDER
+        # Private copy, so the module level CHARTS is never mutated through this job.
+        self.definitions = deepcopy(CHARTS)
+        self.log_path = configuration.get('log_path', DEF_PATH)
+        self.age = configuration.get('age', DEF_AGE)
+        self.exclude = configuration.get('exclude_disks', str()).split()
+        self.disks = list()
+        self.runs = 0
+        self.do_force_rescan = False
+
+    def check(self):
+        """Return True when the initial scan found at least one usable disk."""
+        return self.scan() > 0
+
+    def get_data(self):
+        """Return a dict of '<disk>_<attribute>' -> value for all live disks.
+
+        Every DEF_RESCAN_INTERVAL runs, or right after a disk failed, stale
+        disks are dropped and the log directory is scanned again.
+        """
+        self.runs += 1
+
+        if self.do_force_rescan or self.runs % DEF_RESCAN_INTERVAL == 0:
+            self.cleanup()
+            self.scan()
+            self.do_force_rescan = False
+
+        data = dict()
+
+        for disk in self.disks:
+            if not disk.alive:
+                continue
+
+            if not disk.charted:
+                self.add_disk_to_charts(disk)
+
+            changed = disk.log_file.is_changed()
+
+            # None (as opposed to False) means the file size could not be read.
+            if changed is None:
+                disk.alive = False
+                self.do_force_rescan = True
+                continue
+
+            # Re-parse only when the file changed; None signals a failed parse.
+            if changed and disk.populate_attrs() is None:
+                disk.alive = False
+                self.do_force_rescan = True
+                continue
+
+            data.update(disk.data())
+
+        return data
+
+    def cleanup(self):
+        """Drop disks that are marked dead or whose log file is no longer active."""
+        current_time = time()
+        # Iterate over a copy because the list is modified inside the loop.
+        for disk in self.disks[:]:
+            if any(
+                [
+                    not disk.alive,
+                    not disk.log_file.is_active(current_time, self.age),
+                ]
+            ):
+                # Removal by raw_name works because BaseDisk compares equal to it.
+                self.disks.remove(disk.raw_name)
+                self.remove_disk_from_charts(disk)
+
+    def scan(self):
+        """Add every new, usable log file in `log_path` as a disk; return the disk count."""
+        self.debug('scanning {0}'.format(self.log_path))
+        current_time = time()
+
+        for full_name in os.listdir(self.log_path):
+            disk = self.create_disk_from_file(full_name, current_time)
+            if not disk:
+                continue
+            self.disks.append(disk)
+
+        return len(self.disks)
+
+    def create_disk_from_file(self, full_name, current_time):
+        """Build a disk object for one directory entry, or return None to skip it.
+
+        :param full_name: file name as returned by os.listdir(self.log_path).
+        :param current_time: timestamp the file's modification time is compared to.
+        :return: an ATADisk/SCSIDisk with parsed attributes, or None when the
+            entry is not a usable log file (the reason is logged).
+        """
+        if not full_name.endswith(CSV):
+            self.debug('skipping {0}: not a csv file'.format(full_name))
+            return None
+
+        # The disk name is the third dot-separated component counted from the
+        # end. A name with fewer components used to raise IndexError and abort
+        # the whole scan; such files are skipped instead.
+        parts = os.path.basename(full_name).split('.')
+        if len(parts) < 3:
+            self.debug('skipping {0}: unexpected file name format'.format(full_name))
+            return None
+
+        name = parts[-3]
+        path = os.path.join(self.log_path, full_name)
+
+        if name in self.disks:
+            self.debug('skipping {0}: already in disks'.format(full_name))
+            return None
+
+        if any(p in name for p in self.exclude):
+            self.debug('skipping {0}: filtered by `exclude` option'.format(full_name))
+            return None
+
+        if not os.access(path, os.R_OK):
+            self.debug('skipping {0}: not readable'.format(full_name))
+            return None
+
+        if os.path.getsize(path) == 0:
+            self.debug('skipping {0}: zero size'.format(full_name))
+            return None
+
+        if (current_time - os.path.getmtime(path)) / 60 > self.age:
+            self.debug('skipping {0}: haven\'t been updated for last {1} minutes'.format(full_name, self.age))
+            return None
+
+        if ATA in full_name:
+            disk = ATADisk(name, DiskLogFile(path))
+        elif SCSI in full_name:
+            disk = SCSIDisk(name, DiskLogFile(path))
+        else:
+            self.debug('skipping {0}: unknown type'.format(full_name))
+            return None
+
+        disk.populate_attrs()
+        if not disk.attrs:
+            self.error('skipping {0}: parsing failed'.format(full_name))
+            return None
+
+        self.debug('added {0}'.format(full_name))
+        return disk
+
+    def add_disk_to_charts(self, disk):
+        """Add (or un-hide) one dimension per charted attribute of `disk`."""
+        if len(self.charts) == 0 or disk.charted:
+            return
+        disk.charted = True
+
+        for attr in disk.attrs:
+            chart_id = CHARTED_ATTRS.get(attr.name)
+
+            if not chart_id or chart_id not in self.charts:
+                continue
+
+            chart = self.charts[chart_id]
+            dim = [
+                '{0}_{1}'.format(disk.name, attr.name),
+                disk.name,
+                CHARTS[chart_id]['algo'],
+            ]
+
+            # A dimension that already exists is made visible again rather than re-added.
+            if dim[0] in self.charts[chart_id].dimensions:
+                chart.hide_dimension(dim[0], reverse=True)
+            else:
+                chart.add_dimension(dim)
+
+    def remove_disk_from_charts(self, disk):
+        """Delete the dimensions of `disk` from every chart that shows its attributes."""
+        if len(self.charts) == 0 or not disk.charted:
+            return
+
+        for attr in disk.attrs:
+            chart_id = CHARTED_ATTRS.get(attr.name)
+
+            if not chart_id or chart_id not in self.charts:
+                continue
+
+            self.charts[chart_id].del_dimension('{0}_{1}'.format(disk.name, attr.name))
diff --git a/src/collectors/python.d.plugin/smartd_log/smartd_log.conf b/src/collectors/python.d.plugin/smartd_log/smartd_log.conf
new file mode 100644
index 000000000..3e81317f1
--- /dev/null
+++ b/src/collectors/python.d.plugin/smartd_log/smartd_log.conf
@@ -0,0 +1,76 @@
+# netdata python.d.plugin configuration for smartd log
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# penalty indicates whether to apply penalty to update_every in case of failures.
+# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes.
+# penalty: yes
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# penalty: yes # the JOB's penalty
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# Additionally to the above, smartd_log also supports the following:
+#
+# log_path: '/path/to/smartd_logs' # path to smartd log files. Default is /var/log/smartd
+# exclude_disks: 'PATTERN1 PATTERN2' # space separated patterns. If the pattern is in the drive name, the module will not collect data for it.
+# age: 30 # time in minutes since the last dump to file. If smartd has not dumped data within this time the job exits.
+#
+# ----------------------------------------------------------------------
+
+custom:
+ name: smartd_log
+ log_path: '/var/log/smartd/'
+
+debian:
+ name: smartd_log
+ log_path: '/var/lib/smartmontools/'