diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-08-10 09:18:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-08-10 09:18:49 +0000 |
commit | dd814a7c1a8de056a79f7238578b09236edd5506 (patch) | |
tree | 429e7eed5a634a4efe9a6877ce66da8e64aa1782 /collectors/python.d.plugin/smartd_log | |
parent | Adding upstream version 1.41.0. (diff) | |
download | netdata-upstream/1.42.0.tar.xz netdata-upstream/1.42.0.zip |
Adding upstream version 1.42.0. (ref: upstream/1.42.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/smartd_log')
-rw-r--r-- | collectors/python.d.plugin/smartd_log/metadata.yaml | 703 | ||||
-rw-r--r-- | collectors/python.d.plugin/smartd_log/metrics.csv | 36 |
2 files changed, 428 insertions, 311 deletions
diff --git a/collectors/python.d.plugin/smartd_log/metadata.yaml b/collectors/python.d.plugin/smartd_log/metadata.yaml index 334fb90c..d1194969 100644 --- a/collectors/python.d.plugin/smartd_log/metadata.yaml +++ b/collectors/python.d.plugin/smartd_log/metadata.yaml @@ -1,276 +1,429 @@ -meta: - plugin_name: python.d.plugin - module_name: smartd_log - monitored_instance: - name: S.M.A.R.T. - link: '' - categories: - - data-collection.hardware-devices-and-sensors - icon_filename: 'smart.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: [] - most_popular: false -overview: - data_collection: - metrics_description: 'Monitor S.M.A.R.T. metrics for insights into your hard drive health and performance. Enhance your hard drive performance and reliability with real-time insights and alerts from Netdata.' - method_description: '' - supported_platforms: - include: [] - exclude: [] - multi-instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: '' - limits: - description: '' - performance_impact: - description: '' -setup: - prerequisites: - list: [] - configuration: - file: - name: '' - description: '' - options: - description: '' - folding: - title: '' - enabled: true - list: [] - examples: - folding: - enabled: true - title: '' - list: [] -troubleshooting: - problems: - list: [] -alerts: [] -metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] +plugin_name: python.d.plugin +modules: + - meta: + plugin_name: python.d.plugin + module_name: smartd_log + monitored_instance: + name: S.M.A.R.T. 
+ link: "https://linux.die.net/man/8/smartd" + categories: + - data-collection.hardware-devices-and-sensors + icon_filename: "smart.png" + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - smart + - S.M.A.R.T. + - SCSI devices + - ATA devices + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors HDD/SSD S.M.A.R.T. metrics about drive health and performance. + method_description: | + It reads `smartd` log files to collect the metrics. + supported_platforms: + include: [] + exclude: [] + multi_instance: false + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: Upon satisfying the prerequisites, the collector will auto-detect metrics if written in either `/var/log/smartd/` or `/var/lib/smartmontools/`. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Configure `smartd` to write attribute information to files. + description: | + `smartd` must be running with `-A` option to write `smartd` attribute information to files. + + For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`: + + ``` + # dump smartd attrs info every 600 seconds + smartd_opts="-A /var/log/smartd/ -i 600" + ``` + + You may need to create the smartd directory before smartd will write to it: + + ```sh + mkdir -p /var/log/smartd + ``` + + Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See also <https://linux.die.net/man/8/smartd> for more info on the `-A --attributelog=PREFIX` command. + + `smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files. 
+ configuration: + file: + name: "python.d/smartd_log.conf" + options: + description: | + This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior. + + There are 2 sections: + + * Global variables + * One or more JOBS that can define multiple different instances to monitor. + + The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. + + Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. + + Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. + folding: + title: "Config options" + enabled: true + list: + - name: log_path + description: path to smartd log files. + default_value: /var/log/smartd + required: true + - name: exclude_disks + description: Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it. + default_value: "" + required: false + - name: age + description: Time in minutes since the last dump to file. + default_value: 30 + required: false + - name: update_every + description: Sets the default data collection frequency. + default_value: 1 + required: false + - name: priority + description: Controls the order of charts at the netdata dashboard. + default_value: 60000 + required: false + - name: autodetection_retry + description: Sets the job re-check interval in seconds. + default_value: 0 + required: false + - name: penalty + description: Indicates whether to apply penalty to update_every in case of failures. + default_value: yes + required: false + - name: name + description: > + Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. 
This allows autodetection to try several alternatives and pick the one that works. + default_value: "" + required: false + examples: + folding: + enabled: true + title: "Config" + list: + - name: Basic + description: A basic configuration example. + folding: + enabled: false + config: | + custom: + name: smartd_log + log_path: '/var/log/smartd/' + troubleshooting: + problems: + list: [] + alerts: [] metrics: - - name: smartd_log.read_error_rate - description: Read Error Rate - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.seek_error_rate - description: Seek Error Rate - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.soft_read_error_rate - description: Soft Read Error Rate - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.write_error_rate - description: Write Error Rate - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.read_total_err_corrected - description: Read Error Corrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.read_total_unc_errors - description: Read Error Uncorrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.write_total_err_corrected - description: Write Error Corrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.write_total_unc_errors - description: Write Error Uncorrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.verify_total_err_corrected - description: Verify Error Corrected - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.verify_total_unc_errors - description: Verify Error Uncorrected - unit: "errors" - chart_type: line - dimensions: - - 
name: a dimension per device - - name: smartd_log.sata_interface_downshift - description: SATA Interface Downshift - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.udma_crc_error_count - description: UDMA CRC Error Count - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.throughput_performance - description: Throughput Performance - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.seek_time_performance - description: Seek Time Performance - unit: "value" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.start_stop_count - description: Start/Stop Count - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.power_on_hours_count - description: Power-On Hours Count - unit: "hours" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.power_cycle_count - description: Power Cycle Count - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.unexpected_power_loss - description: Unexpected Power Loss - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.spin_up_time - description: Spin-Up Time - unit: "ms" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.spin_up_retries - description: Spin-up Retries - unit: "retries" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.calibration_retries - description: Calibration Retries - unit: "retries" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.airflow_temperature_celsius - description: Airflow Temperature Celsius - unit: "celsius" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.temperature_celsius - 
description: Temperature - unit: "celsius" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.reallocated_sectors_count - description: Reallocated Sectors Count - unit: "sectors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.reserved_block_count - description: Reserved Block Count - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.program_fail_count - description: Program Fail Count - unit: "errors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.erase_fail_count - description: Erase Fail Count - unit: "failures" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.wear_leveller_worst_case_erase_count - description: Wear Leveller Worst Case Erase Count - unit: "erases" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.unused_reserved_nand_blocks - description: Unused Reserved NAND Blocks - unit: "blocks" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.reallocation_event_count - description: Reallocation Event Count - unit: "events" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.current_pending_sector_count - description: Current Pending Sector Count - unit: "sectors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.offline_uncorrectable_sector_count - description: Offline Uncorrectable Sector Count - unit: "sectors" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.percent_lifetime_used - description: Percent Lifetime Used - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per device - - name: smartd_log.media_wearout_indicator - description: Media Wearout Indicator - unit: "percentage" - chart_type: line - dimensions: - - name: a dimension per 
device - - name: smartd_log.nand_writes_1gib - description: NAND Writes - unit: "GiB" - chart_type: line - dimensions: - - name: a dimension per device + folding: + title: Metrics + enabled: false + description: "The metrics listed below are split in terms of availability on device type, SCSI or ATA." + availability: + - "SCSI" + - "ATA" + scopes: + - name: global + description: "These metrics refer to the entire monitored application." + labels: [] + metrics: + - name: smartd_log.read_error_rate + description: Read Error Rate + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.seek_error_rate + description: Seek Error Rate + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.soft_read_error_rate + description: Soft Read Error Rate + availability: + - ATA + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_error_rate + description: Write Error Rate + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.read_total_err_corrected + description: Read Error Corrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.read_total_unc_errors + description: Read Error Uncorrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_total_err_corrected + description: Write Error Corrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.write_total_unc_errors + description: Write Error Uncorrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.verify_total_err_corrected + description: 
Verify Error Corrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.verify_total_unc_errors + description: Verify Error Uncorrected + availability: + - SCSI + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.sata_interface_downshift + description: SATA Interface Downshift + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.udma_crc_error_count + description: UDMA CRC Error Count + availability: + - ATA + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.throughput_performance + description: Throughput Performance + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.seek_time_performance + description: Seek Time Performance + availability: + - ATA + unit: "value" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.start_stop_count + description: Start/Stop Count + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.power_on_hours_count + description: Power-On Hours Count + availability: + - ATA + unit: "hours" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.power_cycle_count + description: Power Cycle Count + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.unexpected_power_loss + description: Unexpected Power Loss + availability: + - ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.spin_up_time + description: Spin-Up Time + availability: + - ATA + unit: "ms" + chart_type: line + dimensions: + - name: a dimension per device + - name: 
smartd_log.spin_up_retries + description: Spin-up Retries + availability: + - ATA + unit: "retries" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.calibration_retries + description: Calibration Retries + availability: + - ATA + unit: "retries" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.airflow_temperature_celsius + description: Airflow Temperature Celsius + availability: + - ATA + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.temperature_celsius + description: Temperature + availability: + - SCSI + - ATA + unit: "celsius" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reallocated_sectors_count + description: Reallocated Sectors Count + availability: + - ATA + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reserved_block_count + description: Reserved Block Count + availability: + - ATA + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.program_fail_count + description: Program Fail Count + availability: + - ATA + unit: "errors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.erase_fail_count + description: Erase Fail Count + availability: + - ATA + unit: "failures" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.wear_leveller_worst_case_erase_count + description: Wear Leveller Worst Case Erase Count + availability: + - ATA + unit: "erases" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.unused_reserved_nand_blocks + description: Unused Reserved NAND Blocks + availability: + - ATA + unit: "blocks" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.reallocation_event_count + description: Reallocation Event Count + availability: + 
- ATA + unit: "events" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.current_pending_sector_count + description: Current Pending Sector Count + availability: + - ATA + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.offline_uncorrectable_sector_count + description: Offline Uncorrectable Sector Count + availability: + - ATA + unit: "sectors" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.percent_lifetime_used + description: Percent Lifetime Used + availability: + - ATA + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.media_wearout_indicator + description: Media Wearout Indicator + availability: + - ATA + unit: "percentage" + chart_type: line + dimensions: + - name: a dimension per device + - name: smartd_log.nand_writes_1gib + description: NAND Writes + availability: + - ATA + unit: "GiB" + chart_type: line + dimensions: + - name: a dimension per device diff --git a/collectors/python.d.plugin/smartd_log/metrics.csv b/collectors/python.d.plugin/smartd_log/metrics.csv deleted file mode 100644 index 7dcc703c..00000000 --- a/collectors/python.d.plugin/smartd_log/metrics.csv +++ /dev/null @@ -1,36 +0,0 @@ -metric,scope,dimensions,unit,description,chart_type,labels,plugin,module -smartd_log.read_error_rate,,a dimension per device,value,Read Error Rate,line,,python.d.plugin,smartd_log -smartd_log.seek_error_rate,,a dimension per device,value,Seek Error Rate,line,,python.d.plugin,smartd_log -smartd_log.soft_read_error_rate,,a dimension per device,errors,Soft Read Error Rate,line,,python.d.plugin,smartd_log -smartd_log.write_error_rate,,a dimension per device,value,Write Error Rate,line,,python.d.plugin,smartd_log -smartd_log.read_total_err_corrected,,a dimension per device,errors,Read Error Corrected,line,,python.d.plugin,smartd_log -smartd_log.read_total_unc_errors,,a dimension per 
device,errors,Read Error Uncorrected,line,,python.d.plugin,smartd_log -smartd_log.write_total_err_corrected,,a dimension per device,errors,Write Error Corrected,line,,python.d.plugin,smartd_log -smartd_log.write_total_unc_errors,,a dimension per device,errors,Write Error Uncorrected,line,,python.d.plugin,smartd_log -smartd_log.verify_total_err_corrected,,a dimension per device,errors,Verify Error Corrected,line,,python.d.plugin,smartd_log -smartd_log.verify_total_unc_errors,,a dimension per device,errors,Verify Error Uncorrected,line,,python.d.plugin,smartd_log -smartd_log.sata_interface_downshift,,a dimension per device,events,SATA Interface Downshift,line,,python.d.plugin,smartd_log -smartd_log.udma_crc_error_count,,a dimension per device,errors,UDMA CRC Error Count,line,,python.d.plugin,smartd_log -smartd_log.throughput_performance,,a dimension per device,value,Throughput Performance,line,,python.d.plugin,smartd_log -smartd_log.seek_time_performance,,a dimension per device,value,Seek Time Performance,line,,python.d.plugin,smartd_log -smartd_log.start_stop_count,,a dimension per device,events,Start/Stop Count,line,,python.d.plugin,smartd_log -smartd_log.power_on_hours_count,,a dimension per device,hours,Power-On Hours Count,line,,python.d.plugin,smartd_log -smartd_log.power_cycle_count,,a dimension per device,events,Power Cycle Count,line,,python.d.plugin,smartd_log -smartd_log.unexpected_power_loss,,a dimension per device,events,Unexpected Power Loss,line,,python.d.plugin,smartd_log -smartd_log.spin_up_time,,a dimension per device,ms,Spin-Up Time,line,,python.d.plugin,smartd_log -smartd_log.spin_up_retries,,a dimension per device,retries,Spin-up Retries,line,,python.d.plugin,smartd_log -smartd_log.calibration_retries,,a dimension per device,retries,Calibration Retries,line,,python.d.plugin,smartd_log -smartd_log.airflow_temperature_celsius,,a dimension per device,celsius,Airflow Temperature Celsius,line,,python.d.plugin,smartd_log 
-smartd_log.temperature_celsius,,"a dimension per device",celsius,Temperature,line,,python.d.plugin,smartd_log -smartd_log.reallocated_sectors_count,,a dimension per device,sectors,Reallocated Sectors Count,line,,python.d.plugin,smartd_log -smartd_log.reserved_block_count,,a dimension per device,percentage,Reserved Block Count,line,,python.d.plugin,smartd_log -smartd_log.program_fail_count,,a dimension per device,errors,Program Fail Count,line,,python.d.plugin,smartd_log -smartd_log.erase_fail_count,,a dimension per device,failures,Erase Fail Count,line,,python.d.plugin,smartd_log -smartd_log.wear_leveller_worst_case_erase_count,,a dimension per device,erases,Wear Leveller Worst Case Erase Count,line,,python.d.plugin,smartd_log -smartd_log.unused_reserved_nand_blocks,,a dimension per device,blocks,Unused Reserved NAND Blocks,line,,python.d.plugin,smartd_log -smartd_log.reallocation_event_count,,a dimension per device,events,Reallocation Event Count,line,,python.d.plugin,smartd_log -smartd_log.current_pending_sector_count,,a dimension per device,sectors,Current Pending Sector Count,line,,python.d.plugin,smartd_log -smartd_log.offline_uncorrectable_sector_count,,a dimension per device,sectors,Offline Uncorrectable Sector Count,line,,python.d.plugin,smartd_log -smartd_log.percent_lifetime_used,,a dimension per device,percentage,Percent Lifetime Used,line,,python.d.plugin,smartd_log -smartd_log.media_wearout_indicator,,a dimension per device,percentage,Media Wearout Indicator,line,,python.d.plugin,smartd_log -smartd_log.nand_writes_1gib,,a dimension per device,GiB,NAND Writes,line,,python.d.plugin,smartd_log |