summary refs log tree commit diff stats
path: root/src/go/collectors/go.d.plugin/modules/nvme/metadata.yaml
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/go/collectors/go.d.plugin/modules/nvme/metadata.yaml 213
1 files changed, 213 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/nvme/metadata.yaml b/src/go/collectors/go.d.plugin/modules/nvme/metadata.yaml
new file mode 100644
index 000000000..71a5be2e7
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/nvme/metadata.yaml
@@ -0,0 +1,213 @@
+plugin_name: go.d.plugin
+modules:
+ - meta:
+ id: collector-go.d.plugin-nvme
+ plugin_name: go.d.plugin
+ module_name: nvme
+ monitored_instance:
+ name: NVMe devices
+ link: ""
+ icon_filename: nvme.svg
+ categories:
+ - data-collection.storage-mount-points-and-filesystems
+ keywords:
+ - nvme
+ related_resources:
+ integrations:
+ list: []
+ info_provided_to_referring_integrations:
+ description: ""
+ most_popular: false
+ overview:
+ data_collection:
+ metrics_description: >
+ This collector monitors the health of NVMe devices using the command line
+ tool [nvme](https://github.com/linux-nvme/nvme-cli#nvme-cli), which can only be run by the root user. It uses `sudo` and
+ assumes it is set up so that the netdata user can execute `nvme` as root without a password.
+ method_description: ""
+ supported_platforms:
+ include: []
+ exclude: []
+ multi_instance: true
+ additional_permissions:
+ description: ""
+ default_behavior:
+ auto_detection:
+ description: ""
+ limits:
+ description: ""
+ performance_impact:
+ description: ""
+ setup:
+ prerequisites:
+ list:
+ - title: Install nvme-cli
+ description: |
+ See [Distro Support](https://github.com/linux-nvme/nvme-cli#distro-support). Install `nvme-cli` using your distribution's package manager.
+ - title: Allow netdata to execute nvme
+ description: |
+ Add the netdata user to `/etc/sudoers` (use `which nvme` to find the full path to the binary):
+
+ ```bash
+ netdata ALL=(root) NOPASSWD: /usr/sbin/nvme
+ ```
+ configuration:
+ file:
+ name: go.d/nvme.conf
+ options:
+ description: |
+ The following options can be defined globally: update_every, autodetection_retry.
+ folding:
+ title: Config options
+ enabled: true
+ list:
+ - name: update_every
+ description: Data collection frequency.
+ default_value: 10
+ required: false
+ - name: autodetection_retry
+ description: Recheck interval in seconds. Zero means no recheck will be scheduled.
+ default_value: 0
+ required: false
+ - name: binary_path
+ description: Path to the nvme binary. The default is "nvme", in which case the executable is searched for in the directories listed in the PATH environment variable.
+ default_value: nvme
+ required: false
+ - name: timeout
+ description: nvme binary execution timeout.
+ default_value: 2
+ required: false
+ examples:
+ folding:
+ title: Config
+ enabled: true
+ list:
+ - name: Custom binary path
+ description: The executable is not in the directories specified in the PATH environment variable.
+ config: |
+ jobs:
+ - name: nvme
+ binary_path: /usr/local/sbin/nvme
+ troubleshooting:
+ problems:
+ list: []
+ alerts:
+ - name: nvme_device_critical_warnings_state
+ metric: nvme.device_critical_warnings_state
+ info: "NVMe device ${label:device} has critical warnings"
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/nvme.conf
+ metrics:
+ folding:
+ title: Metrics
+ enabled: false
+ description: ""
+ availability: []
+ scopes:
+ - name: device
+ description: These metrics refer to the NVMe device.
+ labels:
+ - name: device
+ description: NVMe device name
+ metrics:
+ - name: nvme.device_estimated_endurance_perc
+ description: Estimated endurance
+ unit: '%'
+ chart_type: line
+ dimensions:
+ - name: used
+ - name: nvme.device_available_spare_perc
+ description: Remaining spare capacity
+ unit: '%'
+ chart_type: line
+ dimensions:
+ - name: spare
+ - name: nvme.device_composite_temperature
+ description: Composite temperature
+ unit: celsius
+ chart_type: line
+ dimensions:
+ - name: temperature
+ - name: nvme.device_io_transferred_count
+ description: Amount of data transferred to and from device
+ unit: bytes
+ chart_type: area
+ dimensions:
+ - name: read
+ - name: written
+ - name: nvme.device_power_cycles_count
+ description: Power cycles
+ unit: cycles
+ chart_type: line
+ dimensions:
+ - name: power
+ - name: nvme.device_power_on_time
+ description: Power-on time
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: power-on
+ - name: nvme.device_critical_warnings_state
+ description: Critical warnings state
+ unit: state
+ chart_type: line
+ dimensions:
+ - name: available_spare
+ - name: temp_threshold
+ - name: nvm_subsystem_reliability
+ - name: read_only
+ - name: volatile_mem_backup_failed
+ - name: persistent_memory_read_only
+ - name: nvme.device_unsafe_shutdowns_count
+ description: Unsafe shutdowns
+ unit: shutdowns
+ chart_type: line
+ dimensions:
+ - name: unsafe
+ - name: nvme.device_media_errors_rate
+ description: Media and data integrity errors
+ unit: errors/s
+ chart_type: line
+ dimensions:
+ - name: media
+ - name: nvme.device_error_log_entries_rate
+ description: Error log entries
+ unit: entries/s
+ chart_type: line
+ dimensions:
+ - name: error_log
+ - name: nvme.device_warning_composite_temperature_time
+ description: Warning composite temperature time
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: wctemp
+ - name: nvme.device_critical_composite_temperature_time
+ description: Critical composite temperature time
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: cctemp
+ - name: nvme.device_thermal_mgmt_temp1_transitions_rate
+ description: Thermal management temp1 transitions
+ unit: transitions/s
+ chart_type: line
+ dimensions:
+ - name: temp1
+ - name: nvme.device_thermal_mgmt_temp2_transitions_rate
+ description: Thermal management temp2 transitions
+ unit: transitions/s
+ chart_type: line
+ dimensions:
+ - name: temp2
+ - name: nvme.device_thermal_mgmt_temp1_time
+ description: Thermal management temp1 time
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: temp1
+ - name: nvme.device_thermal_mgmt_temp2_time
+ description: Thermal management temp2 time
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: temp2