summaryrefslogtreecommitdiffstats
path: root/src/go/collectors/go.d.plugin/modules/nvme/charts.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/go/collectors/go.d.plugin/modules/nvme/charts.go267
1 files changed, 267 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/nvme/charts.go b/src/go/collectors/go.d.plugin/modules/nvme/charts.go
new file mode 100644
index 000000000..8404d2dcc
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/nvme/charts.go
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package nvme
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+)
+
+const (
+ _ = 2050 + iota // right after Disks section
+ prioDeviceEstimatedEndurancePerc
+ prioDeviceAvailableSparePerc
+ prioDeviceCompositeTemperature
+ prioDeviceIOTransferredCount
+ prioDevicePowerCyclesCount
+ prioDevicePowerOnTime
+ prioDeviceUnsafeShutdownsCount
+ prioDeviceCriticalWarningsState
+ prioDeviceMediaErrorsRate
+ prioDeviceErrorLogEntriesRate
+ prioDeviceWarningCompositeTemperatureTime
+ prioDeviceCriticalCompositeTemperatureTime
+ prioDeviceThmTemp1TransitionsCount
+ prioDeviceThmTemp2TransitionsRate
+ prioDeviceThmTemp1Time
+ prioDeviceThmTemp2Time
+)
+
+var deviceChartsTmpl = module.Charts{
+ deviceEstimatedEndurancePercChartTmpl.Copy(),
+ deviceAvailableSparePercChartTmpl.Copy(),
+ deviceCompositeTemperatureChartTmpl.Copy(),
+ deviceIOTransferredCountChartTmpl.Copy(),
+ devicePowerCyclesCountChartTmpl.Copy(),
+ devicePowerOnTimeChartTmpl.Copy(),
+ deviceUnsafeShutdownsCountChartTmpl.Copy(),
+ deviceCriticalWarningsStateChartTmpl.Copy(),
+ deviceMediaErrorsRateChartTmpl.Copy(),
+ deviceErrorLogEntriesRateChartTmpl.Copy(),
+ deviceWarnCompositeTemperatureTimeChartTmpl.Copy(),
+ deviceCritCompositeTemperatureTimeChartTmpl.Copy(),
+ deviceThmTemp1TransitionsRateChartTmpl.Copy(),
+ deviceThmTemp2TransitionsRateChartTmpl.Copy(),
+ deviceThmTemp1TimeChartTmpl.Copy(),
+ deviceThmTemp2TimeChartTmpl.Copy(),
+}
+
+var deviceEstimatedEndurancePercChartTmpl = module.Chart{
+ ID: "device_%s_estimated_endurance_perc",
+ Title: "Estimated endurance",
+ Units: "percentage",
+ Fam: "endurance",
+ Ctx: "nvme.device_estimated_endurance_perc",
+ Priority: prioDeviceEstimatedEndurancePerc,
+ Dims: module.Dims{
+ {ID: "device_%s_percentage_used", Name: "used"},
+ },
+}
+var deviceAvailableSparePercChartTmpl = module.Chart{
+ ID: "device_%s_available_spare_perc",
+ Title: "Remaining spare capacity",
+ Units: "percentage",
+ Fam: "spare",
+ Ctx: "nvme.device_available_spare_perc",
+ Priority: prioDeviceAvailableSparePerc,
+ Dims: module.Dims{
+ {ID: "device_%s_available_spare", Name: "spare"},
+ },
+}
+var deviceCompositeTemperatureChartTmpl = module.Chart{
+ ID: "device_%s_temperature",
+ Title: "Composite temperature",
+ Units: "celsius",
+ Fam: "temperature",
+ Ctx: "nvme.device_composite_temperature",
+ Priority: prioDeviceCompositeTemperature,
+ Dims: module.Dims{
+ {ID: "device_%s_temperature", Name: "temperature"},
+ },
+}
+var deviceIOTransferredCountChartTmpl = module.Chart{
+ ID: "device_%s_io_transferred_count",
+ Title: "Amount of data transferred to and from device",
+ Units: "bytes",
+ Fam: "transferred data",
+ Ctx: "nvme.device_io_transferred_count",
+ Priority: prioDeviceIOTransferredCount,
+ Type: module.Area,
+ Dims: module.Dims{
+ {ID: "device_%s_data_units_read", Name: "read"},
+ {ID: "device_%s_data_units_written", Name: "written", Mul: -1},
+ },
+}
+
+var devicePowerCyclesCountChartTmpl = module.Chart{
+ ID: "device_%s_power_cycles_count",
+ Title: "Power cycles",
+ Units: "cycles",
+ Fam: "power cycles",
+ Ctx: "nvme.device_power_cycles_count",
+ Priority: prioDevicePowerCyclesCount,
+ Dims: module.Dims{
+ {ID: "device_%s_power_cycles", Name: "power"},
+ },
+}
+var devicePowerOnTimeChartTmpl = module.Chart{
+ ID: "device_%s_power_on_time",
+ Title: "Power-on time",
+ Units: "seconds",
+ Fam: "power-on time",
+ Ctx: "nvme.device_power_on_time",
+ Priority: prioDevicePowerOnTime,
+ Dims: module.Dims{
+ {ID: "device_%s_power_on_time", Name: "power-on"},
+ },
+}
+var deviceCriticalWarningsStateChartTmpl = module.Chart{
+ ID: "device_%s_critical_warnings_state",
+ Title: "Critical warnings state",
+ Units: "state",
+ Fam: "critical warnings",
+ Ctx: "nvme.device_critical_warnings_state",
+ Priority: prioDeviceCriticalWarningsState,
+ Dims: module.Dims{
+ {ID: "device_%s_critical_warning_available_spare", Name: "available_spare"},
+ {ID: "device_%s_critical_warning_temp_threshold", Name: "temp_threshold"},
+ {ID: "device_%s_critical_warning_nvm_subsystem_reliability", Name: "nvm_subsystem_reliability"},
+ {ID: "device_%s_critical_warning_read_only", Name: "read_only"},
+ {ID: "device_%s_critical_warning_volatile_mem_backup_failed", Name: "volatile_mem_backup_failed"},
+ {ID: "device_%s_critical_warning_persistent_memory_read_only", Name: "persistent_memory_read_only"},
+ },
+}
+var deviceUnsafeShutdownsCountChartTmpl = module.Chart{
+ ID: "device_%s_unsafe_shutdowns_count",
+ Title: "Unsafe shutdowns",
+ Units: "shutdowns",
+ Fam: "shutdowns",
+ Ctx: "nvme.device_unsafe_shutdowns_count",
+ Priority: prioDeviceUnsafeShutdownsCount,
+ Dims: module.Dims{
+ {ID: "device_%s_unsafe_shutdowns", Name: "unsafe"},
+ },
+}
+var deviceMediaErrorsRateChartTmpl = module.Chart{
+ ID: "device_%s_media_errors_rate",
+ Title: "Media and data integrity errors",
+ Units: "errors/s",
+ Fam: "media errors",
+ Ctx: "nvme.device_media_errors_rate",
+ Priority: prioDeviceMediaErrorsRate,
+ Dims: module.Dims{
+ {ID: "device_%s_media_errors", Name: "media", Algo: module.Incremental},
+ },
+}
+var deviceErrorLogEntriesRateChartTmpl = module.Chart{
+ ID: "device_%s_error_log_entries_rate",
+ Title: "Error log entries",
+ Units: "entries/s",
+ Fam: "error log",
+ Ctx: "nvme.device_error_log_entries_rate",
+ Priority: prioDeviceErrorLogEntriesRate,
+ Dims: module.Dims{
+ {ID: "device_%s_num_err_log_entries", Name: "error_log", Algo: module.Incremental},
+ },
+}
+var deviceWarnCompositeTemperatureTimeChartTmpl = module.Chart{
+ ID: "device_%s_warning_composite_temperature_time",
+ Title: "Warning composite temperature time",
+ Units: "seconds",
+ Fam: "warn temp time",
+ Ctx: "nvme.device_warning_composite_temperature_time",
+ Priority: prioDeviceWarningCompositeTemperatureTime,
+ Dims: module.Dims{
+ {ID: "device_%s_warning_temp_time", Name: "wctemp"},
+ },
+}
+var deviceCritCompositeTemperatureTimeChartTmpl = module.Chart{
+ ID: "device_%s_critical_composite_temperature_time",
+ Title: "Critical composite temperature time",
+ Units: "seconds",
+ Fam: "crit temp time",
+ Ctx: "nvme.device_critical_composite_temperature_time",
+ Priority: prioDeviceCriticalCompositeTemperatureTime,
+ Dims: module.Dims{
+ {ID: "device_%s_critical_comp_time", Name: "cctemp"},
+ },
+}
+var (
+ deviceThmTemp1TransitionsRateChartTmpl = module.Chart{
+ ID: "device_%s_thm_temp1_transitions_rate",
+ Title: "Thermal management temp1 transitions",
+ Units: "transitions/s",
+ Fam: "thermal mgmt transitions",
+ Ctx: "nvme.device_thermal_mgmt_temp1_transitions_rate",
+ Priority: prioDeviceThmTemp1TransitionsCount,
+ Dims: module.Dims{
+ {ID: "device_%s_thm_temp1_trans_count", Name: "temp1", Algo: module.Incremental},
+ },
+ }
+ deviceThmTemp2TransitionsRateChartTmpl = module.Chart{
+ ID: "device_%s_thm_temp2_transitions_rate",
+ Title: "Thermal management temp2 transitions",
+ Units: "transitions/s",
+ Fam: "thermal mgmt transitions",
+ Ctx: "nvme.device_thermal_mgmt_temp2_transitions_rate",
+ Priority: prioDeviceThmTemp2TransitionsRate,
+ Dims: module.Dims{
+ {ID: "device_%s_thm_temp2_trans_count", Name: "temp2", Algo: module.Incremental},
+ },
+ }
+)
+var (
+ deviceThmTemp1TimeChartTmpl = module.Chart{
+ ID: "device_%s_thm_temp1_time",
+ Title: "Thermal management temp1 time",
+ Units: "seconds",
+ Fam: "thermal mgmt time",
+ Ctx: "nvme.device_thermal_mgmt_temp1_time",
+ Priority: prioDeviceThmTemp1Time,
+ Dims: module.Dims{
+ {ID: "device_%s_thm_temp1_total_time", Name: "temp1"},
+ },
+ }
+ deviceThmTemp2TimeChartTmpl = module.Chart{
+ ID: "device_%s_thm_temp2_time",
+ Title: "Thermal management temp1 time",
+ Units: "seconds",
+ Fam: "thermal mgmt time",
+ Ctx: "nvme.device_thermal_mgmt_temp2_time",
+ Priority: prioDeviceThmTemp2Time,
+ Dims: module.Dims{
+ {ID: "device_%s_thm_temp2_total_time", Name: "temp2"},
+ },
+ }
+)
+
+func (n *NVMe) addDeviceCharts(device string) {
+ charts := deviceChartsTmpl.Copy()
+
+ for _, chart := range *charts {
+ chart.ID = fmt.Sprintf(chart.ID, device)
+ chart.Labels = []module.Label{
+ {Key: "device", Value: device},
+ }
+ for _, dim := range chart.Dims {
+ dim.ID = fmt.Sprintf(dim.ID, device)
+ }
+ }
+
+ if err := n.Charts().Add(*charts...); err != nil {
+ n.Warning(err)
+ }
+}
+
+func (n *NVMe) removeDeviceCharts(device string) {
+ px := fmt.Sprintf("device_%s", device)
+
+ for _, chart := range *n.Charts() {
+ if strings.HasPrefix(chart.ID, px) {
+ chart.MarkRemove()
+ chart.MarkNotCreated()
+ }
+ }
+}