diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 11:19:16 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:53:24 +0000 |
commit | b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 (patch) | |
tree | d4d31289c39fc00da064a825df13a0b98ce95b10 /src/go/collectors/go.d.plugin/modules/nvme/charts.go | |
parent | Adding upstream version 1.44.3. (diff) | |
download | netdata-upstream.tar.xz netdata-upstream.zip |
Adding upstream version 1.46.3.upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/go/collectors/go.d.plugin/modules/nvme/charts.go | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/nvme/charts.go b/src/go/collectors/go.d.plugin/modules/nvme/charts.go new file mode 100644 index 000000000..8404d2dcc --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/nvme/charts.go @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package nvme + +import ( + "fmt" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + _ = 2050 + iota // right after Disks section + prioDeviceEstimatedEndurancePerc + prioDeviceAvailableSparePerc + prioDeviceCompositeTemperature + prioDeviceIOTransferredCount + prioDevicePowerCyclesCount + prioDevicePowerOnTime + prioDeviceUnsafeShutdownsCount + prioDeviceCriticalWarningsState + prioDeviceMediaErrorsRate + prioDeviceErrorLogEntriesRate + prioDeviceWarningCompositeTemperatureTime + prioDeviceCriticalCompositeTemperatureTime + prioDeviceThmTemp1TransitionsCount + prioDeviceThmTemp2TransitionsRate + prioDeviceThmTemp1Time + prioDeviceThmTemp2Time +) + +var deviceChartsTmpl = module.Charts{ + deviceEstimatedEndurancePercChartTmpl.Copy(), + deviceAvailableSparePercChartTmpl.Copy(), + deviceCompositeTemperatureChartTmpl.Copy(), + deviceIOTransferredCountChartTmpl.Copy(), + devicePowerCyclesCountChartTmpl.Copy(), + devicePowerOnTimeChartTmpl.Copy(), + deviceUnsafeShutdownsCountChartTmpl.Copy(), + deviceCriticalWarningsStateChartTmpl.Copy(), + deviceMediaErrorsRateChartTmpl.Copy(), + deviceErrorLogEntriesRateChartTmpl.Copy(), + deviceWarnCompositeTemperatureTimeChartTmpl.Copy(), + deviceCritCompositeTemperatureTimeChartTmpl.Copy(), + deviceThmTemp1TransitionsRateChartTmpl.Copy(), + deviceThmTemp2TransitionsRateChartTmpl.Copy(), + deviceThmTemp1TimeChartTmpl.Copy(), + deviceThmTemp2TimeChartTmpl.Copy(), +} + +var deviceEstimatedEndurancePercChartTmpl = module.Chart{ + ID: "device_%s_estimated_endurance_perc", + Title: "Estimated endurance", + Units: "percentage", + Fam: "endurance", + Ctx: "nvme.device_estimated_endurance_perc", + Priority: prioDeviceEstimatedEndurancePerc, + Dims: module.Dims{ + {ID: "device_%s_percentage_used", Name: "used"}, + }, +} +var deviceAvailableSparePercChartTmpl = module.Chart{ + ID: "device_%s_available_spare_perc", + Title: "Remaining spare capacity", + Units: "percentage", + Fam: "spare", + Ctx: "nvme.device_available_spare_perc", + Priority: prioDeviceAvailableSparePerc, + Dims: module.Dims{ + {ID: "device_%s_available_spare", Name: "spare"}, + }, +} +var deviceCompositeTemperatureChartTmpl = module.Chart{ + ID: "device_%s_temperature", + Title: "Composite temperature", + Units: "celsius", + Fam: "temperature", + Ctx: "nvme.device_composite_temperature", + Priority: prioDeviceCompositeTemperature, + Dims: module.Dims{ + {ID: "device_%s_temperature", Name: "temperature"}, + }, +} +var deviceIOTransferredCountChartTmpl = module.Chart{ + ID: "device_%s_io_transferred_count", + Title: "Amount of data transferred to and from device", + Units: "bytes", + Fam: "transferred data", + Ctx: "nvme.device_io_transferred_count", + Priority: prioDeviceIOTransferredCount, + Type: module.Area, + Dims: module.Dims{ + {ID: "device_%s_data_units_read", Name: "read"}, + {ID: "device_%s_data_units_written", Name: "written", Mul: -1}, + }, +} + +var devicePowerCyclesCountChartTmpl = module.Chart{ + ID: "device_%s_power_cycles_count", + Title: "Power cycles", + Units: "cycles", + Fam: "power cycles", + Ctx: "nvme.device_power_cycles_count", + Priority: prioDevicePowerCyclesCount, + Dims: module.Dims{ + {ID: "device_%s_power_cycles", Name: "power"}, + }, +} +var devicePowerOnTimeChartTmpl = module.Chart{ + ID: "device_%s_power_on_time", + Title: "Power-on time", + Units: "seconds", + Fam: "power-on time", + Ctx: "nvme.device_power_on_time", + Priority: prioDevicePowerOnTime, + Dims: module.Dims{ + {ID: "device_%s_power_on_time", Name: "power-on"}, + }, +} +var deviceCriticalWarningsStateChartTmpl = module.Chart{ + ID: "device_%s_critical_warnings_state", + Title: "Critical warnings state", + Units: "state", + Fam: "critical warnings", + Ctx: "nvme.device_critical_warnings_state", + Priority: prioDeviceCriticalWarningsState, + Dims: module.Dims{ + {ID: "device_%s_critical_warning_available_spare", Name: "available_spare"}, + {ID: "device_%s_critical_warning_temp_threshold", Name: "temp_threshold"}, + {ID: "device_%s_critical_warning_nvm_subsystem_reliability", Name: "nvm_subsystem_reliability"}, + {ID: "device_%s_critical_warning_read_only", Name: "read_only"}, + {ID: "device_%s_critical_warning_volatile_mem_backup_failed", Name: "volatile_mem_backup_failed"}, + {ID: "device_%s_critical_warning_persistent_memory_read_only", Name: "persistent_memory_read_only"}, + }, +} +var deviceUnsafeShutdownsCountChartTmpl = module.Chart{ + ID: "device_%s_unsafe_shutdowns_count", + Title: "Unsafe shutdowns", + Units: "shutdowns", + Fam: "shutdowns", + Ctx: "nvme.device_unsafe_shutdowns_count", + Priority: prioDeviceUnsafeShutdownsCount, + Dims: module.Dims{ + {ID: "device_%s_unsafe_shutdowns", Name: "unsafe"}, + }, +} +var deviceMediaErrorsRateChartTmpl = module.Chart{ + ID: "device_%s_media_errors_rate", + Title: "Media and data integrity errors", + Units: "errors/s", + Fam: "media errors", + Ctx: "nvme.device_media_errors_rate", + Priority: prioDeviceMediaErrorsRate, + Dims: module.Dims{ + {ID: "device_%s_media_errors", Name: "media", Algo: module.Incremental}, + }, +} +var deviceErrorLogEntriesRateChartTmpl = module.Chart{ + ID: "device_%s_error_log_entries_rate", + Title: "Error log entries", + Units: "entries/s", + Fam: "error log", + Ctx: "nvme.device_error_log_entries_rate", + Priority: prioDeviceErrorLogEntriesRate, + Dims: module.Dims{ + {ID: "device_%s_num_err_log_entries", Name: "error_log", Algo: module.Incremental}, + }, +} +var deviceWarnCompositeTemperatureTimeChartTmpl = module.Chart{ + ID: "device_%s_warning_composite_temperature_time", + Title: "Warning composite temperature time", + Units: "seconds", + Fam: "warn temp time", + Ctx: "nvme.device_warning_composite_temperature_time", + Priority: prioDeviceWarningCompositeTemperatureTime, + Dims: module.Dims{ + {ID: "device_%s_warning_temp_time", Name: "wctemp"}, + }, +} +var deviceCritCompositeTemperatureTimeChartTmpl = module.Chart{ + ID: "device_%s_critical_composite_temperature_time", + Title: "Critical composite temperature time", + Units: "seconds", + Fam: "crit temp time", + Ctx: "nvme.device_critical_composite_temperature_time", + Priority: prioDeviceCriticalCompositeTemperatureTime, + Dims: module.Dims{ + {ID: "device_%s_critical_comp_time", Name: "cctemp"}, + }, +} +var ( + deviceThmTemp1TransitionsRateChartTmpl = module.Chart{ + ID: "device_%s_thm_temp1_transitions_rate", + Title: "Thermal management temp1 transitions", + Units: "transitions/s", + Fam: "thermal mgmt transitions", + Ctx: "nvme.device_thermal_mgmt_temp1_transitions_rate", + Priority: prioDeviceThmTemp1TransitionsCount, + Dims: module.Dims{ + {ID: "device_%s_thm_temp1_trans_count", Name: "temp1", Algo: module.Incremental}, + }, + } + deviceThmTemp2TransitionsRateChartTmpl = module.Chart{ + ID: "device_%s_thm_temp2_transitions_rate", + Title: "Thermal management temp2 transitions", + Units: "transitions/s", + Fam: "thermal mgmt transitions", + Ctx: "nvme.device_thermal_mgmt_temp2_transitions_rate", + Priority: prioDeviceThmTemp2TransitionsRate, + Dims: module.Dims{ + {ID: "device_%s_thm_temp2_trans_count", Name: "temp2", Algo: module.Incremental}, + }, + } +) +var ( + deviceThmTemp1TimeChartTmpl = module.Chart{ + ID: "device_%s_thm_temp1_time", + Title: "Thermal management temp1 time", + Units: "seconds", + Fam: "thermal mgmt time", + Ctx: "nvme.device_thermal_mgmt_temp1_time", + Priority: prioDeviceThmTemp1Time, + Dims: module.Dims{ + {ID: "device_%s_thm_temp1_total_time", Name: "temp1"}, + }, + } + deviceThmTemp2TimeChartTmpl = module.Chart{ + ID: "device_%s_thm_temp2_time", + Title: "Thermal management temp1 time", + Units: "seconds", + Fam: "thermal mgmt time", + Ctx: "nvme.device_thermal_mgmt_temp2_time", + Priority: prioDeviceThmTemp2Time, + Dims: module.Dims{ + {ID: "device_%s_thm_temp2_total_time", Name: "temp2"}, + }, + } +) + +func (n *NVMe) addDeviceCharts(device string) { + charts := deviceChartsTmpl.Copy() + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, device) + chart.Labels = []module.Label{ + {Key: "device", Value: device}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, device) + } + } + + if err := n.Charts().Add(*charts...); err != nil { + n.Warning(err) + } +} + +func (n *NVMe) removeDeviceCharts(device string) { + px := fmt.Sprintf("device_%s", device) + + for _, chart := range *n.Charts() { + if strings.HasPrefix(chart.ID, px) { + chart.MarkRemove() + chart.MarkNotCreated() + } + } +} |