diff options
Diffstat (limited to 'src/go/collectors/go.d.plugin/modules/smartctl/charts.go')
-rw-r--r-- | src/go/collectors/go.d.plugin/modules/smartctl/charts.go | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/smartctl/charts.go b/src/go/collectors/go.d.plugin/modules/smartctl/charts.go new file mode 100644 index 000000000..2a5fea02b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/smartctl/charts.go @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package smartctl + +import ( + "fmt" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + prioDeviceSmartStatus = module.Priority + iota + prioDeviceAtaSmartErrorLogCount + prioDevicePowerOnTime + prioDeviceTemperature + prioDevicePowerCycleCount + + prioDeviceSmartAttributeDecoded + prioDeviceSmartAttributeNormalized +) + +var deviceChartsTmpl = module.Charts{ + devicePowerOnTimeChartTmpl.Copy(), + deviceTemperatureChartTmpl.Copy(), + devicePowerCycleCountChartTmpl.Copy(), + deviceSmartStatusChartTmpl.Copy(), + deviceAtaSmartErrorLogCountChartTmpl.Copy(), +} + +var ( + deviceSmartStatusChartTmpl = module.Chart{ + ID: "device_%s_type_%s_smart_status", + Title: "Device smart status", + Units: "status", + Fam: "smart status", + Ctx: "smartctl.device_smart_status", + Type: module.Line, + Priority: prioDeviceSmartStatus, + Dims: module.Dims{ + {ID: "device_%s_type_%s_smart_status_passed", Name: "passed"}, + {ID: "device_%s_type_%s_smart_status_failed", Name: "failed"}, + }, + } + deviceAtaSmartErrorLogCountChartTmpl = module.Chart{ + ID: "device_%s_type_%s_ata_smart_error_log_count", + Title: "Device ATA smart error log count", + Units: "logs", + Fam: "smart error log", + Ctx: "smartctl.device_ata_smart_error_log_count", + Type: module.Line, + Priority: prioDeviceAtaSmartErrorLogCount, + Dims: module.Dims{ + {ID: "device_%s_type_%s_ata_smart_error_log_summary_count", Name: "error_log"}, + }, + } + devicePowerOnTimeChartTmpl = module.Chart{ + ID: "device_%s_type_%s_power_on_time", + Title: "Device power on time", + Units: "seconds", + Fam: "power on time", + Ctx: "smartctl.device_power_on_time", + Type: module.Line, + Priority: prioDevicePowerOnTime, + Dims: module.Dims{ + {ID: "device_%s_type_%s_power_on_time", Name: "power_on_time"}, + }, + } + deviceTemperatureChartTmpl = module.Chart{ + ID: "device_%s_type_%s_temperature", + Title: "Device temperature", + Units: "Celsius", + Fam: "temperature", + Ctx: "smartctl.device_temperature", + Type: module.Line, + Priority: prioDeviceTemperature, + Dims: module.Dims{ + {ID: "device_%s_type_%s_temperature", Name: "temperature"}, + }, + } + devicePowerCycleCountChartTmpl = module.Chart{ + ID: "device_%s_type_%s_power_cycle_count", + Title: "Device power cycles", + Units: "cycles", + Fam: "power cycles", + Ctx: "smartctl.device_power_cycles_count", + Type: module.Line, + Priority: prioDevicePowerCycleCount, + Dims: module.Dims{ + {ID: "device_%s_type_%s_power_cycle_count", Name: "power"}, + }, + } +) + +var ( + deviceSmartAttributeDecodedChartTmpl = module.Chart{ + ID: "device_%s_type_%s_smart_attr_%s", + Title: "Device smart attribute %s", + Units: "value", + Fam: "attr %s", + Ctx: "smartctl.device_smart_attr_%s", + Type: module.Line, + Priority: prioDeviceSmartAttributeDecoded, + Dims: module.Dims{ + {ID: "device_%s_type_%s_attr_%s_decoded", Name: "%s"}, + }, + } + deviceSmartAttributeNormalizedChartTmpl = module.Chart{ + ID: "device_%s_type_%s_smart_attr_%s_normalized", + Title: "Device smart attribute normalized %s", + Units: "value", + Fam: "attr %s", + Ctx: "smartctl.device_smart_attr_%s_normalized", + Type: module.Line, + Priority: prioDeviceSmartAttributeNormalized, + Dims: module.Dims{ + {ID: "device_%s_type_%s_attr_%s_normalized", Name: "%s"}, + }, + } +) + +func (s *Smartctl) addDeviceCharts(dev *smartDevice) { + charts := module.Charts{} + + if cs := s.newDeviceCharts(dev); cs != nil && len(*cs) > 0 { + if err := charts.Add(*cs...); err != nil { + s.Warning(err) + } + } + if cs := s.newDeviceSmartAttrCharts(dev); cs != nil && len(*cs) > 0 { + if err := charts.Add(*cs...); err != nil { + s.Warning(err) + } + } + + if err := s.Charts().Add(charts...); err != nil { + s.Warning(err) + } +} + +func (s *Smartctl) removeDeviceCharts(scanDev *scanDevice) { + px := fmt.Sprintf("device_%s_%s_", scanDev.shortName(), scanDev.typ) + + for _, chart := range *s.Charts() { + if strings.HasPrefix(chart.ID, px) { + chart.MarkRemove() + chart.MarkNotCreated() + } + } +} + +func (s *Smartctl) newDeviceCharts(dev *smartDevice) *module.Charts { + + charts := deviceChartsTmpl.Copy() + + if _, ok := dev.powerOnTime(); !ok { + _ = charts.Remove(devicePowerOnTimeChartTmpl.ID) + } + if _, ok := dev.temperature(); !ok { + _ = charts.Remove(deviceTemperatureChartTmpl.ID) + } + if _, ok := dev.powerCycleCount(); !ok { + _ = charts.Remove(devicePowerOnTimeChartTmpl.ID) + } + if _, ok := dev.smartStatusPassed(); !ok { + _ = charts.Remove(deviceSmartStatusChartTmpl.ID) + } + if _, ok := dev.ataSmartErrorLogCount(); !ok { + _ = charts.Remove(deviceAtaSmartErrorLogCountChartTmpl.ID) + } + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, dev.deviceName(), dev.deviceType()) + chart.Labels = []module.Label{ + {Key: "device_name", Value: dev.deviceName()}, + {Key: "device_type", Value: dev.deviceType()}, + {Key: "model_name", Value: dev.modelName()}, + {Key: "serial_number", Value: dev.serialNumber()}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, dev.deviceName(), dev.deviceType()) + } + } + + return charts +} + +func (s *Smartctl) newDeviceSmartAttrCharts(dev *smartDevice) *module.Charts { + attrs, ok := dev.ataSmartAttributeTable() + if !ok { + return nil + } + charts := module.Charts{} + + for _, attr := range attrs { + if !isSmartAttrValid(attr) || + strings.HasPrefix(attr.name(), "Unknown") || + strings.HasPrefix(attr.name(), "Not_In_Use") { + continue + } + + cs := module.Charts{ + deviceSmartAttributeDecodedChartTmpl.Copy(), + deviceSmartAttributeNormalizedChartTmpl.Copy(), + } + + attrName := attributeNameMap(attr.name()) + cleanAttrName := cleanAttributeName(attrName) + + for _, chart := range cs { + if chart.ID == deviceSmartAttributeDecodedChartTmpl.ID { + chart.Units = attributeUnit(attrName) + } + chart.ID = fmt.Sprintf(chart.ID, dev.deviceName(), dev.deviceType(), cleanAttrName) + chart.Title = fmt.Sprintf(chart.Title, attrName) + chart.Fam = fmt.Sprintf(chart.Fam, cleanAttrName) + chart.Ctx = fmt.Sprintf(chart.Ctx, cleanAttrName) + chart.Labels = []module.Label{ + {Key: "device_name", Value: dev.deviceName()}, + {Key: "device_type", Value: dev.deviceType()}, + {Key: "model_name", Value: dev.modelName()}, + {Key: "serial_number", Value: dev.serialNumber()}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, dev.deviceName(), dev.deviceType(), cleanAttrName) + dim.Name = fmt.Sprintf(dim.Name, cleanAttrName) + } + } + + if err := charts.Add(cs...); err != nil { + s.Warning(err) + } + } + + return &charts +} + +var attrNameReplacer = strings.NewReplacer(" ", "_", "/", "_") + +func cleanAttributeName(attrName string) string { + return strings.ToLower(attrNameReplacer.Replace(attrName)) +} + +func attributeUnit(attrName string) string { + units := map[string]string{ + "Airflow_Temperature_Cel": "Celsius", + "Case_Temperature": "Celsius", + "Drive_Temperature": "Celsius", + "Temperature_Case": "Celsius", + "Temperature_Celsius": "Celsius", + "Temperature_Internal": "Celsius", + "Power_On_Hours": "hours", + "Spin_Up_Time": "milliseconds", + "Media_Wearout_Indicator": "percent", + "Percent_Life_Remaining": "percent", + "Percent_Lifetime_Remain": "percent", + "Total_LBAs_Read": "sectors", + "Total_LBAs_Written": "sectors", + "Offline_Uncorrectable": "sectors", + "Pending_Sector_Count": "sectors", + "Reallocated_Sector_Ct": "sectors", + "Current_Pending_Sector": "sectors", + "Reported_Uncorrect": "errors", + "Command_Timeout": "events", + } + + if unit, ok := units[attrName]; ok { + return unit + } + + // TODO: convert to bytes during data collection? (examples: NAND_Writes_32MiB, Flash_Writes_GiB) + if strings.HasSuffix(attrName, "MiB") || strings.HasSuffix(attrName, "GiB") { + if strings.Contains(attrName, "Writes") { + return "writes" + } + if strings.Contains(attrName, "Reads") { + return "reads" + } + } + + if strings.Contains(attrName, "Error") { + return "errors" + } + + for _, s := range []string{"_Count", "_Cnt", "_Ct"} { + if strings.HasSuffix(attrName, s) { + return "events" + } + } + + return "value" +} + +func attributeNameMap(attrName string) string { + // TODO: Handle Vendor-Specific S.M.A.R.T. Attribute Naming + // S.M.A.R.T. attribute names can vary slightly between vendors (e.g., "Thermal_Throttle_St" vs. "Thermal_Throttle_Status"). + // This function ensures consistent naming. + return attrName +} |