diff options
Diffstat (limited to 'src/go/collectors/go.d.plugin/modules/smartctl/collect.go')
-rw-r--r-- | src/go/collectors/go.d.plugin/modules/smartctl/collect.go | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/smartctl/collect.go b/src/go/collectors/go.d.plugin/modules/smartctl/collect.go new file mode 100644 index 000000000..79cbb13d0 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/smartctl/collect.go @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package smartctl + +import ( + "fmt" + "maps" + "slices" + "strconv" + "strings" + "time" + + "github.com/tidwall/gjson" +) + +func (s *Smartctl) collect() (map[string]int64, error) { + now := time.Now() + + if s.forceScan || s.isTimeToScan(now) { + devices, err := s.scanDevices() + if err != nil { + return nil, err + } + + for k, dev := range s.scannedDevices { + if _, ok := devices[k]; !ok { + delete(s.scannedDevices, k) + delete(s.seenDevices, k) + s.removeDeviceCharts(dev) + } + } + + s.forceDevicePoll = !maps.Equal(s.scannedDevices, devices) + s.scannedDevices = devices + s.lastScanTime = now + s.forceScan = false + } + + if s.forceDevicePoll || s.isTimeToPollDevices(now) { + mx := make(map[string]int64) + + // TODO: make it concurrent + for _, d := range s.scannedDevices { + if err := s.collectScannedDevice(mx, d); err != nil { + return nil, err + } + } + + s.forceDevicePoll = false + s.lastDevicePollTime = now + s.mx = mx + } + + return s.mx, nil +} + +func (s *Smartctl) collectScannedDevice(mx map[string]int64, scanDev *scanDevice) error { + resp, err := s.exec.deviceInfo(scanDev.name, scanDev.typ, s.NoCheckPowerMode) + if err != nil { + if resp != nil && isDeviceOpenFailedNoSuchDevice(resp) { + s.Infof("smartctl reported that device '%s' type '%s' no longer exists", scanDev.name, scanDev.typ) + s.forceScan = true + return nil + } + return fmt.Errorf("failed to get device info for '%s' type '%s': %v", scanDev.name, scanDev.typ, err) + } + + if isDeviceInLowerPowerMode(resp) { + s.Debugf("device '%s' type '%s' is in a low-power mode, skipping", scanDev.name, scanDev.typ) + return nil + } + + dev := newSmartDevice(resp) + if !isSmartDeviceValid(dev) { + return nil + } + + if !s.seenDevices[scanDev.key()] { + s.seenDevices[scanDev.key()] = true + s.addDeviceCharts(dev) + } + + s.collectSmartDevice(mx, dev) + + return nil +} + +func (s *Smartctl) collectSmartDevice(mx map[string]int64, dev *smartDevice) { + px := fmt.Sprintf("device_%s_type_%s_", dev.deviceName(), dev.deviceType()) + + if v, ok := dev.powerOnTime(); ok { + mx[px+"power_on_time"] = v + } + if v, ok := dev.temperature(); ok { + mx[px+"temperature"] = v + } + if v, ok := dev.powerCycleCount(); ok { + mx[px+"power_cycle_count"] = v + } + if v, ok := dev.smartStatusPassed(); ok { + mx[px+"smart_status_passed"] = 0 + mx[px+"smart_status_failed"] = 0 + if v { + mx[px+"smart_status_passed"] = 1 + } else { + mx[px+"smart_status_failed"] = 1 + } + } + if v, ok := dev.ataSmartErrorLogCount(); ok { + mx[px+"ata_smart_error_log_summary_count"] = v + } + + if attrs, ok := dev.ataSmartAttributeTable(); ok { + for _, attr := range attrs { + if !isSmartAttrValid(attr) { + continue + } + n := strings.ToLower(attr.name()) + n = strings.ReplaceAll(n, " ", "_") + px := fmt.Sprintf("%sattr_%s_", px, n) + + if v, err := strconv.ParseInt(attr.value(), 10, 64); err == nil { + mx[px+"normalized"] = v + } + + if v, err := strconv.ParseInt(attr.rawValue(), 10, 64); err == nil { + mx[px+"raw"] = v + } + + rs := strings.TrimSpace(attr.rawString()) + if i := strings.IndexByte(rs, ' '); i != -1 { + rs = rs[:i] + } + if v, err := strconv.ParseInt(rs, 10, 64); err == nil { + mx[px+"decoded"] = v + } + } + } +} + +func (s *Smartctl) isTimeToScan(now time.Time) bool { + return now.After(s.lastScanTime.Add(s.ScanEvery.Duration())) +} + +func (s *Smartctl) isTimeToPollDevices(now time.Time) bool { + return now.After(s.lastDevicePollTime.Add(s.PollDevicesEvery.Duration())) + +} + +func isSmartDeviceValid(d *smartDevice) bool { + return d.deviceName() != "" && d.deviceType() != "" +} + +func isSmartAttrValid(a *smartAttribute) bool { + return a.id() != "" && a.name() != "" +} + +func isDeviceInLowerPowerMode(r *gjson.Result) bool { + if !isExitStatusHasBit(r, 1) { + return false + } + + messages := r.Get("smartctl.messages").Array() + + return slices.ContainsFunc(messages, func(msg gjson.Result) bool { + text := msg.Get("string").String() + return strings.HasPrefix(text, "Device is in") && strings.Contains(text, "mode") + }) +} + +func isDeviceOpenFailedNoSuchDevice(r *gjson.Result) bool { + if !isExitStatusHasBit(r, 1) { + return false + } + + messages := r.Get("smartctl.messages").Array() + + return slices.ContainsFunc(messages, func(msg gjson.Result) bool { + text := msg.Get("string").String() + return strings.HasSuffix(text, "No such device") + }) +} + +func isExitStatusHasBit(r *gjson.Result, bit int) bool { + // https://manpages.debian.org/bullseye/smartmontools/smartctl.8.en.html#EXIT_STATUS + status := int(r.Get("smartctl.exit_status").Int()) + mask := 1 << bit + return (status & mask) != 0 +} |