1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
// SPDX-License-Identifier: GPL-3.0-or-later
package nvidia_smi
import (
_ "embed"
"errors"
"time"
"github.com/netdata/netdata/go/go.d.plugin/agent/module"
"github.com/netdata/netdata/go/go.d.plugin/pkg/web"
)
// configSchema holds the contents of config_schema.json, embedded at build
// time. init passes it to module.Register as the job configuration schema.
//go:embed "config_schema.json"
var configSchema string
// init registers the collector with the module registry under the name
// "nvidia_smi". The job is disabled by default, uses an UpdateEvery of 10
// and the embedded configSchema, and instances are produced by New.
func init() {
	creator := module.Creator{
		JobConfigSchema: configSchema,
		Defaults: module.Defaults{
			UpdateEvery: 10,
			Disabled:    true,
		},
		Create: func() module.Module { return New() },
	}

	module.Register("nvidia_smi", creator)
}
// New returns an NvidiaSMI collector populated with its defaults: a 10 second
// timeout, CSV format enabled, the binary name "nvidia-smi", an empty chart
// set, and empty gpus/migs maps. The exec field is left nil; Init fills it in.
func New() *NvidiaSMI {
	cfg := Config{
		Timeout:      web.Duration(10 * time.Second),
		UseCSVFormat: true,
	}

	nv := &NvidiaSMI{
		Config:  cfg,
		binName: "nvidia-smi",
		charts:  &module.Charts{},
		gpus:    map[string]bool{},
		migs:    map[string]bool{},
	}
	return nv
}
// Config holds the user-configurable settings of an nvidia_smi job. New sets
// Timeout to 10 seconds and UseCSVFormat to true; the remaining fields start
// at their zero values.
type Config struct {
// UpdateEvery is read from the "update_every" key.
// NOTE(review): presumably the collection interval; its unit is not shown here.
UpdateEvery int `yaml:"update_every" json:"update_every"`
// Timeout is read from the "timeout" key; New defaults it to 10 seconds.
// NOTE(review): presumably bounds each nvidia-smi invocation; confirm in the exec code.
Timeout web.Duration `yaml:"timeout" json:"timeout"`
// BinaryPath is read from the "binary_path" key.
// NOTE(review): presumably an explicit path to the nvidia-smi binary; its use is not visible here.
BinaryPath string `yaml:"binary_path" json:"binary_path"`
// UseCSVFormat is read from the "use_csv_format" key; New defaults it to true.
// NOTE(review): presumably selects the CSV query over the XML one; confirm in collect.
UseCSVFormat bool `yaml:"use_csv_format" json:"use_csv_format"`
}
type (
// NvidiaSMI is the collector type created by New. It embeds module.Base and
// the job Config, and implements the Init/Check/Charts/Collect/Cleanup
// methods defined in this file.
NvidiaSMI struct {
module.Base
Config `yaml:",inline" json:""`
// charts is the chart set returned by Charts.
charts *module.Charts
// exec is the nvidia-smi query backend; Init creates it only when it is nil.
exec nvidiaSMI
// binName is set to "nvidia-smi" by New.
// NOTE(review): presumably the executable name to look up; its use is not visible here.
binName string
// NOTE(review): presumably the properties passed to queryGPUInfoCSV; confirm in collect.
gpuQueryProperties []string
// gpus and migs are initialised as empty maps by New.
// NOTE(review): presumably track already-seen GPU/MIG devices; confirm in collect.
gpus map[string]bool
migs map[string]bool
}
// nvidiaSMI abstracts the queries issued against nvidia-smi. Every method
// returns the raw output bytes and an error.
// NOTE(review): the meaning of each query is inferred from its name only.
nvidiaSMI interface {
queryGPUInfoXML() ([]byte, error)
queryGPUInfoCSV(properties []string) ([]byte, error)
queryHelpQueryGPU() ([]byte, error)
}
)
// Configuration returns a copy of the collector's current Config value.
func (nv *NvidiaSMI) Configuration() any {
	cfg := nv.Config
	return cfg
}
// Init prepares the collector for use. When no exec backend has been set yet
// it creates one via initNvidiaSMIExec; a failure there is logged and
// returned. An already-present backend is left untouched.
func (nv *NvidiaSMI) Init() error {
	// A backend is already in place (for example injected by the caller).
	if nv.exec != nil {
		return nil
	}

	backend, err := nv.initNvidiaSMIExec()
	if err != nil {
		nv.Error(err)
		return err
	}

	nv.exec = backend
	return nil
}
// Check runs one collection pass and reports whether it yielded any data.
// A collection error is logged and returned as is; an empty result produces
// a "no metrics collected" error.
func (nv *NvidiaSMI) Check() error {
	metrics, err := nv.collect()

	switch {
	case err != nil:
		nv.Error(err)
		return err
	case len(metrics) == 0:
		return errors.New("no metrics collected")
	default:
		return nil
	}
}
// Charts returns the collector's chart set.
func (nv *NvidiaSMI) Charts() *module.Charts {
	charts := nv.charts
	return charts
}
// Collect runs one collection pass and returns the gathered metrics. An
// error from the pass is logged but does not discard metrics that were
// gathered; nil is returned only when the result is empty.
func (nv *NvidiaSMI) Collect() map[string]int64 {
	metrics, err := nv.collect()
	if err != nil {
		nv.Error(err)
	}

	if len(metrics) > 0 {
		return metrics
	}
	return nil
}
// Cleanup is part of the module lifecycle; this collector performs no work
// in it.
func (nv *NvidiaSMI) Cleanup() {
	// Intentionally a no-op.
}
|