path: root/src/go/collectors/go.d.plugin/modules/vsphere
Diffstat (limited to '')
l---------  src/go/collectors/go.d.plugin/modules/vsphere/README.md  1
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/charts.go  506
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/client/client.go  180
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/client/client_test.go  175
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/client/keepalive.go  45
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/collect.go  132
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/config_schema.json  245
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover.go  31
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover/build.go  180
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover/discover.go  163
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover/discover_test.go  179
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover/filter.go  60
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover/hierarchy.go  100
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/discover/metric_lists.go  135
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/init.go  66
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/integrations/vmware_vcenter_server.md  322
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/match/match.go  233
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/match/match_test.go  287
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/metadata.yaml  439
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/metrics.txt  328
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/resources/resources.go  137
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape.go  159
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape_test.go  70
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller.go  33
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller_test.go  42
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/task.go  61
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/task_test.go  41
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.json  27
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.yaml  22
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/vsphere.go  144
-rw-r--r--  src/go/collectors/go.d.plugin/modules/vsphere/vsphere_test.go  488
31 files changed, 5031 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/README.md b/src/go/collectors/go.d.plugin/modules/vsphere/README.md
new file mode 120000
index 000000000..0a6b0146e
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/README.md
@@ -0,0 +1 @@
+integrations/vmware_vcenter_server.md
\ No newline at end of file
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/charts.go b/src/go/collectors/go.d.plugin/modules/vsphere/charts.go
new file mode 100644
index 000000000..ed4db941d
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/charts.go
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+)
+
+const (
+ prioVMCPUUtilization = module.Priority + iota
+ prioVmMemoryUtilization
+ prioVmMemoryUsage
+ prioVmMemorySwapUsage
+ prioVmMemorySwapIO
+ prioVmDiskIO
+ prioVmDiskMaxLatency
+ prioVmNetworkTraffic
+ prioVmNetworkPackets
+ prioVmNetworkDrops
+ prioVmOverallStatus
+ prioVmSystemUptime
+
+ prioHostCPUUtilization
+ prioHostMemoryUtilization
+ prioHostMemoryUsage
+ prioHostMemorySwapIO
+ prioHostDiskIO
+ prioHostDiskMaxLatency
+ prioHostNetworkTraffic
+ prioHostNetworkPackets
+ prioHostNetworkDrops
+ prioHostNetworkErrors
+ prioHostOverallStatus
+ prioHostSystemUptime
+)
+
+var (
+ vmChartsTmpl = module.Charts{
+ vmCPUUtilizationChartTmpl.Copy(),
+
+ vmMemoryUtilizationChartTmpl.Copy(),
+ vmMemoryUsageChartTmpl.Copy(),
+ vmMemorySwapUsageChartTmpl.Copy(),
+ vmMemorySwapIOChartTmpl.Copy(),
+
+ vmDiskIOChartTmpl.Copy(),
+ vmDiskMaxLatencyChartTmpl.Copy(),
+
+ vmNetworkTrafficChartTmpl.Copy(),
+ vmNetworkPacketsChartTmpl.Copy(),
+ vmNetworkDropsChartTmpl.Copy(),
+
+ vmOverallStatusChartTmpl.Copy(),
+
+ vmSystemUptimeChartTmpl.Copy(),
+ }
+
+ vmCPUUtilizationChartTmpl = module.Chart{
+ ID: "%s_cpu_utilization",
+ Title: "Virtual Machine CPU utilization",
+ Units: "percentage",
+ Fam: "vms cpu",
+ Ctx: "vsphere.vm_cpu_utilization",
+ Priority: prioVMCPUUtilization,
+ Dims: module.Dims{
+ {ID: "%s_cpu.usage.average", Name: "used", Div: 100},
+ },
+ }
+
+ // Ref: https://www.vmware.com/support/developer/converter-sdk/conv51_apireference/memory_counters.html
+ vmMemoryUtilizationChartTmpl = module.Chart{
+ ID: "%s_mem_utilization",
+ Title: "Virtual Machine memory utilization",
+ Units: "percentage",
+ Fam: "vms mem",
+ Ctx: "vsphere.vm_mem_utilization",
+ Priority: prioVmMemoryUtilization,
+ Dims: module.Dims{
+ {ID: "%s_mem.usage.average", Name: "used", Div: 100},
+ },
+ }
+ vmMemoryUsageChartTmpl = module.Chart{
+ ID: "%s_mem_usage",
+ Title: "Virtual Machine memory usage",
+ Units: "KiB",
+ Fam: "vms mem",
+ Ctx: "vsphere.vm_mem_usage",
+ Priority: prioVmMemoryUsage,
+ Dims: module.Dims{
+ {ID: "%s_mem.granted.average", Name: "granted"},
+ {ID: "%s_mem.consumed.average", Name: "consumed"},
+ {ID: "%s_mem.active.average", Name: "active"},
+ {ID: "%s_mem.shared.average", Name: "shared"},
+ },
+ }
+ vmMemorySwapUsageChartTmpl = module.Chart{
+ ID: "%s_mem_swap_usage",
+ Title: "Virtual Machine VMKernel memory swap usage",
+ Units: "KiB",
+ Fam: "vms mem",
+ Ctx: "vsphere.vm_mem_swap_usage",
+ Priority: prioVmMemorySwapUsage,
+ Dims: module.Dims{
+ {ID: "%s_mem.swapped.average", Name: "swapped"},
+ },
+ }
+ vmMemorySwapIOChartTmpl = module.Chart{
+ ID: "%s_mem_swap_io_rate",
+ Title: "Virtual Machine VMKernel memory swap IO",
+ Units: "KiB/s",
+ Fam: "vms mem",
+ Ctx: "vsphere.vm_mem_swap_io",
+ Type: module.Area,
+ Priority: prioVmMemorySwapIO,
+ Dims: module.Dims{
+ {ID: "%s_mem.swapinRate.average", Name: "in"},
+ {ID: "%s_mem.swapoutRate.average", Name: "out"},
+ },
+ }
+
+ vmDiskIOChartTmpl = module.Chart{
+ ID: "%s_disk_io",
+ Title: "Virtual Machine disk IO",
+ Units: "KiB/s",
+ Fam: "vms disk",
+ Ctx: "vsphere.vm_disk_io",
+ Type: module.Area,
+ Priority: prioVmDiskIO,
+ Dims: module.Dims{
+ {ID: "%s_disk.read.average", Name: "read"},
+ {ID: "%s_disk.write.average", Name: "write", Mul: -1},
+ },
+ }
+ vmDiskMaxLatencyChartTmpl = module.Chart{
+ ID: "%s_disk_max_latency",
+ Title: "Virtual Machine disk max latency",
+ Units: "milliseconds",
+ Fam: "vms disk",
+ Ctx: "vsphere.vm_disk_max_latency",
+ Priority: prioVmDiskMaxLatency,
+ Dims: module.Dims{
+ {ID: "%s_disk.maxTotalLatency.latest", Name: "latency"},
+ },
+ }
+
+ vmNetworkTrafficChartTmpl = module.Chart{
+ ID: "%s_net_traffic",
+ Title: "Virtual Machine network traffic",
+ Units: "KiB/s",
+ Fam: "vms net",
+ Ctx: "vsphere.vm_net_traffic",
+ Type: module.Area,
+ Priority: prioVmNetworkTraffic,
+ Dims: module.Dims{
+ {ID: "%s_net.bytesRx.average", Name: "received"},
+ {ID: "%s_net.bytesTx.average", Name: "sent", Mul: -1},
+ },
+ }
+ vmNetworkPacketsChartTmpl = module.Chart{
+ ID: "%s_net_packets",
+ Title: "Virtual Machine network packets",
+ Units: "packets",
+ Fam: "vms net",
+ Ctx: "vsphere.vm_net_packets",
+ Priority: prioVmNetworkPackets,
+ Dims: module.Dims{
+ {ID: "%s_net.packetsRx.summation", Name: "received"},
+ {ID: "%s_net.packetsTx.summation", Name: "sent", Mul: -1},
+ },
+ }
+ vmNetworkDropsChartTmpl = module.Chart{
+ ID: "%s_net_drops",
+ Title: "Virtual Machine network dropped packets",
+ Units: "drops",
+ Fam: "vms net",
+ Ctx: "vsphere.vm_net_drops",
+ Priority: prioVmNetworkDrops,
+ Dims: module.Dims{
+ {ID: "%s_net.droppedRx.summation", Name: "received"},
+ {ID: "%s_net.droppedTx.summation", Name: "sent", Mul: -1},
+ },
+ }
+
+ vmOverallStatusChartTmpl = module.Chart{
+ ID: "%s_overall_status",
+ Title: "Virtual Machine overall alarm status",
+ Units: "status",
+ Fam: "vms status",
+ Ctx: "vsphere.vm_overall_status",
+ Priority: prioVmOverallStatus,
+ Dims: module.Dims{
+ {ID: "%s_overall.status.green", Name: "green"},
+ {ID: "%s_overall.status.red", Name: "red"},
+ {ID: "%s_overall.status.yellow", Name: "yellow"},
+ {ID: "%s_overall.status.gray", Name: "gray"},
+ },
+ }
+
+ vmSystemUptimeChartTmpl = module.Chart{
+ ID: "%s_system_uptime",
+ Title: "Virtual Machine system uptime",
+ Units: "seconds",
+ Fam: "vms uptime",
+ Ctx: "vsphere.vm_system_uptime",
+ Priority: prioVmSystemUptime,
+ Dims: module.Dims{
+ {ID: "%s_sys.uptime.latest", Name: "uptime"},
+ },
+ }
+)
+
+var (
+ hostChartsTmpl = module.Charts{
+ hostCPUUtilizationChartTmpl.Copy(),
+
+ hostMemUtilizationChartTmpl.Copy(),
+ hostMemUsageChartTmpl.Copy(),
+ hostMemSwapIOChartTmpl.Copy(),
+
+ hostDiskIOChartTmpl.Copy(),
+ hostDiskMaxLatencyChartTmpl.Copy(),
+
+ hostNetworkTrafficChartTmpl.Copy(),
+ hostNetworkPacketsChartTmpl.Copy(),
+ hostNetworkDropsChartTmpl.Copy(),
+ hostNetworkErrorsChartTmpl.Copy(),
+
+ hostOverallStatusChartTmpl.Copy(),
+
+ hostSystemUptimeChartTmpl.Copy(),
+ }
+ hostCPUUtilizationChartTmpl = module.Chart{
+ ID: "%s_cpu_usage_total",
+ Title: "ESXi Host CPU utilization",
+ Units: "percentage",
+ Fam: "hosts cpu",
+ Ctx: "vsphere.host_cpu_utilization",
+ Priority: prioHostCPUUtilization,
+ Dims: module.Dims{
+ {ID: "%s_cpu.usage.average", Name: "used", Div: 100},
+ },
+ }
+ hostMemUtilizationChartTmpl = module.Chart{
+ ID: "%s_mem_utilization",
+ Title: "ESXi Host memory utilization",
+ Units: "percentage",
+ Fam: "hosts mem",
+ Ctx: "vsphere.host_mem_utilization",
+ Priority: prioHostMemoryUtilization,
+ Dims: module.Dims{
+ {ID: "%s_mem.usage.average", Name: "used", Div: 100},
+ },
+ }
+ hostMemUsageChartTmpl = module.Chart{
+ ID: "%s_mem_usage",
+ Title: "ESXi Host memory usage",
+ Units: "KiB",
+ Fam: "hosts mem",
+ Ctx: "vsphere.host_mem_usage",
+ Priority: prioHostMemoryUsage,
+ Dims: module.Dims{
+ {ID: "%s_mem.granted.average", Name: "granted"},
+ {ID: "%s_mem.consumed.average", Name: "consumed"},
+ {ID: "%s_mem.active.average", Name: "active"},
+ {ID: "%s_mem.shared.average", Name: "shared"},
+ {ID: "%s_mem.sharedcommon.average", Name: "sharedcommon"},
+ },
+ }
+ hostMemSwapIOChartTmpl = module.Chart{
+ ID: "%s_mem_swap_rate",
+ Title: "ESXi Host VMKernel memory swap IO",
+ Units: "KiB/s",
+ Fam: "hosts mem",
+ Ctx: "vsphere.host_mem_swap_io",
+ Type: module.Area,
+ Priority: prioHostMemorySwapIO,
+ Dims: module.Dims{
+ {ID: "%s_mem.swapinRate.average", Name: "in"},
+ {ID: "%s_mem.swapoutRate.average", Name: "out"},
+ },
+ }
+
+ hostDiskIOChartTmpl = module.Chart{
+ ID: "%s_disk_io",
+ Title: "ESXi Host disk IO",
+ Units: "KiB/s",
+ Fam: "hosts disk",
+ Ctx: "vsphere.host_disk_io",
+ Type: module.Area,
+ Priority: prioHostDiskIO,
+ Dims: module.Dims{
+ {ID: "%s_disk.read.average", Name: "read"},
+ {ID: "%s_disk.write.average", Name: "write", Mul: -1},
+ },
+ }
+ hostDiskMaxLatencyChartTmpl = module.Chart{
+ ID: "%s_disk_max_latency",
+ Title: "ESXi Host disk max latency",
+ Units: "milliseconds",
+ Fam: "hosts disk",
+ Ctx: "vsphere.host_disk_max_latency",
+ Priority: prioHostDiskMaxLatency,
+ Dims: module.Dims{
+ {ID: "%s_disk.maxTotalLatency.latest", Name: "latency"},
+ },
+ }
+
+ hostNetworkTrafficChartTmpl = module.Chart{
+ ID: "%s_net_traffic",
+ Title: "ESXi Host network traffic",
+ Units: "KiB/s",
+ Fam: "hosts net",
+ Ctx: "vsphere.host_net_traffic",
+ Type: module.Area,
+ Priority: prioHostNetworkTraffic,
+ Dims: module.Dims{
+ {ID: "%s_net.bytesRx.average", Name: "received"},
+ {ID: "%s_net.bytesTx.average", Name: "sent", Mul: -1},
+ },
+ }
+ hostNetworkPacketsChartTmpl = module.Chart{
+ ID: "%s_net_packets",
+ Title: "ESXi Host network packets",
+ Units: "packets",
+ Fam: "hosts net",
+ Ctx: "vsphere.host_net_packets",
+ Priority: prioHostNetworkPackets,
+ Dims: module.Dims{
+ {ID: "%s_net.packetsRx.summation", Name: "received"},
+ {ID: "%s_net.packetsTx.summation", Name: "sent", Mul: -1},
+ },
+ }
+ hostNetworkDropsChartTmpl = module.Chart{
+ ID: "%s_net_drops_total",
+ Title: "ESXi Host network drops",
+ Units: "drops",
+ Fam: "hosts net",
+ Ctx: "vsphere.host_net_drops",
+ Priority: prioHostNetworkDrops,
+ Dims: module.Dims{
+ {ID: "%s_net.droppedRx.summation", Name: "received"},
+ {ID: "%s_net.droppedTx.summation", Name: "sent", Mul: -1},
+ },
+ }
+ hostNetworkErrorsChartTmpl = module.Chart{
+ ID: "%s_net_errors",
+ Title: "ESXi Host network errors",
+ Units: "errors",
+ Fam: "hosts net",
+ Ctx: "vsphere.host_net_errors",
+ Priority: prioHostNetworkErrors,
+ Dims: module.Dims{
+ {ID: "%s_net.errorsRx.summation", Name: "received"},
+ {ID: "%s_net.errorsTx.summation", Name: "sent", Mul: -1},
+ },
+ }
+
+ hostOverallStatusChartTmpl = module.Chart{
+ ID: "%s_overall_status",
+ Title: "ESXi Host overall alarm status",
+ Units: "status",
+ Fam: "hosts status",
+ Ctx: "vsphere.host_overall_status",
+ Priority: prioHostOverallStatus,
+ Dims: module.Dims{
+ {ID: "%s_overall.status.green", Name: "green"},
+ {ID: "%s_overall.status.red", Name: "red"},
+ {ID: "%s_overall.status.yellow", Name: "yellow"},
+ {ID: "%s_overall.status.gray", Name: "gray"},
+ },
+ }
+ hostSystemUptimeChartTmpl = module.Chart{
+ ID: "%s_system_uptime",
+ Title: "ESXi Host system uptime",
+ Units: "seconds",
+ Fam: "hosts uptime",
+ Ctx: "vsphere.host_system_uptime",
+ Priority: prioHostSystemUptime,
+ Dims: module.Dims{
+ {ID: "%s_sys.uptime.latest", Name: "uptime"},
+ },
+ }
+)
+
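+// failedUpdatesLimit is how many consecutive collections a discovered host or VM
+// may be missing from the scraped metrics before its charts are removed.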
+const failedUpdatesLimit = 10
+
+func (vs *VSphere) updateCharts() {
+ for id, fails := range vs.discoveredHosts {
+ if fails >= failedUpdatesLimit {
+ vs.removeFromCharts(id)
+ delete(vs.charted, id)
+ delete(vs.discoveredHosts, id)
+ continue
+ }
+
+ host := vs.resources.Hosts.Get(id)
+ if host == nil || vs.charted[id] || fails != 0 {
+ continue
+ }
+
+ vs.charted[id] = true
+ charts := newHostCharts(host)
+ if err := vs.Charts().Add(*charts...); err != nil {
+ vs.Error(err)
+ }
+ }
+
+ for id, fails := range vs.discoveredVMs {
+ if fails >= failedUpdatesLimit {
+ vs.removeFromCharts(id)
+ delete(vs.charted, id)
+ delete(vs.discoveredVMs, id)
+ continue
+ }
+
+ vm := vs.resources.VMs.Get(id)
+ if vm == nil || vs.charted[id] || fails != 0 {
+ continue
+ }
+
+ vs.charted[id] = true
+ charts := newVMCharts(vm)
+ if err := vs.Charts().Add(*charts...); err != nil {
+ vs.Error(err)
+ }
+ }
+}
+
+func newVMCharts(vm *rs.VM) *module.Charts {
+ charts := vmChartsTmpl.Copy()
+
+ for _, chart := range *charts {
+ chart.ID = fmt.Sprintf(chart.ID, vm.ID)
+ chart.Labels = []module.Label{
+ {Key: "datacenter", Value: vm.Hier.DC.Name},
+ {Key: "cluster", Value: getVMClusterName(vm)},
+ {Key: "host", Value: vm.Hier.Host.Name},
+ {Key: "vm", Value: vm.Name},
+ }
+ for _, dim := range chart.Dims {
+ dim.ID = fmt.Sprintf(dim.ID, vm.ID)
+ }
+ }
+
+ return charts
+}
+
+func getVMClusterName(vm *rs.VM) string {
+ if vm.Hier.Cluster.Name == vm.Hier.Host.Name {
+ return ""
+ }
+ return vm.Hier.Cluster.Name
+}
+
+func newHostCharts(host *rs.Host) *module.Charts {
+ charts := hostChartsTmpl.Copy()
+
+ for _, chart := range *charts {
+ chart.ID = fmt.Sprintf(chart.ID, host.ID)
+ chart.Labels = []module.Label{
+ {Key: "datacenter", Value: host.Hier.DC.Name},
+ {Key: "cluster", Value: getHostClusterName(host)},
+ {Key: "host", Value: host.Name},
+ }
+
+ for _, dim := range chart.Dims {
+ dim.ID = fmt.Sprintf(dim.ID, host.ID)
+ }
+ }
+
+ return charts
+}
+
+func getHostClusterName(host *rs.Host) string {
+ if host.Hier.Cluster.Name == host.Name {
+ return ""
+ }
+ return host.Hier.Cluster.Name
+}
+
+func (vs *VSphere) removeFromCharts(prefix string) {
+ for _, c := range *vs.Charts() {
+ if strings.HasPrefix(c.ID, prefix) {
+ c.MarkRemove()
+ c.MarkNotCreated()
+ }
+ }
+}
+
+//func findMetricSeriesByPrefix(ms []performance.MetricSeries, prefix string) []performance.MetricSeries {
+// from := sort.Search(len(ms), func(i int) bool { return ms[i].Name >= prefix })
+//
+// if from == len(ms) || !strings.HasPrefix(ms[from].Name, prefix) {
+// return nil
+// }
+//
+// until := from + 1
+// for until < len(ms) && strings.HasPrefix(ms[until].Name, prefix) {
+// until++
+// }
+// return ms[from:until]
+//}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/client/client.go b/src/go/collectors/go.d.plugin/modules/vsphere/client/client.go
new file mode 100644
index 000000000..827351cf8
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/client/client.go
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package client
+
+import (
+ "context"
+ "net/http"
+ "net/url"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/tlscfg"
+
+ "github.com/vmware/govmomi"
+ "github.com/vmware/govmomi/performance"
+ "github.com/vmware/govmomi/session"
+ "github.com/vmware/govmomi/view"
+ "github.com/vmware/govmomi/vim25"
+ "github.com/vmware/govmomi/vim25/mo"
+ "github.com/vmware/govmomi/vim25/soap"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+const (
+ datacenter = "Datacenter"
+ folder = "Folder"
+ computeResource = "ComputeResource"
+ hostSystem = "HostSystem"
+ virtualMachine = "VirtualMachine"
+
+ maxIdleConnections = 32
+)
+
+type Config struct {
+ URL string
+ User string
+ Password string
+ tlscfg.TLSConfig
+ Timeout time.Duration
+}
+
+type Client struct {
+ client *govmomi.Client
+ root *view.ContainerView
+ perf *performance.Manager
+}
+
+func newSoapClient(config Config) (*soap.Client, error) {
+ soapURL, err := soap.ParseURL(config.URL)
+ if err != nil || soapURL == nil {
+ return nil, err
+ }
+ soapURL.User = url.UserPassword(config.User, config.Password)
+ soapClient := soap.NewClient(soapURL, config.TLSConfig.InsecureSkipVerify)
+
+ tlsConfig, err := tlscfg.NewTLSConfig(config.TLSConfig)
+ if err != nil {
+ return nil, err
+ }
+ if tlsConfig != nil && len(tlsConfig.Certificates) > 0 {
+ soapClient.SetCertificate(tlsConfig.Certificates[0])
+ }
+ if config.TLSConfig.TLSCA != "" {
+ if err := soapClient.SetRootCAs(config.TLSConfig.TLSCA); err != nil {
+ return nil, err
+ }
+ }
+
+ if t, ok := soapClient.Transport.(*http.Transport); ok {
+ t.MaxIdleConnsPerHost = maxIdleConnections
+ t.TLSHandshakeTimeout = config.Timeout
+ }
+ soapClient.Timeout = config.Timeout
+
+ return soapClient, nil
+}
+
+func newContainerView(ctx context.Context, client *govmomi.Client) (*view.ContainerView, error) {
+ viewManager := view.NewManager(client.Client)
+ return viewManager.CreateContainerView(ctx, client.ServiceContent.RootFolder, []string{}, true)
+}
+
+func newPerformanceManager(client *vim25.Client) *performance.Manager {
+ perfManager := performance.NewManager(client)
+ perfManager.Sort = true
+ return perfManager
+}
+
+func New(config Config) (*Client, error) {
+ ctx := context.Background()
+ soapClient, err := newSoapClient(config)
+ if err != nil {
+ return nil, err
+ }
+
+ vimClient, err := vim25.NewClient(ctx, soapClient)
+ if err != nil {
+ return nil, err
+ }
+
+ vmomiClient := &govmomi.Client{
+ Client: vimClient,
+ SessionManager: session.NewManager(vimClient),
+ }
+
+ userInfo := url.UserPassword(config.User, config.Password)
+ addKeepAlive(vmomiClient, userInfo)
+
+ err = vmomiClient.Login(ctx, userInfo)
+ if err != nil {
+ return nil, err
+ }
+
+ containerView, err := newContainerView(ctx, vmomiClient)
+ if err != nil {
+ return nil, err
+ }
+
+ perfManager := newPerformanceManager(vimClient)
+
+ client := &Client{
+ client: vmomiClient,
+ perf: perfManager,
+ root: containerView,
+ }
+
+ return client, nil
+}
+
+func (c *Client) IsSessionActive() (bool, error) {
+ return c.client.SessionManager.SessionIsActive(context.Background())
+}
+
+func (c *Client) Version() string {
+ return c.client.ServiceContent.About.Version
+}
+
+func (c *Client) Login(userinfo *url.Userinfo) error {
+ return c.client.Login(context.Background(), userinfo)
+}
+
+func (c *Client) Logout() error {
+ return c.client.Logout(context.Background())
+}
+
+func (c *Client) PerformanceMetrics(pqs []types.PerfQuerySpec) ([]performance.EntityMetric, error) {
+ metrics, err := c.perf.Query(context.Background(), pqs)
+ if err != nil {
+ return nil, err
+ }
+ return c.perf.ToMetricSeries(context.Background(), metrics)
+}
+
+func (c *Client) Datacenters(pathSet ...string) (dcs []mo.Datacenter, err error) {
+ err = c.root.Retrieve(context.Background(), []string{datacenter}, pathSet, &dcs)
+ return
+}
+
+func (c *Client) Folders(pathSet ...string) (folders []mo.Folder, err error) {
+ err = c.root.Retrieve(context.Background(), []string{folder}, pathSet, &folders)
+ return
+}
+
+func (c *Client) ComputeResources(pathSet ...string) (computes []mo.ComputeResource, err error) {
+ err = c.root.Retrieve(context.Background(), []string{computeResource}, pathSet, &computes)
+ return
+}
+
+func (c *Client) Hosts(pathSet ...string) (hosts []mo.HostSystem, err error) {
+ err = c.root.Retrieve(context.Background(), []string{hostSystem}, pathSet, &hosts)
+ return
+}
+
+func (c *Client) VirtualMachines(pathSet ...string) (vms []mo.VirtualMachine, err error) {
+ err = c.root.Retrieve(context.Background(), []string{virtualMachine}, pathSet, &vms)
+ return
+}
+
+func (c *Client) CounterInfoByName() (map[string]*types.PerfCounterInfo, error) {
+ return c.perf.CounterInfoByName(context.Background())
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/client/client_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/client/client_test.go
new file mode 100644
index 000000000..163829f41
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/client/client_test.go
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package client
+
+import (
+ "crypto/tls"
+ "net/url"
+ "testing"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/tlscfg"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/vmware/govmomi/simulator"
+ "github.com/vmware/govmomi/vim25/mo"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+func TestNew(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ v, err := client.IsSessionActive()
+ assert.NoError(t, err)
+ assert.True(t, v)
+}
+
+func TestClient_Version(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ assert.NotEmpty(t, client.Version())
+}
+
+func TestClient_CounterInfoByName(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ v, err := client.CounterInfoByName()
+ assert.NoError(t, err)
+ assert.IsType(t, map[string]*types.PerfCounterInfo{}, v)
+ assert.NotEmpty(t, v)
+}
+
+func TestClient_IsSessionActive(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ v, err := client.IsSessionActive()
+ assert.NoError(t, err)
+ assert.True(t, v)
+}
+
+func TestClient_Login(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ assert.NoError(t, client.Logout())
+
+ err := client.Login(url.UserPassword("admin", "password"))
+ assert.NoError(t, err)
+
+ ok, err := client.IsSessionActive()
+ assert.NoError(t, err)
+ assert.True(t, ok)
+}
+
+func TestClient_Logout(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ assert.NoError(t, client.Logout())
+
+ v, err := client.IsSessionActive()
+ assert.NoError(t, err)
+ assert.False(t, v)
+}
+
+func TestClient_Datacenters(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ dcs, err := client.Datacenters()
+ assert.NoError(t, err)
+ assert.NotEmpty(t, dcs)
+}
+
+func TestClient_Folders(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ folders, err := client.Folders()
+ assert.NoError(t, err)
+ assert.NotEmpty(t, folders)
+}
+
+func TestClient_ComputeResources(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ computes, err := client.ComputeResources()
+ assert.NoError(t, err)
+ assert.NotEmpty(t, computes)
+}
+
+func TestClient_Hosts(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ hosts, err := client.Hosts()
+ assert.NoError(t, err)
+ assert.NotEmpty(t, hosts)
+}
+
+func TestClient_VirtualMachines(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ vms, err := client.VirtualMachines()
+ assert.NoError(t, err)
+ assert.NotEmpty(t, vms)
+}
+
+func TestClient_PerformanceMetrics(t *testing.T) {
+ client, teardown := prepareClient(t)
+ defer teardown()
+
+ hosts, err := client.Hosts()
+ require.NoError(t, err)
+ metrics, err := client.PerformanceMetrics(hostsPerfQuerySpecs(hosts))
+ require.NoError(t, err)
+ assert.True(t, len(metrics) > 0)
+}
+
+func prepareClient(t *testing.T) (client *Client, teardown func()) {
+ model, srv := createSim(t)
+ teardown = func() { model.Remove(); srv.Close() }
+ return newClient(t, srv.URL), teardown
+}
+
+func newClient(t *testing.T, vCenterURL *url.URL) *Client {
+ client, err := New(Config{
+ URL: vCenterURL.String(),
+ User: "admin",
+ Password: "password",
+ Timeout: time.Second * 3,
+ TLSConfig: tlscfg.TLSConfig{InsecureSkipVerify: true},
+ })
+ require.NoError(t, err)
+ return client
+}
+
+func createSim(t *testing.T) (*simulator.Model, *simulator.Server) {
+ model := simulator.VPX()
+ err := model.Create()
+ require.NoError(t, err)
+ model.Service.TLS = new(tls.Config)
+ return model, model.Service.NewServer()
+}
+
+func hostsPerfQuerySpecs(hosts []mo.HostSystem) []types.PerfQuerySpec {
+ var pqs []types.PerfQuerySpec
+ for _, host := range hosts {
+ pq := types.PerfQuerySpec{
+ Entity: host.Reference(),
+ MaxSample: 1,
+ MetricId: []types.PerfMetricId{{CounterId: 32, Instance: ""}},
+ IntervalId: 20,
+ Format: "normal",
+ }
+ pqs = append(pqs, pq)
+ }
+ return pqs
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/client/keepalive.go b/src/go/collectors/go.d.plugin/modules/vsphere/client/keepalive.go
new file mode 100644
index 000000000..0ce1ef5c0
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/client/keepalive.go
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package client
+
+import (
+ "context"
+ "net/url"
+ "time"
+
+ "github.com/vmware/govmomi"
+ "github.com/vmware/govmomi/session"
+ "github.com/vmware/govmomi/vim25/methods"
+ "github.com/vmware/govmomi/vim25/soap"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+const (
+ keepAliveEvery = time.Second * 15
+)
+
+// TODO: survive a vCenter reboot; it looks like we need to call New() again
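+// addKeepAlive installs a keep-alive handler that periodically (every keepAliveEvery) issues
+// a GetCurrentTime request and re-logins with the given credentials on a NotAuthenticated fault.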
+func addKeepAlive(client *govmomi.Client, userinfo *url.Userinfo) {
+ f := func(rt soap.RoundTripper) error {
+ _, err := methods.GetCurrentTime(context.Background(), rt)
+ if err == nil {
+ return nil
+ }
+
+ if !isNotAuthenticated(err) {
+ return nil
+ }
+
+ _ = client.Login(context.Background(), userinfo)
+ return nil
+ }
+ client.Client.RoundTripper = session.KeepAliveHandler(client.Client.RoundTripper, keepAliveEvery, f)
+}
+
+func isNotAuthenticated(err error) bool {
+ if !soap.IsSoapFault(err) {
+ return false
+ }
+ _, ok := soap.ToSoapFault(err).VimFault().(*types.NotAuthenticated)
+ return ok
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/collect.go b/src/go/collectors/go.d.plugin/modules/vsphere/collect.go
new file mode 100644
index 000000000..1aa9af9c1
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/collect.go
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ "errors"
+ "fmt"
+ "time"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+
+ "github.com/vmware/govmomi/performance"
+)
+
+// ManagedEntityStatus
+var overallStatuses = []string{"green", "red", "yellow", "gray"}
+
+func (vs *VSphere) collect() (map[string]int64, error) {
+ vs.collectionLock.Lock()
+ defer vs.collectionLock.Unlock()
+
+ vs.Debug("starting collection process")
+ t := time.Now()
+ mx := make(map[string]int64)
+
+ err := vs.collectHosts(mx)
+ if err != nil {
+ return nil, err
+ }
+
+ err = vs.collectVMs(mx)
+ if err != nil {
+ return nil, err
+ }
+
+ vs.updateCharts()
+
+ vs.Debugf("metrics collected, process took %s", time.Since(t))
+
+ return mx, nil
+}
+
+func (vs *VSphere) collectHosts(mx map[string]int64) error {
+ if len(vs.resources.Hosts) == 0 {
+ return nil
+ }
+ // NOTE: returns unsorted if at least one types.PerfMetricId Instance is not ""
+ metrics := vs.ScrapeHosts(vs.resources.Hosts)
+ if len(metrics) == 0 {
+ return errors.New("failed to scrape hosts metrics")
+ }
+
+ vs.collectHostsMetrics(mx, metrics)
+
+ return nil
+}
+
+func (vs *VSphere) collectHostsMetrics(mx map[string]int64, metrics []performance.EntityMetric) {
+ for k := range vs.discoveredHosts {
+ vs.discoveredHosts[k]++
+ }
+
+ for _, metric := range metrics {
+ if host := vs.resources.Hosts.Get(metric.Entity.Value); host != nil {
+ vs.discoveredHosts[host.ID] = 0
+ writeHostMetrics(mx, host, metric.Value)
+ }
+ }
+}
+
+func writeHostMetrics(mx map[string]int64, host *rs.Host, metrics []performance.MetricSeries) {
+ for _, metric := range metrics {
+ if len(metric.Value) == 0 || metric.Value[0] == -1 {
+ continue
+ }
+ key := fmt.Sprintf("%s_%s", host.ID, metric.Name)
+ mx[key] = metric.Value[0]
+ }
+ for _, v := range overallStatuses {
+ key := fmt.Sprintf("%s_overall.status.%s", host.ID, v)
+ mx[key] = boolToInt(host.OverallStatus == v)
+ }
+}
+
+func (vs *VSphere) collectVMs(mx map[string]int64) error {
+ if len(vs.resources.VMs) == 0 {
+ return nil
+ }
+ // NOTE: returns unsorted if at least one types.PerfMetricId Instance is not ""
+ ems := vs.ScrapeVMs(vs.resources.VMs)
+ if len(ems) == 0 {
+ return errors.New("failed to scrape vms metrics")
+ }
+
+ vs.collectVMsMetrics(mx, ems)
+
+ return nil
+}
+
+func (vs *VSphere) collectVMsMetrics(mx map[string]int64, metrics []performance.EntityMetric) {
+ for id := range vs.discoveredVMs {
+ vs.discoveredVMs[id]++
+ }
+
+ for _, metric := range metrics {
+ if vm := vs.resources.VMs.Get(metric.Entity.Value); vm != nil {
+ writeVMMetrics(mx, vm, metric.Value)
+ vs.discoveredVMs[vm.ID] = 0
+ }
+ }
+}
+
+func writeVMMetrics(mx map[string]int64, vm *rs.VM, metrics []performance.MetricSeries) {
+ for _, metric := range metrics {
+ if len(metric.Value) == 0 || metric.Value[0] == -1 {
+ continue
+ }
+ key := fmt.Sprintf("%s_%s", vm.ID, metric.Name)
+ mx[key] = metric.Value[0]
+ }
+ for _, v := range overallStatuses {
+ key := fmt.Sprintf("%s_overall.status.%s", vm.ID, v)
+ mx[key] = boolToInt(vm.OverallStatus == v)
+ }
+}
+
+func boolToInt(v bool) int64 {
+ if v {
+ return 1
+ }
+ return 0
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/config_schema.json b/src/go/collectors/go.d.plugin/modules/vsphere/config_schema.json
new file mode 100644
index 000000000..b338102c2
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/config_schema.json
@@ -0,0 +1,245 @@
+{
+ "jsonSchema": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "VMware vCenter Server collector configuration.",
+ "type": "object",
+ "properties": {
+ "update_every": {
+ "title": "Update every",
+ "description": "Data collection interval, measured in seconds.",
+ "type": "integer",
+ "minimum": 1,
+ "default": 20
+ },
+ "url": {
+ "title": "URL",
+ "description": "The base URL of the VMware vCenter Server.",
+ "type": "string",
+ "format": "uri"
+ },
+ "timeout": {
+ "title": "Timeout",
+ "description": "The timeout in seconds for the HTTP request.",
+ "type": "number",
+ "minimum": 0.5,
+ "default": 20
+ },
+ "discovery_interval": {
+ "title": "Discovery interval",
+ "description": "Hosts and VMs discovery interval in seconds.",
+ "type": "number",
+ "minimum": 60,
+ "default": 300
+ },
+ "not_follow_redirects": {
+ "title": "Not follow redirects",
+ "description": "If set, the client will not follow HTTP redirects automatically.",
+ "type": "boolean"
+ },
+ "host_include": {
+ "title": "Host selectors",
+ "description": "Configuration for monitoring specific hosts. The selector format follows the pattern `/Datacenter/Cluster/Host`, where each value can be set using [Netdata simple patterns](https://github.com/netdata/netdata/tree/master/src/libnetdata/simple_pattern#readme).",
+ "type": [
+ "array",
+ "null"
+ ],
+ "uniqueItems": true,
+ "items": {
+ "title": "Host selector",
+ "description": "",
+ "type": "string",
+ "default": "/*/*/*",
+ "pattern": "^$|^/"
+ },
+ "default": [
+ "/*"
+ ]
+ },
+ "vm_include": {
+ "title": "Virtual machine selectors",
+ "description": "Configuration for monitoring specific virtual machines. The selector format follows the pattern `/Datacenter/Cluster/Host/VM`, where each value can be set using [Netdata simple patterns](https://github.com/netdata/netdata/tree/master/src/libnetdata/simple_pattern#readme).",
+ "type": [
+ "array",
+ "null"
+ ],
+ "uniqueItems": true,
+ "items": {
+ "title": "VM selector",
+ "description": "",
+ "type": "string",
+ "default": "/*/*/*/*",
+ "pattern": "^$|^/"
+ },
+ "default": [
+ "/*"
+ ]
+ },
+ "username": {
+ "title": "Username",
+ "description": "The username for basic authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "password": {
+ "title": "Password",
+ "description": "The password for basic authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "proxy_url": {
+ "title": "Proxy URL",
+ "description": "The URL of the proxy server.",
+ "type": "string"
+ },
+ "proxy_username": {
+ "title": "Proxy username",
+ "description": "The username for proxy authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "proxy_password": {
+ "title": "Proxy password",
+ "description": "The password for proxy authentication.",
+ "type": "string",
+ "sensitive": true
+ },
+ "headers": {
+ "title": "Headers",
+ "description": "Additional HTTP headers to include in the request.",
+ "type": [
+ "object",
+ "null"
+ ],
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "tls_skip_verify": {
+ "title": "Skip TLS verification",
+ "description": "If set, TLS certificate verification will be skipped.",
+ "type": "boolean"
+ },
+ "tls_ca": {
+ "title": "TLS CA",
+ "description": "The path to the CA certificate file for TLS verification.",
+ "type": "string",
+ "pattern": "^$|^/"
+ },
+ "tls_cert": {
+ "title": "TLS certificate",
+ "description": "The path to the client certificate file for TLS authentication.",
+ "type": "string",
+ "pattern": "^$|^/"
+ },
+ "tls_key": {
+ "title": "TLS key",
+ "description": "The path to the client key file for TLS authentication.",
+ "type": "string",
+ "pattern": "^$|^/"
+ },
+ "body": {
+ "title": "Body",
+ "type": "string"
+ },
+ "method": {
+ "title": "Method",
+ "type": "string"
+ }
+ },
+ "required": [
+ "url",
+ "username",
+ "password",
+ "host_include",
+ "vm_include"
+ ],
+ "additionalProperties": false,
+ "patternProperties": {
+ "^name$": {}
+ }
+ },
+ "uiSchema": {
+ "uiOptions": {
+ "fullPage": true
+ },
+ "ui:flavour": "tabs",
+ "ui:options": {
+ "tabs": [
+ {
+ "title": "Base",
+ "fields": [
+ "update_every",
+ "url",
+ "timeout",
+ "discovery_interval",
+ "not_follow_redirects"
+ ]
+ },
+ {
+ "title": "Hosts & VMs selector",
+ "fields": [
+ "host_include",
+ "vm_include"
+ ]
+ },
+ {
+ "title": "Auth",
+ "fields": [
+ "username",
+ "password"
+ ]
+ },
+ {
+ "title": "TLS",
+ "fields": [
+ "tls_skip_verify",
+ "tls_ca",
+ "tls_cert",
+ "tls_key"
+ ]
+ },
+ {
+ "title": "Proxy",
+ "fields": [
+ "proxy_url",
+ "proxy_username",
+ "proxy_password"
+ ]
+ },
+ {
+ "title": "Headers",
+ "fields": [
+ "headers"
+ ]
+ }
+ ]
+ },
+ "body": {
+ "ui:widget": "hidden"
+ },
+ "method": {
+ "ui:widget": "hidden"
+ },
+ "url": {
+ "ui:placeholder": "https://203.0.113.0"
+ },
+ "timeout": {
+ "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)."
+ },
+ "host_include": {
+ "ui:listFlavour": "list"
+ },
+ "vm_include": {
+ "ui:listFlavour": "list"
+ },
+ "username": {
+ "ui:placeholder": "admin@vsphere.local"
+ },
+ "password": {
+ "ui:widget": "password"
+ },
+ "proxy_password": {
+ "ui:widget": "password"
+ }
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover.go
new file mode 100644
index 000000000..1ea0a4d6e
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover.go
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+func (vs *VSphere) goDiscovery() {
+ if vs.discoveryTask != nil {
+ vs.discoveryTask.stop()
+ }
+ vs.Infof("starting discovery process, will do discovery every %s", vs.DiscoveryInterval)
+
+ job := func() {
+ err := vs.discoverOnce()
+ if err != nil {
+ vs.Errorf("error on discovering : %v", err)
+ }
+ }
+ vs.discoveryTask = newTask(job, vs.DiscoveryInterval.Duration())
+}
+
+func (vs *VSphere) discoverOnce() error {
+ res, err := vs.Discover()
+ if err != nil {
+ return err
+ }
+
+ vs.collectionLock.Lock()
+ vs.resources = res
+ vs.collectionLock.Unlock()
+
+ return nil
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover/build.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover/build.go
new file mode 100644
index 000000000..3bf9bfb48
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover/build.go
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package discover
+
+import (
+ "time"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+
+ "github.com/vmware/govmomi/vim25/mo"
+)
+
+func (d Discoverer) build(raw *resources) *rs.Resources {
+ d.Debug("discovering : building : starting building resources process")
+ t := time.Now()
+
+ var res rs.Resources
+ res.DataCenters = d.buildDatacenters(raw.dcs)
+ res.Folders = d.buildFolders(raw.folders)
+ res.Clusters = d.buildClusters(raw.clusters)
+ fixClustersParentID(&res)
+ res.Hosts = d.buildHosts(raw.hosts)
+ res.VMs = d.buildVMs(raw.vms)
+
+ d.Infof("discovering : building : built %d/%d dcs, %d/%d folders, %d/%d clusters, %d/%d hosts, %d/%d vms, process took %s",
+ len(res.DataCenters),
+ len(raw.dcs),
+ len(res.Folders),
+ len(raw.folders),
+ len(res.Clusters),
+ len(raw.clusters),
+ len(res.Hosts),
+ len(raw.hosts),
+ len(res.VMs),
+ len(raw.vms),
+ time.Since(t),
+ )
+ return &res
+}
+
+// a cluster's parent is a folder by default;
+// should be called after buildDatacenters, buildFolders and buildClusters
+func fixClustersParentID(res *rs.Resources) {
+ for _, c := range res.Clusters {
+ c.ParentID = findClusterDcID(c.ParentID, res.Folders)
+ }
+}
+
+func findClusterDcID(parentID string, folders rs.Folders) string {
+ f := folders.Get(parentID)
+ if f == nil {
+ return parentID
+ }
+ return findClusterDcID(f.ParentID, folders)
+}
+
+func (Discoverer) buildDatacenters(raw []mo.Datacenter) rs.DataCenters {
+ dcs := make(rs.DataCenters)
+ for _, d := range raw {
+ dcs.Put(newDC(d))
+ }
+ return dcs
+}
+
+func newDC(raw mo.Datacenter) *rs.Datacenter {
+ // Datacenter1 datacenter-2 group-h4 group-v3
+ return &rs.Datacenter{
+ Name: raw.Name,
+ ID: raw.Reference().Value,
+ }
+}
+
+func (Discoverer) buildFolders(raw []mo.Folder) rs.Folders {
+ fs := make(rs.Folders)
+ for _, d := range raw {
+ fs.Put(newFolder(d))
+ }
+ return fs
+}
+
+func newFolder(raw mo.Folder) *rs.Folder {
+ // vm group-v55 datacenter-54
+ // host group-h56 datacenter-54
+ // datastore group-s57 datacenter-54
+ // network group-n58 datacenter-54
+ return &rs.Folder{
+ Name: raw.Name,
+ ID: raw.Reference().Value,
+ ParentID: raw.Parent.Value,
+ }
+}
+
+func (Discoverer) buildClusters(raw []mo.ComputeResource) rs.Clusters {
+ clusters := make(rs.Clusters)
+ for _, c := range raw {
+ clusters.Put(newCluster(c))
+ }
+ return clusters
+}
+
+func newCluster(raw mo.ComputeResource) *rs.Cluster {
+ // s - dummy cluster, c - created by user cluster
+ // 192.168.0.201 domain-s61 group-h4
+ // New Cluster1 domain-c52 group-h67
+ return &rs.Cluster{
+ Name: raw.Name,
+ ID: raw.Reference().Value,
+ ParentID: raw.Parent.Value,
+ }
+}
+
+const (
+ poweredOn = "poweredOn"
+)
+
+func (d Discoverer) buildHosts(raw []mo.HostSystem) rs.Hosts {
+ var num int
+ hosts := make(rs.Hosts)
+ for _, h := range raw {
+ // poweredOn | poweredOff | standBy | unknown
+ if h.Runtime.PowerState != poweredOn {
+ num++
+ continue
+ }
+ // connected | notResponding | disconnected
+ //if v.Runtime.ConnectionState == "" {
+ //
+ //}
+ hosts.Put(newHost(h))
+ }
+ if num > 0 {
+ d.Infof("discovering : building : removed %d hosts (not powered on)", num)
+ }
+ return hosts
+}
+
+func newHost(raw mo.HostSystem) *rs.Host {
+ // 192.168.0.201 host-22 domain-s61
+ // 192.168.0.202 host-28 domain-c52
+ // 192.168.0.203 host-33 domain-c52
+ return &rs.Host{
+ Name: raw.Name,
+ ID: raw.Reference().Value,
+ ParentID: raw.Parent.Value,
+ OverallStatus: string(raw.Summary.OverallStatus),
+ Ref: raw.Reference(),
+ }
+}
+
+func (d Discoverer) buildVMs(raw []mo.VirtualMachine) rs.VMs {
+ var num int
+ vms := make(rs.VMs)
+ for _, v := range raw {
+ // poweredOff | poweredOn | suspended
+ if v.Runtime.PowerState != poweredOn {
+ num++
+ continue
+ }
+ // connected | disconnected | orphaned | inaccessible | invalid
+ //if v.Runtime.ConnectionState == "" {
+ //
+ //}
+ vms.Put(newVM(v))
+ }
+ if num > 0 {
+ d.Infof("discovering : building : removed %d vms (not powered on)", num)
+ }
+ return vms
+}
+
+func newVM(raw mo.VirtualMachine) *rs.VM {
+ // deb91 vm-25 group-v3 host-22
+ return &rs.VM{
+ Name: raw.Name,
+ ID: raw.Reference().Value,
+ ParentID: raw.Runtime.Host.Value,
+ OverallStatus: string(raw.Summary.OverallStatus),
+ Ref: raw.Reference(),
+ }
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover/discover.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover/discover.go
new file mode 100644
index 000000000..0d68b71c3
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover/discover.go
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package discover
+
+import (
+ "fmt"
+ "strings"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/match"
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+
+ "github.com/netdata/netdata/go/go.d.plugin/logger"
+ "github.com/vmware/govmomi/vim25/mo"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+type Client interface {
+ Datacenters(pathSet ...string) ([]mo.Datacenter, error)
+ Folders(pathSet ...string) ([]mo.Folder, error)
+ ComputeResources(pathSet ...string) ([]mo.ComputeResource, error)
+ Hosts(pathSet ...string) ([]mo.HostSystem, error)
+ VirtualMachines(pathSet ...string) ([]mo.VirtualMachine, error)
+
+ CounterInfoByName() (map[string]*types.PerfCounterInfo, error)
+}
+
+func New(client Client) *Discoverer {
+ return &Discoverer{
+ Client: client,
+ }
+}
+
+type Discoverer struct {
+ *logger.Logger
+ Client
+ match.HostMatcher
+ match.VMMatcher
+}
+
+type resources struct {
+ dcs []mo.Datacenter
+ folders []mo.Folder
+ clusters []mo.ComputeResource
+ hosts []mo.HostSystem
+ vms []mo.VirtualMachine
+}
+
+func (d Discoverer) Discover() (*rs.Resources, error) {
+ startTime := time.Now()
+ raw, err := d.discover()
+ if err != nil {
+ return nil, fmt.Errorf("discovering resources : %v", err)
+ }
+
+ res := d.build(raw)
+
+ err = d.setHierarchy(res)
+ if err != nil {
+ // TODO: handle objects w/o hier?
+ d.Error(err)
+ }
+
+ numH := len(res.Hosts)
+ numV := len(res.VMs)
+ removed := d.removeUnmatched(res)
+ if removed == (numH + numV) {
+ return nil, fmt.Errorf("all resoursces were filtered (%d hosts, %d vms)", numH, numV)
+ }
+
+ err = d.collectMetricLists(res)
+ if err != nil {
+ return nil, fmt.Errorf("collecting metric lists : %v", err)
+ }
+
+ d.Infof("discovering : discovered %d/%d hosts, %d/%d vms, the whole process took %s",
+ len(res.Hosts),
+ len(raw.hosts),
+ len(res.VMs),
+ len(raw.vms),
+ time.Since(startTime))
+
+ return res, nil
+}
+
+var (
+ // properties to set
+ datacenterPathSet = []string{"name", "parent"}
+ folderPathSet = []string{"name", "parent"}
+ clusterPathSet = []string{"name", "parent"}
+ hostPathSet = []string{"name", "parent", "runtime.powerState", "summary.overallStatus"}
+ vmPathSet = []string{"name", "runtime.host", "runtime.powerState", "summary.overallStatus"}
+)
+
+func (d Discoverer) discover() (*resources, error) {
+ d.Debug("discovering : starting resource discovering process")
+
+ start := time.Now()
+ t := start
+ datacenters, err := d.Datacenters(datacenterPathSet...)
+ if err != nil {
+ return nil, err
+ }
+ d.Debugf("discovering : found %d dcs, process took %s", len(datacenters), time.Since(t))
+
+ t = time.Now()
+ folders, err := d.Folders(folderPathSet...)
+ if err != nil {
+ return nil, err
+ }
+ d.Debugf("discovering : found %d folders, process took %s", len(folders), time.Since(t))
+
+ t = time.Now()
+ clusters, err := d.ComputeResources(clusterPathSet...)
+ if err != nil {
+ return nil, err
+ }
+ d.Debugf("discovering : found %d clusters, process took %s", len(clusters), time.Since(t))
+
+ t = time.Now()
+ hosts, err := d.Hosts(hostPathSet...)
+ if err != nil {
+ return nil, err
+ }
+ d.Debugf("discovering : found %d hosts, process took %s", len(hosts), time.Since(t))
+
+ t = time.Now()
+ vms, err := d.VirtualMachines(vmPathSet...)
+ if err != nil {
+ return nil, err
+ }
+ d.Debugf("discovering : found %d vms, process took %s", len(hosts), time.Since(t))
+
+ raw := resources{
+ dcs: datacenters,
+ folders: folders,
+ clusters: clusters,
+ hosts: hosts,
+ vms: vms,
+ }
+
+ d.Infof("discovering : found %d dcs, %d folders, %d clusters (%d dummy), %d hosts, %d vms, process took %s",
+ len(raw.dcs),
+ len(raw.folders),
+ len(clusters),
+ numOfDummyClusters(clusters),
+ len(raw.hosts),
+ len(raw.vms),
+ time.Since(start),
+ )
+
+ return &raw, nil
+}
+
+func numOfDummyClusters(clusters []mo.ComputeResource) (num int) {
+ for _, c := range clusters {
+ // domain-s61 | domain-c52
+ if strings.HasPrefix(c.Reference().Value, "domain-s") {
+ num++
+ }
+ }
+ return num
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover/discover_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover/discover_test.go
new file mode 100644
index 000000000..01f83fd38
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover/discover_test.go
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package discover
+
+import (
+ "crypto/tls"
+ "net/url"
+ "testing"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/client"
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/tlscfg"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/vmware/govmomi/simulator"
+)
+
+func TestDiscoverer_Discover(t *testing.T) {
+ d, _, teardown := prepareDiscovererSim(t)
+ defer teardown()
+
+ res, err := d.Discover()
+
+ require.NoError(t, err)
+ assert.True(t, len(res.DataCenters) > 0)
+ assert.True(t, len(res.Folders) > 0)
+ assert.True(t, len(res.Clusters) > 0)
+ assert.True(t, len(res.Hosts) > 0)
+ assert.True(t, len(res.VMs) > 0)
+ assert.True(t, isHierarchySet(res))
+ assert.True(t, isMetricListsCollected(res))
+}
+
+func TestDiscoverer_discover(t *testing.T) {
+ d, model, teardown := prepareDiscovererSim(t)
+ defer teardown()
+
+ raw, err := d.discover()
+
+ require.NoError(t, err)
+ count := model.Count()
+ assert.Lenf(t, raw.dcs, count.Datacenter, "datacenters")
+ assert.Lenf(t, raw.folders, count.Folder-1, "folders") // minus root folder
+ dummyClusters := model.Host * count.Datacenter
+ assert.Lenf(t, raw.clusters, count.Cluster+dummyClusters, "clusters")
+ assert.Lenf(t, raw.hosts, count.Host, "hosts")
+ assert.Lenf(t, raw.vms, count.Machine, "vms")
+}
+
+func TestDiscoverer_build(t *testing.T) {
+ d, _, teardown := prepareDiscovererSim(t)
+ defer teardown()
+
+ raw, err := d.discover()
+ require.NoError(t, err)
+
+ res := d.build(raw)
+
+ assert.Lenf(t, res.DataCenters, len(raw.dcs), "datacenters")
+ assert.Lenf(t, res.Folders, len(raw.folders), "folders")
+ assert.Lenf(t, res.Clusters, len(raw.clusters), "clusters")
+ assert.Lenf(t, res.Hosts, len(raw.hosts), "hosts")
+ assert.Lenf(t, res.VMs, len(raw.vms), "vms")
+}
+
+func TestDiscoverer_setHierarchy(t *testing.T) {
+ d, _, teardown := prepareDiscovererSim(t)
+ defer teardown()
+
+ raw, err := d.discover()
+ require.NoError(t, err)
+ res := d.build(raw)
+
+ err = d.setHierarchy(res)
+
+ require.NoError(t, err)
+ assert.True(t, isHierarchySet(res))
+}
+
+func TestDiscoverer_removeUnmatched(t *testing.T) {
+ d, _, teardown := prepareDiscovererSim(t)
+ defer teardown()
+
+ d.HostMatcher = falseHostMatcher{}
+ d.VMMatcher = falseVMMatcher{}
+ raw, err := d.discover()
+ require.NoError(t, err)
+ res := d.build(raw)
+
+ numVMs, numHosts := len(res.VMs), len(res.Hosts)
+ assert.Equal(t, numVMs+numHosts, d.removeUnmatched(res))
+ assert.Lenf(t, res.Hosts, 0, "hosts")
+ assert.Lenf(t, res.VMs, 0, "vms")
+}
+
+func TestDiscoverer_collectMetricLists(t *testing.T) {
+ d, _, teardown := prepareDiscovererSim(t)
+ defer teardown()
+
+ raw, err := d.discover()
+ require.NoError(t, err)
+
+ res := d.build(raw)
+ err = d.collectMetricLists(res)
+
+ require.NoError(t, err)
+ assert.True(t, isMetricListsCollected(res))
+}
+
+func prepareDiscovererSim(t *testing.T) (d *Discoverer, model *simulator.Model, teardown func()) {
+ model, srv := createSim(t)
+ teardown = func() { model.Remove(); srv.Close() }
+ c := newClient(t, srv.URL)
+
+ return New(c), model, teardown
+}
+
+func newClient(t *testing.T, vCenterURL *url.URL) *client.Client {
+ c, err := client.New(client.Config{
+ URL: vCenterURL.String(),
+ User: "admin",
+ Password: "password",
+ Timeout: time.Second * 3,
+ TLSConfig: tlscfg.TLSConfig{InsecureSkipVerify: true},
+ })
+ require.NoError(t, err)
+ return c
+}
+
+func createSim(t *testing.T) (*simulator.Model, *simulator.Server) {
+ model := simulator.VPX()
+ err := model.Create()
+ require.NoError(t, err)
+ model.Service.TLS = new(tls.Config)
+ return model, model.Service.NewServer()
+}
+
+func isHierarchySet(res *rs.Resources) bool {
+ for _, c := range res.Clusters {
+ if !c.Hier.IsSet() {
+ return false
+ }
+ }
+ for _, h := range res.Hosts {
+ if !h.Hier.IsSet() {
+ return false
+ }
+ }
+ for _, v := range res.VMs {
+ if !v.Hier.IsSet() {
+ return false
+ }
+ }
+ return true
+}
+
+func isMetricListsCollected(res *rs.Resources) bool {
+ for _, h := range res.Hosts {
+ if h.MetricList == nil {
+ return false
+ }
+ }
+ for _, v := range res.VMs {
+ if v.MetricList == nil {
+ return false
+ }
+ }
+ return true
+}
+
+type falseHostMatcher struct{}
+
+func (falseHostMatcher) Match(*rs.Host) bool { return false }
+
+type falseVMMatcher struct{}
+
+func (falseVMMatcher) Match(*rs.VM) bool { return false }
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover/filter.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover/filter.go
new file mode 100644
index 000000000..73c1481e3
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover/filter.go
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package discover
+
+import (
+ "time"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+)
+
+func (d Discoverer) matchHost(host *rs.Host) bool {
+ if d.HostMatcher == nil {
+ return true
+ }
+ return d.HostMatcher.Match(host)
+}
+
+func (d Discoverer) matchVM(vm *rs.VM) bool {
+ if d.VMMatcher == nil {
+ return true
+ }
+ return d.VMMatcher.Match(vm)
+}
+
+func (d Discoverer) removeUnmatched(res *rs.Resources) (removed int) {
+ d.Debug("discovering : filtering : starting filtering resources process")
+ t := time.Now()
+ numH, numV := len(res.Hosts), len(res.VMs)
+ removed += d.removeUnmatchedHosts(res.Hosts)
+ removed += d.removeUnmatchedVMs(res.VMs)
+ d.Infof("discovering : filtering : filtered %d/%d hosts, %d/%d vms, process took %s",
+ numH-len(res.Hosts),
+ numH,
+ numV-len(res.VMs),
+ numV,
+ time.Since(t))
+ return
+}
+
+func (d Discoverer) removeUnmatchedHosts(hosts rs.Hosts) (removed int) {
+ for _, v := range hosts {
+ if !d.matchHost(v) {
+ removed++
+ hosts.Remove(v.ID)
+ }
+ }
+ d.Debugf("discovering : filtering : removed %d unmatched hosts", removed)
+ return removed
+}
+
+func (d Discoverer) removeUnmatchedVMs(vms rs.VMs) (removed int) {
+ for _, v := range vms {
+ if !d.matchVM(v) {
+ removed++
+ vms.Remove(v.ID)
+ }
+ }
+ d.Debugf("discovering : filtering : removed %d unmatched vms", removed)
+ return removed
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover/hierarchy.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover/hierarchy.go
new file mode 100644
index 000000000..4cea75dcd
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover/hierarchy.go
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package discover
+
+import (
+ "time"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+)
+
+func (d Discoverer) setHierarchy(res *rs.Resources) error {
+ d.Debug("discovering : hierarchy : start setting resources hierarchy process")
+ t := time.Now()
+
+ c := d.setClustersHierarchy(res)
+ h := d.setHostsHierarchy(res)
+ v := d.setVMsHierarchy(res)
+
+ // notSet := len(res.Clusters) + len(res.Hosts) + len(res.VMs) - (c + h + v)
+ d.Infof("discovering : hierarchy : set %d/%d clusters, %d/%d hosts, %d/%d vms, process took %s",
+ c, len(res.Clusters),
+ h, len(res.Hosts),
+ v, len(res.VMs),
+ time.Since(t),
+ )
+
+ return nil
+}
+
+func (d Discoverer) setClustersHierarchy(res *rs.Resources) (set int) {
+ for _, cluster := range res.Clusters {
+ if setClusterHierarchy(cluster, res) {
+ set++
+ }
+ }
+ return set
+}
+
+func (d Discoverer) setHostsHierarchy(res *rs.Resources) (set int) {
+ for _, host := range res.Hosts {
+ if setHostHierarchy(host, res) {
+ set++
+ }
+ }
+ return set
+}
+
+func (d Discoverer) setVMsHierarchy(res *rs.Resources) (set int) {
+ for _, vm := range res.VMs {
+ if setVMHierarchy(vm, res) {
+ set++
+ }
+ }
+ return set
+}
+
+func setClusterHierarchy(cluster *rs.Cluster, res *rs.Resources) bool {
+ dc := res.DataCenters.Get(cluster.ParentID)
+ if dc == nil {
+ return false
+ }
+ cluster.Hier.DC.Set(dc.ID, dc.Name)
+ return cluster.Hier.IsSet()
+}
+
+func setHostHierarchy(host *rs.Host, res *rs.Resources) bool {
+ cr := res.Clusters.Get(host.ParentID)
+ if cr == nil {
+ return false
+ }
+ host.Hier.Cluster.Set(cr.ID, cr.Name)
+
+ dc := res.DataCenters.Get(cr.ParentID)
+ if dc == nil {
+ return false
+ }
+ host.Hier.DC.Set(dc.ID, dc.Name)
+ return host.Hier.IsSet()
+}
+
+func setVMHierarchy(vm *rs.VM, res *rs.Resources) bool {
+ h := res.Hosts.Get(vm.ParentID)
+ if h == nil {
+ return false
+ }
+ vm.Hier.Host.Set(h.ID, h.Name)
+
+ cr := res.Clusters.Get(h.ParentID)
+ if cr == nil {
+ return false
+ }
+ vm.Hier.Cluster.Set(cr.ID, cr.Name)
+
+ dc := res.DataCenters.Get(cr.ParentID)
+ if dc == nil {
+ return false
+ }
+ vm.Hier.DC.Set(dc.ID, dc.Name)
+ return vm.Hier.IsSet()
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/discover/metric_lists.go b/src/go/collectors/go.d.plugin/modules/vsphere/discover/metric_lists.go
new file mode 100644
index 000000000..0eecb81ea
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/discover/metric_lists.go
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package discover
+
+import (
+ "sort"
+ "time"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+
+ "github.com/vmware/govmomi/performance"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+func (d Discoverer) collectMetricLists(res *rs.Resources) error {
+ d.Debug("discovering : metric lists : starting resources metric lists collection process")
+ t := time.Now()
+ perfCounters, err := d.CounterInfoByName()
+ if err != nil {
+ return err
+ }
+
+ hostML := simpleHostMetricList(perfCounters)
+ for _, h := range res.Hosts {
+ h.MetricList = hostML
+ }
+ vmML := simpleVMMetricList(perfCounters)
+ for _, v := range res.VMs {
+ v.MetricList = vmML
+ }
+
+ d.Infof("discovering : metric lists : collected metric lists for %d/%d hosts, %d/%d vms, process took %s",
+ len(res.Hosts),
+ len(res.Hosts),
+ len(res.VMs),
+ len(res.VMs),
+ time.Since(t),
+ )
+
+ return nil
+}
+
+func simpleHostMetricList(pci map[string]*types.PerfCounterInfo) performance.MetricList {
+ return simpleMetricList(hostMetrics, pci)
+}
+
+func simpleVMMetricList(pci map[string]*types.PerfCounterInfo) performance.MetricList {
+ return simpleMetricList(vmMetrics, pci)
+}
+
+func simpleMetricList(metrics []string, pci map[string]*types.PerfCounterInfo) performance.MetricList {
+ sort.Strings(metrics)
+
+ var pml performance.MetricList
+ for _, v := range metrics {
+ m, ok := pci[v]
+ if !ok {
+ // TODO: should be logged
+ continue
+ }
+ // TODO: only summary metrics for now
+ // TODO: some metrics only appear if Instance is *, for example
+ // virtualDisk.totalWriteLatency.average.scsi0:0
+ // virtualDisk.numberWriteAveraged.average.scsi0:0
+ // virtualDisk.write.average.scsi0:0
+ // virtualDisk.totalReadLatency.average.scsi0:0
+ // virtualDisk.numberReadAveraged.average.scsi0:0
+ // virtualDisk.read.average.scsi0:0
+ // disk.numberReadAveraged.average
+ // disk.numberWriteAveraged.average
+ // TODO: metrics will be unsorted after if at least one Instance is *
+ pml = append(pml, types.PerfMetricId{CounterId: m.Key, Instance: ""})
+ }
+ return pml
+}
+
+var (
+ vmMetrics = []string{
+ "cpu.usage.average",
+
+ "mem.usage.average",
+ "mem.granted.average",
+ "mem.consumed.average",
+ "mem.active.average",
+ "mem.shared.average",
+ // Refers to VMkernel swapping!
+ "mem.swapinRate.average",
+ "mem.swapoutRate.average",
+ "mem.swapped.average",
+
+ "net.bytesRx.average",
+ "net.bytesTx.average",
+ "net.packetsRx.summation",
+ "net.packetsTx.summation",
+ "net.droppedRx.summation",
+ "net.droppedTx.summation",
+
+ // the only summary disk metrics
+ "disk.read.average",
+ "disk.write.average",
+ "disk.maxTotalLatency.latest",
+
+ "sys.uptime.latest",
+ }
+
+ hostMetrics = []string{
+ "cpu.usage.average",
+
+ "mem.usage.average",
+ "mem.granted.average",
+ "mem.consumed.average",
+ "mem.active.average",
+ "mem.shared.average",
+ "mem.sharedcommon.average",
+ // Refers to VMkernel swapping!
+ "mem.swapinRate.average",
+ "mem.swapoutRate.average",
+
+ "net.bytesRx.average",
+ "net.bytesTx.average",
+ "net.packetsRx.summation",
+ "net.packetsTx.summation",
+ "net.droppedRx.summation",
+ "net.droppedTx.summation",
+ "net.errorsRx.summation",
+ "net.errorsTx.summation",
+
+ // the only summary disk metrics
+ "disk.read.average",
+ "disk.write.average",
+ "disk.maxTotalLatency.latest",
+
+ "sys.uptime.latest",
+ }
+)
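The TODO notes in `simpleMetricList` point out that some counters (for example the per-device `virtualDisk.*` ones) are only reported when `Instance` is set to `*`. To make that concrete, here is a hedged sketch of what such a variant could look like; `perInstanceMetricList` is a hypothetical helper, not part of this patch, and it reuses only the types already imported by `metric_lists.go`:

```go
package discover

import (
	"sort"

	"github.com/vmware/govmomi/performance"
	"github.com/vmware/govmomi/vim25/types"
)

// perInstanceMetricList is a hypothetical variant of simpleMetricList: instead of
// requesting only the aggregated series (Instance: ""), it sets Instance to "*",
// which asks vCenter to also return one series per device instance
// (e.g. virtualDisk.read.average.scsi0:0).
func perInstanceMetricList(metrics []string, pci map[string]*types.PerfCounterInfo) performance.MetricList {
	sort.Strings(metrics)

	var pml performance.MetricList
	for _, name := range metrics {
		ci, ok := pci[name]
		if !ok {
			continue // unknown counter name, same silent skip as in simpleMetricList
		}
		pml = append(pml, types.PerfMetricId{CounterId: ci.Key, Instance: "*"})
	}
	return pml
}
```

As the TODOs also warn, requesting `Instance: "*"` returns one series per device in addition to the aggregated one, so the resulting list is no longer limited to summary metrics and may come back unsorted.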
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/init.go b/src/go/collectors/go.d.plugin/modules/vsphere/init.go
new file mode 100644
index 000000000..eb98e92df
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/init.go
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ "errors"
+
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/client"
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/discover"
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/scrape"
+)
+
+func (vs *VSphere) validateConfig() error {
+ const minRecommendedUpdateEvery = 20
+
+ if vs.URL == "" {
+ return errors.New("URL is not set")
+ }
+ if vs.Username == "" || vs.Password == "" {
+ return errors.New("username or password not set")
+ }
+ if vs.UpdateEvery < minRecommendedUpdateEvery {
+ vs.Warningf("update_every is too low, minimum recommended is %d", minRecommendedUpdateEvery)
+ }
+ return nil
+}
+
+func (vs *VSphere) initClient() (*client.Client, error) {
+ config := client.Config{
+ URL: vs.URL,
+ User: vs.Username,
+ Password: vs.Password,
+ Timeout: vs.Timeout.Duration(),
+ TLSConfig: vs.Client.TLSConfig,
+ }
+ return client.New(config)
+}
+
+func (vs *VSphere) initDiscoverer(c *client.Client) error {
+ d := discover.New(c)
+ d.Logger = vs.Logger
+
+ hm, err := vs.HostsInclude.Parse()
+ if err != nil {
+ return err
+ }
+ if hm != nil {
+ d.HostMatcher = hm
+ }
+ vmm, err := vs.VMsInclude.Parse()
+ if err != nil {
+ return err
+ }
+ if vmm != nil {
+ d.VMMatcher = vmm
+ }
+
+ vs.discoverer = d
+ return nil
+}
+
+func (vs *VSphere) initScraper(c *client.Client) {
+ ms := scrape.New(c)
+ ms.Logger = vs.Logger
+ vs.scraper = ms
+}
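For reference, a minimal standalone sketch of how the `client` package wired up in `initClient` might be used on its own; the URL and credentials are placeholders taken from the documentation examples, and the optional `TLSConfig` field is left at its zero value:

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/client"
)

func main() {
	// Mirrors initClient: build a client.Config and hand it to client.New.
	cfg := client.Config{
		URL:      "https://203.0.113.1", // placeholder vCenter URL
		User:     "admin@vsphere.local", // placeholder credentials
		Password: "somepassword",
		Timeout:  20 * time.Second,
	}

	c, err := client.New(cfg)
	if err != nil {
		log.Fatalf("failed to create vsphere client: %v", err)
	}

	fmt.Printf("created vsphere client: %T\n", c)
}
```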
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/integrations/vmware_vcenter_server.md b/src/go/collectors/go.d.plugin/modules/vsphere/integrations/vmware_vcenter_server.md
new file mode 100644
index 000000000..6d7465554
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/integrations/vmware_vcenter_server.md
@@ -0,0 +1,322 @@
+<!--startmeta
+custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/vsphere/README.md"
+meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/vsphere/metadata.yaml"
+sidebar_label: "VMware vCenter Server"
+learn_status: "Published"
+learn_rel_path: "Collecting Metrics/Containers and VMs"
+most_popular: True
+message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE"
+endmeta-->
+
+# VMware vCenter Server
+
+
+<img src="https://netdata.cloud/img/vmware.svg" width="150"/>
+
+
+Plugin: go.d.plugin
+Module: vsphere
+
+<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" />
+
+## Overview
+
+This collector monitors host and VM performance statistics from `vCenter` servers.
+
+> **Warning**: The `vsphere` collector cannot re-login and continue collecting metrics after a vCenter reboot.
+> go.d.plugin needs to be restarted.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default `update_every` is 20 seconds, and it doesn't make sense to decrease the value.
+**VMware real-time statistics are generated at a 20-second granularity**.
+
+For big installations, 20 seconds is likely not enough and the value should be increased.
+
+To get a better picture, we recommend running the collector in debug mode and checking how long it takes to collect metrics.
+
+<details>
+<summary>Example (all unrelated debug lines were removed)</summary>
+
+```
+[ilyam@pc]$ ./go.d.plugin -d -m vsphere
+[ DEBUG ] vsphere[vsphere] discover.go:94 discovering : starting resource discovering process
+[ DEBUG ] vsphere[vsphere] discover.go:102 discovering : found 3 dcs, process took 49.329656ms
+[ DEBUG ] vsphere[vsphere] discover.go:109 discovering : found 12 folders, process took 49.538688ms
+[ DEBUG ] vsphere[vsphere] discover.go:116 discovering : found 3 clusters, process took 47.722692ms
+[ DEBUG ] vsphere[vsphere] discover.go:123 discovering : found 2 hosts, process took 52.966995ms
+[ DEBUG ] vsphere[vsphere] discover.go:130 discovering : found 2 vms, process took 49.832979ms
+[ INFO ] vsphere[vsphere] discover.go:140 discovering : found 3 dcs, 12 folders, 3 clusters (2 dummy), 2 hosts, 3 vms, process took 249.655993ms
+[ DEBUG ] vsphere[vsphere] build.go:12 discovering : building : starting building resources process
+[ INFO ] vsphere[vsphere] build.go:23 discovering : building : built 3/3 dcs, 12/12 folders, 3/3 clusters, 2/2 hosts, 3/3 vms, process took 63.3µs
+[ DEBUG ] vsphere[vsphere] hierarchy.go:10 discovering : hierarchy : start setting resources hierarchy process
+[ INFO ] vsphere[vsphere] hierarchy.go:18 discovering : hierarchy : set 3/3 clusters, 2/2 hosts, 3/3 vms, process took 6.522µs
+[ DEBUG ] vsphere[vsphere] filter.go:24 discovering : filtering : starting filtering resources process
+[ DEBUG ] vsphere[vsphere] filter.go:45 discovering : filtering : removed 0 unmatched hosts
+[ DEBUG ] vsphere[vsphere] filter.go:56 discovering : filtering : removed 0 unmatched vms
+[ INFO ] vsphere[vsphere] filter.go:29 discovering : filtering : filtered 0/2 hosts, 0/3 vms, process took 42.973µs
+[ DEBUG ] vsphere[vsphere] metric_lists.go:14 discovering : metric lists : starting resources metric lists collection process
+[ INFO ] vsphere[vsphere] metric_lists.go:30 discovering : metric lists : collected metric lists for 2/2 hosts, 3/3 vms, process took 275.60764ms
+[ INFO ] vsphere[vsphere] discover.go:74 discovering : discovered 2/2 hosts, 3/3 vms, the whole process took 525.614041ms
+[ INFO ] vsphere[vsphere] discover.go:11 starting discovery process, will do discovery every 5m0s
+[ DEBUG ] vsphere[vsphere] collect.go:11 starting collection process
+[ DEBUG ] vsphere[vsphere] scrape.go:48 scraping : scraped metrics for 2/2 hosts, process took 96.257374ms
+[ DEBUG ] vsphere[vsphere] scrape.go:60 scraping : scraped metrics for 3/3 vms, process took 57.879697ms
+[ DEBUG ] vsphere[vsphere] collect.go:23 metrics collected, process took 154.77997ms
+```
+
+</details>
+
+Here you can see that discovery took `525.614041ms` and collecting metrics took `154.77997ms`. Discovery runs in a separate thread, so it doesn't affect collection.
+The `update_every` and `timeout` parameters should be adjusted based on these numbers.
+
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per virtual machine
+
+These metrics refer to the Virtual Machine.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| datacenter | Datacenter name |
+| cluster | Cluster name |
+| host | Host name |
+| vm | Virtual Machine name |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| vsphere.vm_cpu_utilization | used | percentage |
+| vsphere.vm_mem_utilization | used | percentage |
+| vsphere.vm_mem_usage | granted, consumed, active, shared | KiB |
+| vsphere.vm_mem_swap_usage | swapped | KiB |
+| vsphere.vm_mem_swap_io | in, out | KiB/s |
+| vsphere.vm_disk_io | read, write | KiB/s |
+| vsphere.vm_disk_max_latency | latency | milliseconds |
+| vsphere.vm_net_traffic | received, sent | KiB/s |
+| vsphere.vm_net_packets | received, sent | packets |
+| vsphere.vm_net_drops | received, sent | packets |
+| vsphere.vm_overall_status | green, red, yellow, gray | status |
+| vsphere.vm_system_uptime | uptime | seconds |
+
+### Per host
+
+These metrics refer to the ESXi host.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| datacenter | Datacenter name |
+| cluster | Cluster name |
+| host | Host name |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| vsphere.host_cpu_utilization | used | percentage |
+| vsphere.host_mem_utilization | used | percentage |
+| vsphere.host_mem_usage | granted, consumed, active, shared, sharedcommon | KiB |
+| vsphere.host_mem_swap_io | in, out | KiB/s |
+| vsphere.host_disk_io | read, write | KiB/s |
+| vsphere.host_disk_max_latency | latency | milliseconds |
+| vsphere.host_net_traffic | received, sent | KiB/s |
+| vsphere.host_net_packets | received, sent | packets |
+| vsphere.host_net_drops | received, sent | packets |
+| vsphere.host_net_errors | received, sent | errors |
+| vsphere.host_overall_status | green, red, yellow, gray | status |
+| vsphere.host_system_uptime | uptime | seconds |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ vsphere_vm_cpu_utilization ](https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf) | vsphere.vm_cpu_utilization | Virtual Machine CPU utilization |
+| [ vsphere_vm_mem_usage ](https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf) | vsphere.vm_mem_utilization | Virtual Machine memory utilization |
+| [ vsphere_host_cpu_utilization ](https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf) | vsphere.host_cpu_utilization | ESXi Host CPU utilization |
+| [ vsphere_host_mem_utilization ](https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf) | vsphere.host_mem_utilization | ESXi Host memory utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `go.d/vsphere.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config go.d/vsphere.conf
+```
+#### Options
+
+The following options can be defined globally: update_every, autodetection_retry.
+
+
+<details open><summary>Config options</summary>
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Data collection frequency. | 20 | no |
+| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no |
+| url | vCenter server URL. | | yes |
+| host_include | Hosts selector (filter). | | no |
+| vm_include | Virtual machines selector (filter). | | no |
+| discovery_interval | Hosts and VMs discovery interval. | 300 | no |
+| timeout | HTTP request timeout. | 20 | no |
+| username | Username for basic HTTP authentication. | | no |
+| password | Password for basic HTTP authentication. | | no |
+| proxy_url | Proxy URL. | | no |
+| proxy_username | Username for proxy basic HTTP authentication. | | no |
+| proxy_password | Password for proxy basic HTTP authentication. | | no |
+| not_follow_redirects | Redirect handling policy. Controls whether the client follows redirects. | no | no |
+| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no |
+| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no |
+| tls_cert | Client TLS certificate. | | no |
+| tls_key | Client TLS key. | | no |
+
+##### host_include
+
+Metrics of hosts matching the selector will be collected.
+
+- Include pattern syntax: "/Datacenter pattern/Cluster pattern/Host pattern".
+- Match pattern syntax: [simple patterns](/src/libnetdata/simple_pattern/README.md#simple-patterns).
+- Syntax:
+
+ ```yaml
+ host_include:
+ - '/DC1/*' # select all hosts from datacenter DC1
+ - '/DC2/*/!Host2 *' # select all hosts from datacenter DC2 except Host2
+ - '/DC3/Cluster3/*' # select all hosts from datacenter DC3 cluster Cluster3
+ ```
+
+
+##### vm_include
+
+Metrics of VMs matching the selector will be collected.
+
+- Include pattern syntax: "/Datacenter pattern/Cluster pattern/Host pattern/VM pattern".
+- Match pattern syntax: [simple patterns](/src/libnetdata/simple_pattern/README.md#simple-patterns).
+- Syntax:
+
+ ```yaml
+ vm_include:
+ - '/DC1/*' # select all VMs from datacenter DC1
+ - '/DC2/*/*/!VM2 *' # select all VMs from datacenter DC2 except VM2
+ - '/DC3/Cluster3/*' # select all VMs from datacenter DC3 cluster Cluster3
+ ```
+
+
+</details>
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+jobs:
+ - name : vcenter1
+ url : https://203.0.113.1
+ username : admin@vsphere.local
+ password : somepassword
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+<details open><summary>Config</summary>
+
+```yaml
+jobs:
+ - name : vcenter1
+ url : https://203.0.113.1
+ username : admin@vsphere.local
+ password : somepassword
+
+ - name : vcenter2
+ url : https://203.0.113.10
+ username : admin@vsphere.local
+ password : somepassword
+
+```
+</details>
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `vsphere` collector, run the `go.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `go.d.plugin` to debug the collector:
+
+ ```bash
+ ./go.d.plugin -d -m vsphere
+ ```
+
+
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/match/match.go b/src/go/collectors/go.d.plugin/modules/vsphere/match/match.go
new file mode 100644
index 000000000..846e6f371
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/match/match.go
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package match
+
+import (
+ "fmt"
+ "strings"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/matcher"
+)
+
+type HostMatcher interface {
+ Match(*rs.Host) bool
+}
+
+type VMMatcher interface {
+ Match(*rs.VM) bool
+}
+
+type (
+ hostDCMatcher struct{ m matcher.Matcher }
+ hostClusterMatcher struct{ m matcher.Matcher }
+ hostHostMatcher struct{ m matcher.Matcher }
+ vmDCMatcher struct{ m matcher.Matcher }
+ vmClusterMatcher struct{ m matcher.Matcher }
+ vmHostMatcher struct{ m matcher.Matcher }
+ vmVMMatcher struct{ m matcher.Matcher }
+ orHostMatcher struct{ lhs, rhs HostMatcher }
+ orVMMatcher struct{ lhs, rhs VMMatcher }
+ andHostMatcher struct{ lhs, rhs HostMatcher }
+ andVMMatcher struct{ lhs, rhs VMMatcher }
+)
+
+func (m hostDCMatcher) Match(host *rs.Host) bool { return m.m.MatchString(host.Hier.DC.Name) }
+func (m hostClusterMatcher) Match(host *rs.Host) bool { return m.m.MatchString(host.Hier.Cluster.Name) }
+func (m hostHostMatcher) Match(host *rs.Host) bool { return m.m.MatchString(host.Name) }
+func (m vmDCMatcher) Match(vm *rs.VM) bool { return m.m.MatchString(vm.Hier.DC.Name) }
+func (m vmClusterMatcher) Match(vm *rs.VM) bool { return m.m.MatchString(vm.Hier.Cluster.Name) }
+func (m vmHostMatcher) Match(vm *rs.VM) bool { return m.m.MatchString(vm.Hier.Host.Name) }
+func (m vmVMMatcher) Match(vm *rs.VM) bool { return m.m.MatchString(vm.Name) }
+func (m orHostMatcher) Match(host *rs.Host) bool { return m.lhs.Match(host) || m.rhs.Match(host) }
+func (m orVMMatcher) Match(vm *rs.VM) bool { return m.lhs.Match(vm) || m.rhs.Match(vm) }
+func (m andHostMatcher) Match(host *rs.Host) bool { return m.lhs.Match(host) && m.rhs.Match(host) }
+func (m andVMMatcher) Match(vm *rs.VM) bool { return m.lhs.Match(vm) && m.rhs.Match(vm) }
+
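+// The four builders below left-fold their arguments into nested AND/OR nodes,
+// e.g. newAndHostMatcher(a, b, c, d) yields ((a AND b) AND c) AND d.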
+func newAndHostMatcher(lhs, rhs HostMatcher, others ...HostMatcher) andHostMatcher {
+ m := andHostMatcher{lhs: lhs, rhs: rhs}
+ switch len(others) {
+ case 0:
+ return m
+ default:
+ return newAndHostMatcher(m, others[0], others[1:]...)
+ }
+}
+
+func newAndVMMatcher(lhs, rhs VMMatcher, others ...VMMatcher) andVMMatcher {
+ m := andVMMatcher{lhs: lhs, rhs: rhs}
+ switch len(others) {
+ case 0:
+ return m
+ default:
+ return newAndVMMatcher(m, others[0], others[1:]...)
+ }
+}
+
+func newOrHostMatcher(lhs, rhs HostMatcher, others ...HostMatcher) orHostMatcher {
+ m := orHostMatcher{lhs: lhs, rhs: rhs}
+ switch len(others) {
+ case 0:
+ return m
+ default:
+ return newOrHostMatcher(m, others[0], others[1:]...)
+ }
+}
+
+func newOrVMMatcher(lhs, rhs VMMatcher, others ...VMMatcher) orVMMatcher {
+ m := orVMMatcher{lhs: lhs, rhs: rhs}
+ switch len(others) {
+ case 0:
+ return m
+ default:
+ return newOrVMMatcher(m, others[0], others[1:]...)
+ }
+}
+
+type (
+ VMIncludes []string
+ HostIncludes []string
+)
+
+func (vi VMIncludes) Parse() (VMMatcher, error) {
+ var ms []VMMatcher
+ for _, v := range vi {
+ m, err := parseVMInclude(v)
+ if err != nil {
+ return nil, err
+ }
+ if m == nil {
+ continue
+ }
+ ms = append(ms, m)
+ }
+
+ switch len(ms) {
+ case 0:
+ return nil, nil
+ case 1:
+ return ms[0], nil
+ default:
+ return newOrVMMatcher(ms[0], ms[1], ms[2:]...), nil
+ }
+}
+
+func (hi HostIncludes) Parse() (HostMatcher, error) {
+ var ms []HostMatcher
+ for _, v := range hi {
+ m, err := parseHostInclude(v)
+ if err != nil {
+ return nil, err
+ }
+ if m == nil {
+ continue
+ }
+ ms = append(ms, m)
+ }
+
+ switch len(ms) {
+ case 0:
+ return nil, nil
+ case 1:
+ return ms[0], nil
+ default:
+ return newOrHostMatcher(ms[0], ms[1], ms[2:]...), nil
+ }
+}
+
+const (
+ datacenterIdx = iota
+ clusterIdx
+ hostIdx
+ vmIdx
+)
+
+func cleanInclude(include string) string {
+ return strings.Trim(include, "/")
+}
+
+func parseHostInclude(include string) (HostMatcher, error) {
+ if !isIncludeFormatValid(include) {
+ return nil, fmt.Errorf("bad include format: %s", include)
+ }
+
+ include = cleanInclude(include)
+ parts := strings.Split(include, "/") // /dc/clusterIdx/hostIdx
+ var ms []HostMatcher
+
+ for i, v := range parts {
+ m, err := parseSubInclude(v)
+ if err != nil {
+ return nil, err
+ }
+ switch i {
+ case datacenterIdx:
+ ms = append(ms, hostDCMatcher{m})
+ case clusterIdx:
+ ms = append(ms, hostClusterMatcher{m})
+ case hostIdx:
+ ms = append(ms, hostHostMatcher{m})
+ default:
+ }
+ }
+
+ switch len(ms) {
+ case 0:
+ return nil, nil
+ case 1:
+ return ms[0], nil
+ default:
+ return newAndHostMatcher(ms[0], ms[1], ms[2:]...), nil
+ }
+}
+
+func parseVMInclude(include string) (VMMatcher, error) {
+ if !isIncludeFormatValid(include) {
+ return nil, fmt.Errorf("bad include format: %s", include)
+ }
+
+ include = cleanInclude(include)
+ parts := strings.Split(include, "/") // /dc/clusterIdx/hostIdx/vmIdx
+ var ms []VMMatcher
+
+ for i, v := range parts {
+ m, err := parseSubInclude(v)
+ if err != nil {
+ return nil, err
+ }
+ switch i {
+ case datacenterIdx:
+ ms = append(ms, vmDCMatcher{m})
+ case clusterIdx:
+ ms = append(ms, vmClusterMatcher{m})
+ case hostIdx:
+ ms = append(ms, vmHostMatcher{m})
+ case vmIdx:
+ ms = append(ms, vmVMMatcher{m})
+ }
+ }
+
+ switch len(ms) {
+ case 0:
+ return nil, nil
+ case 1:
+ return ms[0], nil
+ default:
+ return newAndVMMatcher(ms[0], ms[1], ms[2:]...), nil
+ }
+}
+
+func parseSubInclude(sub string) (matcher.Matcher, error) {
+ sub = strings.TrimSpace(sub)
+ if sub == "" || sub == "!*" {
+ return matcher.FALSE(), nil
+ }
+ if sub == "*" {
+ return matcher.TRUE(), nil
+ }
+ return matcher.NewSimplePatternsMatcher(sub)
+}
+
+func isIncludeFormatValid(line string) bool {
+ return strings.HasPrefix(line, "/")
+}
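To make the selector grammar handled by this file concrete, here is a small hedged sketch of driving the exported `HostIncludes` API from calling code; the include string and the host values are invented for illustration:

```go
package main

import (
	"fmt"
	"log"

	"github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/match"
	"github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
)

func main() {
	// One selector: datacenter "DC1*", any cluster, hosts named "esxi-*".
	hm, err := match.HostIncludes{"/DC1*/*/esxi-*"}.Parse()
	if err != nil {
		log.Fatal(err)
	}

	// A host whose hierarchy was already resolved (see discover/hierarchy.go).
	host := &resources.Host{Name: "esxi-01"}
	host.Hier.DC.Set("datacenter-1", "DC1")
	host.Hier.Cluster.Set("domain-c7", "Cluster1")

	fmt.Println(hm.Match(host)) // true
}
```

`VMIncludes` works the same way, with a fourth path segment for the VM name.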
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/match/match_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/match/match_test.go
new file mode 100644
index 000000000..6bfe91853
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/match/match_test.go
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package match
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/matcher"
+
+ "github.com/stretchr/testify/assert"
+)
+
+var (
+ trueHostDC = hostDCMatcher{matcher.TRUE()}
+ falseHostDC = hostDCMatcher{matcher.FALSE()}
+ trueVMDC = vmDCMatcher{matcher.TRUE()}
+ falseVMDC = vmDCMatcher{matcher.FALSE()}
+)
+
+func TestOrHostMatcher_Match(t *testing.T) {
+ tests := map[string]struct {
+ expected bool
+ lhs HostMatcher
+ rhs HostMatcher
+ }{
+ "true, true": {expected: true, lhs: trueHostDC, rhs: trueHostDC},
+ "true, false": {expected: true, lhs: trueHostDC, rhs: falseHostDC},
+ "false, true": {expected: true, lhs: falseHostDC, rhs: trueHostDC},
+ "false, false": {expected: false, lhs: falseHostDC, rhs: falseHostDC},
+ }
+
+ var host resources.Host
+ for name, test := range tests {
+ t.Run(name, func(t *testing.T) {
+ m := newOrHostMatcher(test.lhs, test.rhs)
+ assert.Equal(t, test.expected, m.Match(&host))
+ })
+ }
+}
+
+func TestAndHostMatcher_Match(t *testing.T) {
+ tests := map[string]struct {
+ expected bool
+ lhs HostMatcher
+ rhs HostMatcher
+ }{
+ "true, true": {expected: true, lhs: trueHostDC, rhs: trueHostDC},
+ "true, false": {expected: false, lhs: trueHostDC, rhs: falseHostDC},
+ "false, true": {expected: false, lhs: falseHostDC, rhs: trueHostDC},
+ "false, false": {expected: false, lhs: falseHostDC, rhs: falseHostDC},
+ }
+
+ var host resources.Host
+ for name, test := range tests {
+ t.Run(name, func(t *testing.T) {
+ m := newAndHostMatcher(test.lhs, test.rhs)
+ assert.Equal(t, test.expected, m.Match(&host))
+ })
+ }
+}
+
+func TestOrVMMatcher_Match(t *testing.T) {
+ tests := map[string]struct {
+ expected bool
+ lhs VMMatcher
+ rhs VMMatcher
+ }{
+ "true, true": {expected: true, lhs: trueVMDC, rhs: trueVMDC},
+ "true, false": {expected: true, lhs: trueVMDC, rhs: falseVMDC},
+ "false, true": {expected: true, lhs: falseVMDC, rhs: trueVMDC},
+ "false, false": {expected: false, lhs: falseVMDC, rhs: falseVMDC},
+ }
+
+ var vm resources.VM
+ for name, test := range tests {
+ t.Run(name, func(t *testing.T) {
+ m := newOrVMMatcher(test.lhs, test.rhs)
+ assert.Equal(t, test.expected, m.Match(&vm))
+ })
+ }
+}
+
+func TestAndVMMatcher_Match(t *testing.T) {
+ tests := map[string]struct {
+ expected bool
+ lhs VMMatcher
+ rhs VMMatcher
+ }{
+ "true, true": {expected: true, lhs: trueVMDC, rhs: trueVMDC},
+ "true, false": {expected: false, lhs: trueVMDC, rhs: falseVMDC},
+ "false, true": {expected: false, lhs: falseVMDC, rhs: trueVMDC},
+ "false, false": {expected: false, lhs: falseVMDC, rhs: falseVMDC},
+ }
+
+ var vm resources.VM
+ for name, test := range tests {
+ t.Run(name, func(t *testing.T) {
+ m := newAndVMMatcher(test.lhs, test.rhs)
+ assert.Equal(t, test.expected, m.Match(&vm))
+ })
+ }
+}
+
+func TestHostIncludes_Parse(t *testing.T) {
+ tests := map[string]struct {
+ valid bool
+ expected HostMatcher
+ }{
+ "": {valid: false},
+ "*/C1/H1": {valid: false},
+ "/": {valid: true, expected: falseHostDC},
+ "/*": {valid: true, expected: trueHostDC},
+ "/!*": {valid: true, expected: falseHostDC},
+ "/!*/": {valid: true, expected: falseHostDC},
+ "/!*/ ": {
+ valid: true,
+ expected: andHostMatcher{
+ lhs: falseHostDC,
+ rhs: hostClusterMatcher{matcher.FALSE()},
+ },
+ },
+ "/DC1* DC2* !*/Cluster*": {
+ valid: true,
+ expected: andHostMatcher{
+ lhs: hostDCMatcher{mustSP("DC1* DC2* !*")},
+ rhs: hostClusterMatcher{mustSP("Cluster*")},
+ },
+ },
+ "/*/*/HOST1*": {
+ valid: true,
+ expected: andHostMatcher{
+ lhs: andHostMatcher{
+ lhs: trueHostDC,
+ rhs: hostClusterMatcher{matcher.TRUE()},
+ },
+ rhs: hostHostMatcher{mustSP("HOST1*")},
+ },
+ },
+ "/*/*/HOST1*/*/*": {
+ valid: true,
+ expected: andHostMatcher{
+ lhs: andHostMatcher{
+ lhs: trueHostDC,
+ rhs: hostClusterMatcher{matcher.TRUE()},
+ },
+ rhs: hostHostMatcher{mustSP("HOST1*")},
+ },
+ },
+ "[/DC1*,/DC2*]": {
+ valid: true,
+ expected: orHostMatcher{
+ lhs: hostDCMatcher{mustSP("DC1*")},
+ rhs: hostDCMatcher{mustSP("DC2*")},
+ },
+ },
+ "[/DC1*,/DC2*,/DC3*/Cluster1*/H*]": {
+ valid: true,
+ expected: orHostMatcher{
+ lhs: orHostMatcher{
+ lhs: hostDCMatcher{mustSP("DC1*")},
+ rhs: hostDCMatcher{mustSP("DC2*")},
+ },
+ rhs: andHostMatcher{
+ lhs: andHostMatcher{
+ lhs: hostDCMatcher{mustSP("DC3*")},
+ rhs: hostClusterMatcher{mustSP("Cluster1*")},
+ },
+ rhs: hostHostMatcher{mustSP("H*")},
+ },
+ },
+ },
+ }
+
+ for name, test := range tests {
+ t.Run(name, func(t *testing.T) {
+ includes := prepareIncludes(name)
+ m, err := HostIncludes(includes).Parse()
+
+ if !test.valid {
+ assert.Error(t, err)
+ } else {
+ assert.Equal(t, test.expected, m)
+ }
+ })
+ }
+}
+
+func TestVMIncludes_Parse(t *testing.T) {
+ tests := map[string]struct {
+ valid bool
+ includes []string
+ expected VMMatcher
+ }{
+ "": {valid: false},
+ "*/C1/H1/V1": {valid: false},
+ "/*": {valid: true, expected: trueVMDC},
+ "/!*": {valid: true, expected: falseVMDC},
+ "/!*/": {valid: true, expected: falseVMDC},
+ "/!*/ ": {
+ valid: true,
+ expected: andVMMatcher{
+ lhs: falseVMDC,
+ rhs: vmClusterMatcher{matcher.FALSE()},
+ },
+ },
+ "/DC1* DC2* !*/Cluster*": {
+ valid: true,
+ expected: andVMMatcher{
+ lhs: vmDCMatcher{mustSP("DC1* DC2* !*")},
+ rhs: vmClusterMatcher{mustSP("Cluster*")},
+ },
+ },
+ "/*/*/HOST1": {
+ valid: true,
+ expected: andVMMatcher{
+ lhs: andVMMatcher{
+ lhs: trueVMDC,
+ rhs: vmClusterMatcher{matcher.TRUE()},
+ },
+ rhs: vmHostMatcher{mustSP("HOST1")},
+ },
+ },
+ "/*/*/HOST1*/*/*": {
+ valid: true,
+ expected: andVMMatcher{
+ lhs: andVMMatcher{
+ lhs: andVMMatcher{
+ lhs: trueVMDC,
+ rhs: vmClusterMatcher{matcher.TRUE()},
+ },
+ rhs: vmHostMatcher{mustSP("HOST1*")},
+ },
+ rhs: vmVMMatcher{matcher.TRUE()},
+ },
+ },
+ "[/DC1*,/DC2*]": {
+ valid: true,
+ expected: orVMMatcher{
+ lhs: vmDCMatcher{mustSP("DC1*")},
+ rhs: vmDCMatcher{mustSP("DC2*")},
+ },
+ },
+ "[/DC1*,/DC2*,/DC3*/Cluster1*/H*/VM*]": {
+ valid: true,
+ expected: orVMMatcher{
+ lhs: orVMMatcher{
+ lhs: vmDCMatcher{mustSP("DC1*")},
+ rhs: vmDCMatcher{mustSP("DC2*")},
+ },
+ rhs: andVMMatcher{
+ lhs: andVMMatcher{
+ lhs: andVMMatcher{
+ lhs: vmDCMatcher{mustSP("DC3*")},
+ rhs: vmClusterMatcher{mustSP("Cluster1*")},
+ },
+ rhs: vmHostMatcher{mustSP("H*")},
+ },
+ rhs: vmVMMatcher{mustSP("VM*")},
+ },
+ },
+ },
+ }
+
+ for name, test := range tests {
+ t.Run(name, func(t *testing.T) {
+ includes := prepareIncludes(name)
+ m, err := VMIncludes(includes).Parse()
+
+ if !test.valid {
+ assert.Error(t, err)
+ } else {
+ assert.Equal(t, test.expected, m)
+ }
+ })
+ }
+}
+
+func prepareIncludes(include string) []string {
+ trimmed := strings.Trim(include, "[]")
+ return strings.Split(trimmed, ",")
+}
+
+func mustSP(expr string) matcher.Matcher {
+ return matcher.Must(matcher.NewSimplePatternsMatcher(expr))
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/metadata.yaml b/src/go/collectors/go.d.plugin/modules/vsphere/metadata.yaml
new file mode 100644
index 000000000..b40c7af93
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/metadata.yaml
@@ -0,0 +1,439 @@
+plugin_name: go.d.plugin
+modules:
+ - meta:
+ id: collector-go.d.plugin-vsphere
+ plugin_name: go.d.plugin
+ module_name: vsphere
+ monitored_instance:
+ name: VMware vCenter Server
+ link: https://www.vmware.com/products/vcenter-server.html
+ icon_filename: vmware.svg
+ categories:
+ - data-collection.containers-and-vms
+ keywords:
+ - vmware
+ - esxi
+ - vcenter
+ related_resources:
+ integrations:
+ list: []
+ info_provided_to_referring_integrations:
+ description: ""
+ most_popular: true
+ overview:
+ data_collection:
+ metrics_description: |
+ This collector monitors host and VM performance statistics from `vCenter` servers.
+
+ > **Warning**: The `vsphere` collector cannot re-login and continue collecting metrics after a vCenter reboot.
+ > go.d.plugin needs to be restarted.
+ method_description: ""
+ supported_platforms:
+ include: []
+ exclude: []
+ multi_instance: true
+ additional_permissions:
+ description: ""
+ default_behavior:
+ auto_detection:
+ description: ""
+ limits:
+ description: ""
+ performance_impact:
+ description: |
+ The default `update_every` is 20 seconds, and it doesn't make sense to decrease the value.
+ **VMware real-time statistics are generated at a 20-second granularity**.
+
+ For big installations, 20 seconds is likely not enough and the value should be increased.
+
+ To get a better picture, we recommend running the collector in debug mode and checking how long it takes to collect metrics.
+
+ <details>
+ <summary>Example (all unrelated debug lines were removed)</summary>
+
+ ```
+ [ilyam@pc]$ ./go.d.plugin -d -m vsphere
+ [ DEBUG ] vsphere[vsphere] discover.go:94 discovering : starting resource discovering process
+ [ DEBUG ] vsphere[vsphere] discover.go:102 discovering : found 3 dcs, process took 49.329656ms
+ [ DEBUG ] vsphere[vsphere] discover.go:109 discovering : found 12 folders, process took 49.538688ms
+ [ DEBUG ] vsphere[vsphere] discover.go:116 discovering : found 3 clusters, process took 47.722692ms
+ [ DEBUG ] vsphere[vsphere] discover.go:123 discovering : found 2 hosts, process took 52.966995ms
+ [ DEBUG ] vsphere[vsphere] discover.go:130 discovering : found 2 vms, process took 49.832979ms
+ [ INFO ] vsphere[vsphere] discover.go:140 discovering : found 3 dcs, 12 folders, 3 clusters (2 dummy), 2 hosts, 3 vms, process took 249.655993ms
+ [ DEBUG ] vsphere[vsphere] build.go:12 discovering : building : starting building resources process
+ [ INFO ] vsphere[vsphere] build.go:23 discovering : building : built 3/3 dcs, 12/12 folders, 3/3 clusters, 2/2 hosts, 3/3 vms, process took 63.3µs
+ [ DEBUG ] vsphere[vsphere] hierarchy.go:10 discovering : hierarchy : start setting resources hierarchy process
+ [ INFO ] vsphere[vsphere] hierarchy.go:18 discovering : hierarchy : set 3/3 clusters, 2/2 hosts, 3/3 vms, process took 6.522µs
+ [ DEBUG ] vsphere[vsphere] filter.go:24 discovering : filtering : starting filtering resources process
+ [ DEBUG ] vsphere[vsphere] filter.go:45 discovering : filtering : removed 0 unmatched hosts
+ [ DEBUG ] vsphere[vsphere] filter.go:56 discovering : filtering : removed 0 unmatched vms
+ [ INFO ] vsphere[vsphere] filter.go:29 discovering : filtering : filtered 0/2 hosts, 0/3 vms, process took 42.973µs
+ [ DEBUG ] vsphere[vsphere] metric_lists.go:14 discovering : metric lists : starting resources metric lists collection process
+ [ INFO ] vsphere[vsphere] metric_lists.go:30 discovering : metric lists : collected metric lists for 2/2 hosts, 3/3 vms, process took 275.60764ms
+ [ INFO ] vsphere[vsphere] discover.go:74 discovering : discovered 2/2 hosts, 3/3 vms, the whole process took 525.614041ms
+ [ INFO ] vsphere[vsphere] discover.go:11 starting discovery process, will do discovery every 5m0s
+ [ DEBUG ] vsphere[vsphere] collect.go:11 starting collection process
+ [ DEBUG ] vsphere[vsphere] scrape.go:48 scraping : scraped metrics for 2/2 hosts, process took 96.257374ms
+ [ DEBUG ] vsphere[vsphere] scrape.go:60 scraping : scraped metrics for 3/3 vms, process took 57.879697ms
+ [ DEBUG ] vsphere[vsphere] collect.go:23 metrics collected, process took 154.77997ms
+ ```
+
+ </details>
+
+ Here you can see that discovery took `525.614041ms` and collecting metrics took `154.77997ms`. Discovery runs in a separate thread, so it doesn't affect collection.
+ The `update_every` and `timeout` parameters should be adjusted based on these numbers.
+ setup:
+ prerequisites:
+ list: []
+ configuration:
+ file:
+ name: go.d/vsphere.conf
+ options:
+ description: |
+ The following options can be defined globally: update_every, autodetection_retry.
+ folding:
+ title: Config options
+ enabled: true
+ list:
+ - name: update_every
+ description: Data collection frequency.
+ default_value: 20
+ required: false
+ - name: autodetection_retry
+ description: Recheck interval in seconds. Zero means no recheck will be scheduled.
+ default_value: 0
+ required: false
+ - name: url
+ description: vCenter server URL.
+ default_value: ""
+ required: true
+ - name: host_include
+ description: Hosts selector (filter).
+ default_value: ""
+ required: false
+ detailed_description: |
+ Metrics of hosts matching the selector will be collected.
+
+ - Include pattern syntax: "/Datacenter pattern/Cluster pattern/Host pattern".
+ - Match pattern syntax: [simple patterns](/src/libnetdata/simple_pattern/README.md#simple-patterns).
+ - Syntax:
+
+ ```yaml
+ host_include:
+ - '/DC1/*' # select all hosts from datacenter DC1
+ - '/DC2/*/!Host2 *' # select all hosts from datacenter DC2 except Host2
+ - '/DC3/Cluster3/*' # select all hosts from datacenter DC3 cluster Cluster3
+ ```
+ - name: vm_include
+ description: Virtual machines selector (filter).
+ default_value: ""
+ required: false
+ detailed_description: |
+ Metrics of VMs matching the selector will be collected.
+
+ - Include pattern syntax: "/Datacenter pattern/Cluster pattern/Host pattern/VM pattern".
+ - Match pattern syntax: [simple patterns](/src/libnetdata/simple_pattern/README.md#simple-patterns).
+ - Syntax:
+
+ ```yaml
+ vm_include:
+ - '/DC1/*' # select all VMs from datacenter DC1
+ - '/DC2/*/*/!VM2 *' # select all VMs from datacenter DC2 except VM2
+ - '/DC3/Cluster3/*' # select all VMs from datacenter DC3 cluster Cluster3
+ ```
+ - name: discovery_interval
+ description: Hosts and VMs discovery interval.
+ default_value: 300
+ required: false
+ - name: timeout
+ description: HTTP request timeout.
+ default_value: 20
+ required: false
+ - name: username
+ description: Username for basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: password
+ description: Password for basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: proxy_url
+ description: Proxy URL.
+ default_value: ""
+ required: false
+ - name: proxy_username
+ description: Username for proxy basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: proxy_password
+ description: Password for proxy basic HTTP authentication.
+ default_value: ""
+ required: false
+ - name: not_follow_redirects
+ description: Redirect handling policy. Controls whether the client follows redirects.
+ default_value: no
+ required: false
+ - name: tls_skip_verify
+ description: Server certificate chain and hostname validation policy. Controls whether the client performs this check.
+ default_value: no
+ required: false
+ - name: tls_ca
+ description: Certification authority that the client uses when verifying the server's certificates.
+ default_value: ""
+ required: false
+ - name: tls_cert
+ description: Client TLS certificate.
+ default_value: ""
+ required: false
+ - name: tls_key
+ description: Client TLS key.
+ default_value: ""
+ required: false
+ examples:
+ folding:
+ title: Config
+ enabled: true
+ list:
+ - name: Basic
+ folding:
+ enabled: false
+ description: A basic example configuration.
+ config: |
+ jobs:
+ - name : vcenter1
+ url : https://203.0.113.1
+ username : admin@vsphere.local
+ password : somepassword
+ - name: Multi-instance
+ description: |
+ > **Note**: When you define multiple jobs, their names must be unique.
+
+ Collecting metrics from local and remote instances.
+ config: |
+ jobs:
+ - name : vcenter1
+ url : https://203.0.113.1
+ username : admin@vsphere.local
+ password : somepassword
+
+ - name : vcenter2
+ url : https://203.0.113.10
+ username : admin@vsphere.local
+ password : somepassword
+ troubleshooting:
+ problems:
+ list: []
+ alerts:
+ - name: vsphere_vm_cpu_utilization
+ metric: vsphere.vm_cpu_utilization
+ info: Virtual Machine CPU utilization
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf
+ - name: vsphere_vm_mem_usage
+ metric: vsphere.vm_mem_utilization
+ info: Virtual Machine memory utilization
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf
+ - name: vsphere_host_cpu_utilization
+ metric: vsphere.host_cpu_utilization
+ info: ESXi Host CPU utilization
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf
+ - name: vsphere_host_mem_utilization
+ metric: vsphere.host_mem_utilization
+ info: ESXi Host memory utilization
+ link: https://github.com/netdata/netdata/blob/master/src/health/health.d/vsphere.conf
+ metrics:
+ folding:
+ title: Metrics
+ enabled: false
+ description: ""
+ availability: []
+ scopes:
+ - name: virtual machine
+ description: These metrics refer to the Virtual Machine.
+ labels:
+ - name: datacenter
+ description: Datacenter name
+ - name: cluster
+ description: Cluster name
+ - name: host
+ description: Host name
+ - name: vm
+ description: Virtual Machine name
+ metrics:
+ - name: vsphere.vm_cpu_utilization
+ description: Virtual Machine CPU utilization
+ unit: percentage
+ chart_type: line
+ dimensions:
+ - name: used
+ - name: vsphere.vm_mem_utilization
+ description: Virtual Machine memory utilization
+ unit: percentage
+ chart_type: line
+ dimensions:
+ - name: used
+ - name: vsphere.vm_mem_usage
+ description: Virtual Machine memory usage
+ unit: KiB
+ chart_type: line
+ dimensions:
+ - name: granted
+ - name: consumed
+ - name: active
+ - name: shared
+ - name: vsphere.vm_mem_swap_usage
+ description: Virtual Machine VMKernel memory swap usage
+ unit: KiB
+ chart_type: line
+ dimensions:
+ - name: swapped
+ - name: vsphere.vm_mem_swap_io
+ description: Virtual Machine VMKernel memory swap IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: in
+ - name: out
+ - name: vsphere.vm_disk_io
+ description: Virtual Machine disk IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: read
+ - name: write
+ - name: vsphere.vm_disk_max_latency
+ description: Virtual Machine disk max latency
+ unit: milliseconds
+ chart_type: line
+ dimensions:
+ - name: latency
+ - name: vsphere.vm_net_traffic
+ description: Virtual Machine network traffic
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.vm_net_packets
+ description: Virtual Machine network packets
+ unit: packets
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.vm_net_drops
+ description: Virtual Machine network dropped packets
+ unit: packets
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.vm_overall_status
+ description: Virtual Machine overall alarm status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: green
+ - name: red
+ - name: yellow
+ - name: gray
+ - name: vsphere.vm_system_uptime
+ description: Virtual Machine system uptime
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: uptime
+ - name: host
+ description: These metrics refer to the ESXi host.
+ labels:
+ - name: datacenter
+ description: Datacenter name
+ - name: cluster
+ description: Cluster name
+ - name: host
+ description: Host name
+ metrics:
+ - name: vsphere.host_cpu_utilization
+ description: ESXi Host CPU utilization
+ unit: percentage
+ chart_type: line
+ dimensions:
+ - name: used
+ - name: vsphere.host_mem_utilization
+ description: ESXi Host memory utilization
+ unit: percentage
+ chart_type: line
+ dimensions:
+ - name: used
+ - name: vsphere.host_mem_usage
+ description: ESXi Host memory usage
+ unit: KiB
+ chart_type: line
+ dimensions:
+ - name: granted
+ - name: consumed
+ - name: active
+ - name: shared
+ - name: sharedcommon
+ - name: vsphere.host_mem_swap_io
+ description: ESXi Host VMKernel memory swap IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: in
+ - name: out
+ - name: vsphere.host_disk_io
+ description: ESXi Host disk IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: read
+ - name: write
+ - name: vsphere.host_disk_max_latency
+ description: ESXi Host disk max latency
+ unit: milliseconds
+ chart_type: line
+ dimensions:
+ - name: latency
+ - name: vsphere.host_net_traffic
+ description: ESXi Host network traffic
+ unit: KiB/s
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.host_net_packets
+ description: ESXi Host network packets
+ unit: packets
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.host_net_drops
+ description: ESXi Host network drops
+ unit: packets
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.host_net_errors
+ description: ESXi Host network errors
+ unit: errors
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
+ - name: vsphere.host_overall_status
+ description: ESXi Host overall alarm status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: green
+ - name: red
+ - name: yellow
+ - name: gray
+ - name: vsphere.host_system_uptime
+ description: ESXi Host system uptime
+ unit: seconds
+ chart_type: line
+ dimensions:
+ - name: uptime
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/metrics.txt b/src/go/collectors/go.d.plugin/modules/vsphere/metrics.txt
new file mode 100644
index 000000000..30c1f55e2
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/metrics.txt
@@ -0,0 +1,328 @@
+// [units, statsType, hasInstance]
+
+/*
+ virtualMachine:
+
+ cpu.run.summation [ms, delta, true] [Time the virtual machine is scheduled to run]
+ cpu.ready.summation [ms, delta, true] [Time that the virtual machine was ready, but could not get scheduled to run on the physical CPU during last measurement interval]
+ cpu.usagemhz.average [MHz, rate, true] [CPU usage in megahertz during the interval]
+ cpu.demandEntitlementRatio.latest [%, absolute, false] [CPU resource entitlement to CPU demand ratio (in percents)]
+ cpu.used.summation [ms, delta, true] [Total CPU usage]
+ cpu.idle.summation [ms, delta, true] [Total time that the CPU spent in an idle state]
+ cpu.maxlimited.summation [ms, delta, true] [Time the virtual machine is ready to run, but is not run due to maxing out its CPU limit setting]
+ cpu.overlap.summation [ms, delta, true] [Time the virtual machine was interrupted to perform system services on behalf of itself or other virtual machines]
+ cpu.system.summation [ms, delta, false] [Amount of time spent on system processes on each virtual CPU in the virtual machine]
+ cpu.demand.average [MHz, absolute, false] [The amount of CPU resources a virtual machine would use if there were no CPU contention or CPU limit]
+ cpu.wait.summation [ms, delta, true] [Total CPU time spent in wait state]
+ cpu.latency.average [%, rate, false] [Percent of time the virtual machine is unable to run because it is contending for access to the physical CPU(s)]
+ cpu.costop.summation [ms, delta, true] [Time the virtual machine is ready to run, but is unable to run due to co-scheduling constraints]
+ cpu.entitlement.latest [MHz, absolute, false] [CPU resources devoted by the ESX scheduler]
+ cpu.readiness.average [%, rate, true] [Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU]
+ cpu.swapwait.summation [ms, delta, true] [CPU time spent waiting for swap-in]
+ cpu.usage.average [%, rate, false] [CPU usage as a percentage during the interval]
+
+ datastore.totalReadLatency.average [ms, absolute, true] [The average time a read from the datastore takes]
+ datastore.read.average [KBps, rate, true] [Rate of reading data from the datastore]
+ datastore.write.average [KBps, rate, true] [Rate of writing data to the datastore]
+ datastore.maxTotalLatency.latest [ms, absolute, false] [Highest latency value across all datastores used by the host]
+ datastore.numberWriteAveraged.average [num, rate, true] [Average number of write commands issued per second to the datastore during the collection interval]
+ datastore.totalWriteLatency.average [ms, absolute, true] [The average time a write to the datastore takes]
+ datastore.numberReadAveraged.average [num, rate, true] [Average number of read commands issued per second to the datastore during the collection interval]
+
+ disk.read.average [KBps, rate, true] [Average number of kilobytes read from the disk each second during the collection interval]
+ disk.commands.summation [num, delta, true] [Number of SCSI commands issued during the collection interval]
+ disk.commandsAborted.summation [num, delta, true] [Number of SCSI commands aborted during the collection interval]
+ disk.busResets.summation [num, delta, true] [Number of SCSI-bus reset commands issued during the collection interval]
+ disk.maxTotalLatency.latest [ms, absolute, false] [Highest latency value across all disks used by the host]
+ disk.write.average [KBps, rate, true] [Average number of kilobytes written to disk each second during the collection interval]
+ disk.numberReadAveraged.average [num, rate, true] [Average number of disk reads per second during the collection interval]
+ disk.usage.average [KBps, rate, false] [Aggregated disk I/O rate. For hosts, this metric includes the rates for all virtual machines running on the host during the collection interval.]
+ disk.numberWrite.summation [num, delta, true] [Number of disk writes during the collection interval]
+ disk.commandsAveraged.average [num, rate, true] [Average number of SCSI commands issued per second during the collection interval]
+ disk.numberWriteAveraged.average [num, rate, true] [Average number of disk writes per second during the collection interval]
+ disk.numberRead.summation [num, delta, true] [Number of disk reads during the collection interval]
+
+ mem.vmmemctltarget.average [KB, absolute, false] [Desired amount of guest physical memory the balloon driver needs to reclaim, as determined by ESXi]
+ mem.overhead.average [KB, absolute, false] [host physical memory consumed by ESXi data structures for running the virtual machines]
+ mem.zipSaved.latest [KB, absolute, false] [host physical memory, reclaimed from a virtual machine, by memory compression. This value is less than the value of 'Compressed' memory]
+ mem.overheadMax.average [KB, absolute, false] [host physical memory reserved by ESXi, for its data structures, for running the virtual machine]
+ mem.consumed.average [KB, absolute, false] [Amount of host physical memory consumed for backing up guest physical memory pages]
+ mem.overheadTouched.average [KB, absolute, false] [Estimate of the host physical memory, from Overhead consumed, that is actively read or written to by ESXi]
+ mem.compressionRate.average [KBps, rate, false] [Rate of guest physical memory page compression by ESXi]
+ mem.swapin.average [KB, absolute, false] [Amount of guest physical memory that is swapped in from the swap space since the virtual machine has been powered on. This value is less than or equal to the 'Swap out' counter]
+ mem.swaptarget.average [KB, absolute, false] [Amount of memory that ESXi needs to reclaim by swapping]
+ mem.activewrite.average [KB, absolute, false] [Amount of guest physical memory that is being actively written by guest. Activeness is estimated by ESXi]
+ mem.decompressionRate.average [KBps, rate, false] [Rate of guest physical memory decompression]
+ mem.entitlement.average [KB, absolute, false] [Amount of host physical memory the virtual machine deserves, as determined by ESXi]
+ mem.swapoutRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped out to the swap space]
+ mem.swapout.average [KB, absolute, false] [Amount of guest physical memory that is swapped out from the virtual machine to its swap space since it has been powered on.]
+ mem.shared.average [KB, absolute, false] [Amount of guest physical memory that is shared within a single virtual machine or across virtual machines]
+ mem.compressed.average [KB, absolute, false] [Guest physical memory pages that have undergone memory compression]
+ mem.llSwapOutRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped out to the host swap cache]
+ mem.latency.average [%, absolute, false] [Percentage of time the virtual machine spent waiting to swap in or decompress guest physical memory]
+ mem.llSwapInRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped in from the host swap cache]
+ mem.zero.average [KB, absolute, false] [Guest physical memory pages whose content is 0x00]
+ mem.swapinRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped in from the swap space]
+ mem.llSwapUsed.average [KB, absolute, false] [Storage space consumed on the host swap cache for storing swapped guest physical memory pages]
+ mem.vmmemctl.average [KB, absolute, false] [Amount of guest physical memory reclaimed from the virtual machine by the balloon driver in the guest]
+ mem.active.average [KB, absolute, false] [Amount of guest physical memory that is being actively read or written by guest. Activeness is estimated by ESXi]
+ mem.granted.average [KB, absolute, false] [Amount of host physical memory or physical memory that is mapped for a virtual machine or a host]
+ mem.usage.average [%, absolute, false] [Percentage of host physical memory that has been consumed]
+ mem.zipped.latest [KB, absolute, false] [Amount of guest physical memory pages compressed by ESXi]
+ mem.swapped.average [KB, absolute, false] [Amount of guest physical memory that is swapped out to the swap space]
+
+ net.droppedTx.summation [num, delta, true] [Number of transmits dropped]
+ net.bytesTx.average [KBps, rate, true] [Average amount of data transmitted per second]
+ net.transmitted.average [KBps, rate, true] [Average rate at which data was transmitted during the interval]
+ net.droppedRx.summation [num, delta, true] [Number of receives dropped]
+ net.bytesRx.average [KBps, rate, true] [Average amount of data received per second]
+ net.usage.average [KBps, rate, true] [Network utilization (combined transmit-rates and receive-rates) during the interval]
+ net.multicastRx.summation [num, delta, true] [Number of multicast packets received during the sampling interval]
+ net.broadcastTx.summation [num, delta, true] [Number of broadcast packets transmitted during the sampling interval]
+ net.received.average [KBps, rate, true] [Average rate at which data was received during the interval]
+ net.broadcastRx.summation [num, delta, true] [Number of broadcast packets received during the sampling interval]
+ net.pnicBytesRx.average [KBps, rate, true] [pnicBytesRx]
+ net.pnicBytesTx.average [KBps, rate, true] [pnicBytesTx]
+ net.multicastTx.summation [num, delta, true] [Number of multicast packets transmitted during the sampling interval]
+ net.packetsTx.summation [num, delta, true] [Number of packets transmitted during the interval]
+ net.packetsRx.summation [num, delta, true] [Number of packets received during the interval]
+
+ power.energy.summation [J, delta, false] [Total energy used since last stats reset]
+ power.power.average [W, rate, false] [Current power usage]
+
+ rescpu.actpk5.latest [%, absolute, false] [CPU active peak over 5 minutes]
+ rescpu.actpk15.latest [%, absolute, false] [CPU active peak over 15 minutes]
+ rescpu.sampleCount.latest [num, absolute, false] [Group CPU sample count]
+ rescpu.runav15.latest [%, absolute, false] [CPU running average over 15 minutes]
+ rescpu.actav1.latest [%, absolute, false] [CPU active average over 1 minute]
+ rescpu.runpk1.latest [%, absolute, false] [CPU running peak over 1 minute]
+ rescpu.actav5.latest [%, absolute, false] [CPU active average over 5 minutes]
+ rescpu.maxLimited5.latest [%, absolute, false] [Amount of CPU resources over the limit that were refused, average over 5 minutes]
+ rescpu.maxLimited1.latest [%, absolute, false] [Amount of CPU resources over the limit that were refused, average over 1 minute]
+ rescpu.runav5.latest [%, absolute, false] [CPU running average over 5 minutes]
+ rescpu.samplePeriod.latest [ms, absolute, false] [Group CPU sample period]
+ rescpu.runpk15.latest [%, absolute, false] [CPU running peak over 15 minutes]
+ rescpu.maxLimited15.latest [%, absolute, false] [Amount of CPU resources over the limit that were refused, average over 15 minutes]
+ rescpu.actav15.latest [%, absolute, false] [CPU active average over 15 minutes]
+ rescpu.runav1.latest [%, absolute, false] [CPU running average over 1 minute]
+ rescpu.runpk5.latest [%, absolute, false] [CPU running peak over 5 minutes]
+ rescpu.actpk1.latest [%, absolute, false] [CPU active peak over 1 minute]
+
+ sys.uptime.latest [s, absolute, false] [Total time elapsed, in seconds, since last system startup]
+ sys.heartbeat.latest [num, absolute, false] [Number of heartbeats issued per virtual machine during the interval]
+ sys.osUptime.latest [s, absolute, false] [Total time elapsed, in seconds, since last operating system boot-up]
+
+ virtualDisk.numberReadAveraged.average [num, rate, true] [Average number of read commands issued per second to the virtual disk during the collection interval]
+ virtualDisk.largeSeeks.latest [num, absolute, true] [Number of seeks during the interval that were greater than 8192 LBNs apart]
+ virtualDisk.readOIO.latest [num, absolute, true] [Average number of outstanding read requests to the virtual disk during the collection interval]
+ virtualDisk.mediumSeeks.latest [num, absolute, true] [Number of seeks during the interval that were between 64 and 8192 LBNs apart]
+ virtualDisk.write.average [KBps, rate, true] [Rate of writing data to the virtual disk]
+ virtualDisk.smallSeeks.latest [num, absolute, true] [Number of seeks during the interval that were less than 64 LBNs apart]
+ virtualDisk.read.average [KBps, rate, true] [Rate of reading data from the virtual disk]
+ virtualDisk.writeLatencyUS.latest [µs, absolute, true] [Write latency in microseconds]
+ virtualDisk.writeOIO.latest [num, absolute, true] [Average number of outstanding write requests to the virtual disk during the collection interval]
+ virtualDisk.totalWriteLatency.average [ms, absolute, true] [The average time a write to the virtual disk takes]
+ virtualDisk.readLoadMetric.latest [num, absolute, true] [Storage DRS virtual disk metric for the read workload model]
+ virtualDisk.numberWriteAveraged.average [num, rate, true] [Average number of write commands issued per second to the virtual disk during the collection interval]
+ virtualDisk.writeLoadMetric.latest [num, absolute, true] [Storage DRS virtual disk metric for the write workload model]
+ virtualDisk.totalReadLatency.average [ms, absolute, true] [The average time a read from the virtual disk takes]
+ virtualDisk.readIOSize.latest [num, absolute, true] [Average read request size in bytes]
+ virtualDisk.writeIOSize.latest [num, absolute, true] [Average write request size in bytes]
+ virtualDisk.readLatencyUS.latest [µs, absolute, true] [Read latency in microseconds]
+*/
+
+/*
+ HOST:
+
+ cpu.usage.average [%, rate, true] [CPU usage as a percentage during the interval]
+ cpu.wait.summation [ms, delta, false] [Total CPU time spent in wait state]
+ cpu.ready.summation [ms, delta, false] [Time that the virtual machine was ready, but could not get scheduled to run on the physical CPU during last measurement interval]
+ cpu.used.summation [ms, delta, true] [Total CPU usage]
+ cpu.demand.average [MHz, absolute, false] [The amount of CPU resources a virtual machine would use if there were no CPU contention or CPU limit]
+ cpu.idle.summation [ms, delta, true] [Total time that the CPU spent in an idle state]
+ cpu.latency.average [%, rate, false] [Percent of time the virtual machine is unable to run because it is contending for access to the physical CPU(s)]
+ cpu.utilization.average [%, rate, true] [CPU utilization as a percentage during the interval (CPU usage and CPU utilization might be different due to power management technologies or hyper-threading)]
+ cpu.coreUtilization.average [%, rate, true] [CPU utilization of the corresponding core (if hyper-threading is enabled) as a percentage during the interval (A core is utilized if either or both of its logical CPUs are utilized)]
+ cpu.costop.summation [ms, delta, false] [Time the virtual machine is ready to run, but is unable to run due to co-scheduling constraints]
+ cpu.totalCapacity.average [MHz, absolute, false] [Total CPU capacity reserved by and available for virtual machines]
+ cpu.usagemhz.average [MHz, rate, false] [CPU usage in megahertz during the interval]
+ cpu.swapwait.summation [ms, delta, false] [CPU time spent waiting for swap-in]
+ cpu.reservedCapacity.average [MHz, absolute, false] [Total CPU capacity reserved by virtual machines]
+ cpu.readiness.average [%, rate, false] [Percentage of time that the virtual machine was ready, but could not get scheduled to run on the physical CPU]
+
+ datastore.datastoreReadLoadMetric.latest [num, absolute, true] [Storage DRS datastore metric for read workload model]
+ datastore.datastoreNormalReadLatency.latest [num, absolute, true] [Storage DRS datastore normalized read latency]
+ datastore.datastoreWriteLoadMetric.latest [num, absolute, true] [Storage DRS datastore metric for write workload model]
+ datastore.datastoreMaxQueueDepth.latest [num, absolute, true] [Storage I/O Control datastore maximum queue depth]
+ datastore.totalReadLatency.average [ms, absolute, true] [The average time a read from the datastore takes]
+ datastore.datastoreWriteOIO.latest [num, absolute, true] [Storage DRS datastore outstanding write requests]
+ datastore.datastoreReadIops.latest [num, absolute, true] [Storage DRS datastore read I/O rate]
+ datastore.sizeNormalizedDatastoreLatency.average [µs, absolute, true] [Storage I/O Control size-normalized I/O latency]
+ datastore.datastoreIops.average [num, absolute, true] [Storage I/O Control aggregated IOPS]
+ datastore.datastoreVMObservedLatency.latest [µs, absolute, true] [The average datastore latency as seen by virtual machines]
+ datastore.unmapIOs.summation [num, delta, true] [unmapIOs]
+ datastore.numberWriteAveraged.average [num, rate, true] [Average number of write commands issued per second to the datastore during the collection interval]
+ datastore.datastoreNormalWriteLatency.latest [num, absolute, true] [Storage DRS datastore normalized write latency]
+ datastore.numberReadAveraged.average [num, rate, true] [Average number of read commands issued per second to the datastore during the collection interval]
+ datastore.unmapSize.summation [MB, delta, true] [unmapSize]
+ datastore.datastoreReadOIO.latest [num, absolute, true] [Storage DRS datastore outstanding read requests]
+ datastore.write.average [KBps, rate, true] [Rate of writing data to the datastore]
+ datastore.totalWriteLatency.average [ms, absolute, true] [The average time a write to the datastore takes]
+ datastore.datastoreWriteIops.latest [num, absolute, true] [Storage DRS datastore write I/O rate]
+ datastore.datastoreReadBytes.latest [num, absolute, true] [Storage DRS datastore bytes read]
+ datastore.read.average [KBps, rate, true] [Rate of reading data from the datastore]
+ datastore.siocActiveTimePercentage.average [%, absolute, true] [Percentage of time Storage I/O Control actively controlled datastore latency]
+ datastore.datastoreWriteBytes.latest [num, absolute, true] [Storage DRS datastore bytes written]
+ datastore.maxTotalLatency.latest [ms, absolute, false] [Highest latency value across all datastores used by the host]
+
+ disk.queueReadLatency.average [ms, absolute, true] [Average amount of time spent in the VMkernel queue, per SCSI read command, during the collection interval]
+ disk.numberReadAveraged.average [num, rate, true] [Average number of disk reads per second during the collection interval]
+ disk.numberRead.summation [num, delta, true] [Number of disk reads during the collection interval]
+ disk.queueWriteLatency.average [ms, absolute, true] [Average amount of time spent in the VMkernel queue, per SCSI write command, during the collection interval]
+ disk.totalWriteLatency.average [ms, absolute, true] [Average amount of time taken during the collection interval to process a SCSI write command issued by the guest OS to the virtual machine]
+ disk.kernelWriteLatency.average [ms, absolute, true] [Average amount of time, in milliseconds, spent by VMkernel to process each SCSI write command]
+ disk.read.average [KBps, rate, true] [Average number of kilobytes read from the disk each second during the collection interval]
+ disk.usage.average [KBps, rate, false] [Aggregated disk I/O rate. For hosts, this metric includes the rates for all virtual machines running on the host during the collection interval.]
+ disk.kernelLatency.average [ms, absolute, true] [Average amount of time, in milliseconds, spent by VMkernel to process each SCSI command]
+ disk.commandsAveraged.average [num, rate, true] [Average number of SCSI commands issued per second during the collection interval]
+ disk.numberWrite.summation [num, delta, true] [Number of disk writes during the collection interval]
+ disk.write.average [KBps, rate, true] [Average number of kilobytes written to disk each second during the collection interval]
+ disk.queueLatency.average [ms, absolute, true] [Average amount of time spent in the VMkernel queue, per SCSI command, during the collection interval]
+ disk.busResets.summation [num, delta, true] [Number of SCSI-bus reset commands issued during the collection interval]
+ disk.maxTotalLatency.latest [ms, absolute, false] [Highest latency value across all disks used by the host]
+ disk.kernelReadLatency.average [ms, absolute, true] [Average amount of time, in milliseconds, spent by VMkernel to process each SCSI read command]
+ disk.deviceLatency.average [ms, absolute, true] [Average amount of time, in milliseconds, to complete a SCSI command from the physical device]
+ disk.totalLatency.average [ms, absolute, true] [Average amount of time taken during the collection interval to process a SCSI command issued by the guest OS to the virtual machine]
+ disk.commands.summation [num, delta, true] [Number of SCSI commands issued during the collection interval]
+ disk.numberWriteAveraged.average [num, rate, true] [Average number of disk writes per second during the collection interval]
+ disk.totalReadLatency.average [ms, absolute, true] [Average amount of time taken during the collection interval to process a SCSI read command issued from the guest OS to the virtual machine]
+ disk.maxQueueDepth.average [num, absolute, true] [Maximum queue depth]
+ disk.deviceWriteLatency.average [ms, absolute, true] [Average amount of time, in milliseconds, to write to the physical device]
+ disk.commandsAborted.summation [num, delta, true] [Number of SCSI commands aborted during the collection interval]
+ disk.deviceReadLatency.average [ms, absolute, true] [Average amount of time, in milliseconds, to read from the physical device]
+
+ hbr.hbrNetRx.average [KBps, rate, false] [Average amount of data received per second]
+ hbr.hbrNumVms.average [num, absolute, false] [Current number of replicated virtual machines]
+ hbr.hbrNetTx.average [KBps, rate, false] [Average amount of data transmitted per second]
+
+ mem.reservedCapacity.average [MB, absolute, false] [Memory reservation consumed by powered-on virtual machines]
+ mem.swapinRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped in from the swap space]
+ mem.zero.average [KB, absolute, false] [Guest physical memory pages whose content is 0x00]
+ mem.heapfree.average [KB, absolute, false] [Free address space in the heap of ESXi. This is less than or equal to Heap]
+ mem.sharedcommon.average [KB, absolute, false] [Amount of host physical memory that backs shared guest physical memory (Shared)]
+ mem.swapin.average [KB, absolute, false] [Amount of guest physical memory that is swapped in from the swap space since the virtual machine has been powered on. This value is less than or equal to the 'Swap out' counter]
+ mem.unreserved.average [KB, absolute, false] [Amount by which reservation can be raised]
+ mem.lowfreethreshold.average [KB, absolute, false] [Threshold of free host physical memory below which ESXi will begin actively reclaiming memory from virtual machines by swapping, compression and ballooning]
+ mem.state.latest [num, absolute, false] [Current memory availability state of ESXi. Possible values are high, clear, soft, hard, low. The state value determines the techniques used for memory reclamation from virtual machines]
+ mem.decompressionRate.average [KBps, rate, false] [Rate of guest physical memory decompression]
+ mem.swapout.average [KB, absolute, false] [Amount of guest physical memory that is swapped out from the virtual machine to its swap space since it has been powered on.]
+ mem.vmfs.pbc.capMissRatio.latest [%, absolute, false] [Trailing average of the ratio of capacity misses to compulsory misses for the VMFS PB Cache]
+ mem.swapused.average [KB, absolute, false] [Swap storage space consumed]
+ mem.consumed.average [KB, absolute, false] [Amount of host physical memory consumed for backing up guest physical memory pages]
+ mem.llSwapOutRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped out to the host swap cache]
+ mem.llSwapOut.average [KB, absolute, false] [Amount of guest physical memory swapped out to the host swap cache]
+ mem.swapoutRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped out to the swap space]
+ mem.llSwapIn.average [KB, absolute, false] [Amount of guest physical memory swapped in from host cache]
+ mem.active.average [KB, absolute, false] [Amount of guest physical memory that is being actively read or written by guest. Activeness is estimated by ESXi]
+ mem.latency.average [%, absolute, false] [Percentage of time the virtual machine spent waiting to swap in or decompress guest physical memory]
+ mem.llSwapInRate.average [KBps, rate, false] [Rate at which guest physical memory is swapped in from the host swap cache]
+ mem.vmfs.pbc.sizeMax.latest [MB, absolute, false] [Maximum size the VMFS Pointer Block Cache can grow to]
+ mem.vmmemctl.average [KB, absolute, false] [Amount of guest physical memory reclaimed from the virtual machine by the balloon driver in the guest]
+ mem.vmfs.pbc.size.latest [MB, absolute, false] [Space used for holding VMFS Pointer Blocks in memory]
+ mem.overhead.average [KB, absolute, false] [host physical memory consumed by ESXi data structures for running the virtual machines]
+ mem.vmfs.pbc.workingSet.latest [TB, absolute, false] [Amount of file blocks whose addresses are cached in the VMFS PB Cache]
+ mem.shared.average [KB, absolute, false] [Amount of guest physical memory that is shared within a single virtual machine or across virtual machines]
+ mem.usage.average [%, absolute, false] [Percentage of host physical memory that has been consumed]
+ mem.vmfs.pbc.workingSetMax.latest [TB, absolute, false] [Maximum amount of file blocks whose addresses are cached in the VMFS PB Cache]
+ mem.sysUsage.average [KB, absolute, false] [Amount of host physical memory consumed by VMkernel]
+ mem.compressed.average [KB, absolute, false] [Guest physical memory pages that have undergone memory compression]
+ mem.vmfs.pbc.overhead.latest [KB, absolute, false] [Amount of VMFS heap used by the VMFS PB Cache]
+ mem.totalCapacity.average [MB, absolute, false] [Total reservation, available and consumed, for powered-on virtual machines]
+ mem.activewrite.average [KB, absolute, false] [Amount of guest physical memory that is being actively written by guest. Activeness is estimated by ESXi]
+ mem.granted.average [KB, absolute, false] [Amount of host physical memory or physical memory that is mapped for a virtual machine or a host]
+ mem.compressionRate.average [KBps, rate, false] [Rate of guest physical memory page compression by ESXi]
+ mem.heap.average [KB, absolute, false] [Virtual address space of ESXi that is dedicated to its heap]
+ mem.llSwapUsed.average [KB, absolute, false] [Storage space consumed on the host swap cache for storing swapped guest physical memory pages]
+
+ net.bytesTx.average [KBps, rate, true] [Average amount of data transmitted per second]
+ net.droppedRx.summation [num, delta, true] [Number of receives dropped]
+ net.transmitted.average [KBps, rate, true] [Average rate at which data was transmitted during the interval]
+ net.multicastTx.summation [num, delta, true] [Number of multicast packets transmitted during the sampling interval]
+ net.errorsTx.summation [num, delta, true] [Number of packets with errors transmitted during the sampling interval]
+ net.unknownProtos.summation [num, delta, true] [Number of frames with unknown protocol received during the sampling interval]
+ net.multicastRx.summation [num, delta, true] [Number of multicast packets received during the sampling interval]
+ net.broadcastTx.summation [num, delta, true] [Number of broadcast packets transmitted during the sampling interval]
+ net.received.average [KBps, rate, true] [Average rate at which data was received during the interval]
+ net.droppedTx.summation [num, delta, true] [Number of transmits dropped]
+ net.usage.average [KBps, rate, true] [Network utilization (combined transmit-rates and receive-rates) during the interval]
+ net.broadcastRx.summation [num, delta, true] [Number of broadcast packets received during the sampling interval]
+ net.packetsRx.summation [num, delta, true] [Number of packets received during the interval]
+ net.packetsTx.summation [num, delta, true] [Number of packets transmitted during the interval]
+ net.errorsRx.summation [num, delta, true] [Number of packets with errors received during the sampling interval]
+ net.bytesRx.average [KBps, rate, true] [Average amount of data received per second]
+
+ power.energy.summation [J, delta, false] [Total energy used since last stats reset]
+ power.power.average [W, rate, false] [Current power usage]
+ power.powerCap.average [W, absolute, false] [Maximum allowed power usage]
+
+ rescpu.sampleCount.latest [num, absolute, false] [Group CPU sample count]
+ rescpu.maxLimited5.latest [%, absolute, false] [Amount of CPU resources over the limit that were refused, average over 5 minutes]
+ rescpu.runav1.latest [%, absolute, false] [CPU running average over 1 minute]
+ rescpu.actpk5.latest [%, absolute, false] [CPU active peak over 5 minutes]
+ rescpu.runav5.latest [%, absolute, false] [CPU running average over 5 minutes]
+ rescpu.actav1.latest [%, absolute, false] [CPU active average over 1 minute]
+ rescpu.runav15.latest [%, absolute, false] [CPU running average over 15 minutes]
+ rescpu.actav15.latest [%, absolute, false] [CPU active average over 15 minutes]
+ rescpu.actav5.latest [%, absolute, false] [CPU active average over 5 minutes]
+ rescpu.maxLimited15.latest [%, absolute, false] [Amount of CPU resources over the limit that were refused, average over 15 minutes]
+ rescpu.actpk1.latest [%, absolute, false] [CPU active peak over 1 minute]
+ rescpu.runpk15.latest [%, absolute, false] [CPU running peak over 15 minutes]
+ rescpu.samplePeriod.latest [ms, absolute, false] [Group CPU sample period]
+ rescpu.actpk15.latest [%, absolute, false] [CPU active peak over 15 minutes]
+ rescpu.runpk5.latest [%, absolute, false] [CPU running peak over 5 minutes]
+ rescpu.runpk1.latest [%, absolute, false] [CPU running peak over 1 minute]
+ rescpu.maxLimited1.latest [%, absolute, false] [Amount of CPU resources over the limit that were refused, average over 1 minute]
+
+ storageAdapter.read.average [KBps, rate, true] [Rate of reading data by the storage adapter]
+ storageAdapter.commandsAveraged.average [num, rate, true] [Average number of commands issued per second by the storage adapter during the collection interval]
+ storageAdapter.numberWriteAveraged.average [num, rate, true] [Average number of write commands issued per second by the storage adapter during the collection interval]
+ storageAdapter.totalWriteLatency.average [ms, absolute, true] [The average time a write by the storage adapter takes]
+ storageAdapter.totalReadLatency.average [ms, absolute, true] [The average time a read by the storage adapter takes]
+ storageAdapter.write.average [KBps, rate, true] [Rate of writing data by the storage adapter]
+ storageAdapter.numberReadAveraged.average [num, rate, true] [Average number of read commands issued per second by the storage adapter during the collection interval]
+ storageAdapter.maxTotalLatency.latest [ms, absolute, false] [Highest latency value across all storage adapters used by the host]
+ storagePath.numberWriteAveraged.average [num, rate, true] [Average number of write commands issued per second on the storage path during the collection interval]
+ storagePath.write.average [KBps, rate, true] [Rate of writing data on the storage path]
+ storagePath.maxTotalLatency.latest [ms, absolute, false] [Highest latency value across all storage paths used by the host]
+ storagePath.read.average [KBps, rate, true] [Rate of reading data on the storage path]
+ storagePath.numberReadAveraged.average [num, rate, true] [Average number of read commands issued per second on the storage path during the collection interval]
+ storagePath.totalWriteLatency.average [ms, absolute, true] [The average time a write issued on the storage path takes]
+ storagePath.totalReadLatency.average [ms, absolute, true] [The average time a read issued on the storage path takes]
+ storagePath.commandsAveraged.average [num, rate, true] [Average number of commands issued per second on the storage path during the collection interval]
+
+ sys.resourceMemTouched.latest [KB, absolute, true] [Memory touched by the system resource group]
+ sys.resourceMemSwapped.latest [KB, absolute, true] [Memory swapped out by the system resource group]
+ sys.resourceMemShared.latest [KB, absolute, true] [Memory saved due to sharing by the system resource group]
+ sys.resourceMemZero.latest [KB, absolute, true] [Zero filled memory used by the system resource group]
+ sys.resourceMemMapped.latest [KB, absolute, true] [Memory mapped by the system resource group]
+ sys.resourceCpuAllocShares.latest [num, absolute, true] [CPU allocation shares of the system resource group]
+ sys.resourceFdUsage.latest [num, absolute, true] [Number of file descriptors used by the system resource group]
+ sys.resourceCpuAct5.latest [%, absolute, true] [CPU active average over 5 minutes of the system resource group]
+ sys.resourceCpuAct1.latest [%, absolute, true] [CPU active average over 1 minute of the system resource group]
+ sys.resourceCpuUsage.average [MHz, rate, true] [Amount of CPU used by the Service Console and other applications during the interval]
+ sys.resourceMemOverhead.latest [KB, absolute, true] [Overhead memory consumed by the system resource group]
+ sys.resourceMemCow.latest [KB, absolute, true] [Memory shared by the system resource group]
+ sys.resourceCpuAllocMax.latest [MHz, absolute, true] [CPU allocation limit (in MHz) of the system resource group]
+ sys.resourceMemAllocMax.latest [KB, absolute, true] [Memory allocation limit (in KB) of the system resource group]
+ sys.resourceMemAllocMin.latest [KB, absolute, true] [Memory allocation reservation (in KB) of the system resource group]
+ sys.resourceCpuAllocMin.latest [MHz, absolute, true] [CPU allocation reservation (in MHz) of the system resource group]
+ sys.resourceCpuMaxLimited1.latest [%, absolute, true] [CPU maximum limited over 1 minute of the system resource group]
+ sys.resourceMemAllocShares.latest [num, absolute, true] [Memory allocation shares of the system resource group]
+ sys.resourceMemConsumed.latest [KB, absolute, true] [Memory consumed by the system resource group]
+ sys.uptime.latest [s, absolute, false] [Total time elapsed, in seconds, since last system startup]
+ sys.resourceCpuMaxLimited5.latest [%, absolute, true] [CPU maximum limited over 5 minutes of the system resource group]
+ sys.resourceCpuRun5.latest [%, absolute, true] [CPU running average over 5 minutes of the system resource group]
+ sys.resourceCpuRun1.latest [%, absolute, true] [CPU running average over 1 minute of the system resource group]
+
+ vflashModule.numActiveVMDKs.latest [num, absolute, true] [Number of caches controlled by the virtual flash module]
+*/
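In the metric lists above, each counter is annotated as [unit, stats type, per-instance]: the unit vCenter reports, whether the counter is an absolute value, a delta over the sampling interval, or a rate, and what appears to be a flag for whether per-instance values (per disk, NIC, adapter, and so on) exist alongside the aggregate. This reading is inferred from the entries themselves; the patch does not spell the format out.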
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/resources/resources.go b/src/go/collectors/go.d.plugin/modules/vsphere/resources/resources.go
new file mode 100644
index 000000000..8f967f16c
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/resources/resources.go
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package resources
+
+import (
+ "github.com/vmware/govmomi/performance"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+/*
+
+```
+Virtual Datacenter Architecture Representation (partial).
+
+<root>
++-DC0 # Virtual datacenter
+ +-datastore # Datastore folder (created by system)
+ | +-Datastore1
+ |
+ +-host # Host folder (created by system)
+ | +-Folder1 # Host and Cluster folder
+ | | +-NestedFolder1
+ | | | +-Cluster1
+ | | | | +-Host1
+ | +-Cluster2
+ | | +-Host2
+ | | | +-VM1
+ | | | +-VM2
+ | | | +-hadoop1
+ | +-Host3 # Dummy folder for non-clustered host (created by system)
+ | | +-Host3
+ | | | +-VM3
+ | | | +-VM4
+ | | |
+ +-vm # VM folder (created by system)
+ | +-VM1
+ | +-VM2
+ | +-Folder2 # VM and Template folder
+ | | +-hadoop1
+ | | +-NestedFolder1
+ | | | +-VM3
+ | | | +-VM4
+```
+*/
+
+type Resources struct {
+ DataCenters DataCenters
+ Folders Folders
+ Clusters Clusters
+ Hosts Hosts
+ VMs VMs
+}
+
+type (
+ Datacenter struct {
+ Name string
+ ID string
+ }
+
+ Folder struct {
+ Name string
+ ID string
+ ParentID string
+ }
+
+ HierarchyValue struct {
+ ID, Name string
+ }
+
+ ClusterHierarchy struct {
+ DC HierarchyValue
+ }
+ Cluster struct {
+ Name string
+ ID string
+ ParentID string
+ Hier ClusterHierarchy
+ }
+
+ HostHierarchy struct {
+ DC HierarchyValue
+ Cluster HierarchyValue
+ }
+ Host struct {
+ Name string
+ ID string
+ ParentID string
+ Hier HostHierarchy
+ OverallStatus string
+ MetricList performance.MetricList
+ Ref types.ManagedObjectReference
+ }
+
+ VMHierarchy struct {
+ DC HierarchyValue
+ Cluster HierarchyValue
+ Host HierarchyValue
+ }
+
+ VM struct {
+ Name string
+ ID string
+ ParentID string
+ Hier VMHierarchy
+ OverallStatus string
+ MetricList performance.MetricList
+ Ref types.ManagedObjectReference
+ }
+)
+
+func (v *HierarchyValue) IsSet() bool { return v.ID != "" && v.Name != "" }
+func (v *HierarchyValue) Set(id, name string) { v.ID = id; v.Name = name }
+
+func (h ClusterHierarchy) IsSet() bool { return h.DC.IsSet() }
+func (h HostHierarchy) IsSet() bool { return h.DC.IsSet() && h.Cluster.IsSet() }
+func (h VMHierarchy) IsSet() bool { return h.DC.IsSet() && h.Cluster.IsSet() && h.Host.IsSet() }
+
+type (
+ DataCenters map[string]*Datacenter
+ Folders map[string]*Folder
+ Clusters map[string]*Cluster
+ Hosts map[string]*Host
+ VMs map[string]*VM
+)
+
+func (dcs DataCenters) Put(dc *Datacenter) { dcs[dc.ID] = dc }
+func (dcs DataCenters) Get(id string) *Datacenter { return dcs[id] }
+func (fs Folders) Put(folder *Folder) { fs[folder.ID] = folder }
+func (fs Folders) Get(id string) *Folder { return fs[id] }
+func (cs Clusters) Put(cluster *Cluster) { cs[cluster.ID] = cluster }
+func (cs Clusters) Get(id string) *Cluster { return cs[id] }
+func (hs Hosts) Put(host *Host) { hs[host.ID] = host }
+func (hs Hosts) Remove(id string) { delete(hs, id) }
+func (hs Hosts) Get(id string) *Host { return hs[id] }
+func (vs VMs) Put(vm *VM) { vs[vm.ID] = vm }
+func (vs VMs) Remove(id string) { delete(vs, id) }
+func (vs VMs) Get(id string) *VM { return vs[id] }
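Every map above keys a resource by its managed object ID, and the Hier values record where a cluster, host, or VM sits in the inventory tree sketched in the package comment. A minimal sketch of how the pieces fit together (not part of the patch; the IDs and names are invented):

```
// A sketch, not part of the module: IDs and names are invented for illustration.
package main

import (
	"fmt"

	rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
)

func main() {
	res := rs.Resources{
		DataCenters: rs.DataCenters{},
		Hosts:       rs.Hosts{},
		VMs:         rs.VMs{},
	}

	res.DataCenters.Put(&rs.Datacenter{ID: "datacenter-2", Name: "DC0"})
	res.Hosts.Put(&rs.Host{ID: "host-20", Name: "Host1", ParentID: "domain-c7"})

	vm := &rs.VM{ID: "vm-55", Name: "VM1", ParentID: "host-20"}
	vm.Hier.DC.Set("datacenter-2", "DC0")
	vm.Hier.Cluster.Set("domain-c7", "Cluster1")
	vm.Hier.Host.Set("host-20", "Host1")
	res.VMs.Put(vm)

	// The VM's hierarchy is fully resolved; the host's is not.
	fmt.Println(vm.Hier.IsSet(), res.Hosts.Get("host-20").Hier.IsSet()) // true false
}
```

Checking IsSet on a hierarchy is how the rest of the module can tell whether a host or VM was fully placed in the tree.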
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape.go b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape.go
new file mode 100644
index 000000000..adda665cc
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape.go
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package scrape
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+
+ "github.com/netdata/netdata/go/go.d.plugin/logger"
+ "github.com/vmware/govmomi/performance"
+ "github.com/vmware/govmomi/vim25/types"
+)
+
+type Client interface {
+ Version() string
+ PerformanceMetrics([]types.PerfQuerySpec) ([]performance.EntityMetric, error)
+}
+
+func New(client Client) *Scraper {
+ v := &Scraper{Client: client}
+ v.calcMaxQuery()
+ return v
+}
+
+type Scraper struct {
+ *logger.Logger
+ Client
+ maxQuery int
+}
+
+// The default limit for vCenter 6.5 and above is 256; prior versions of vCenter have it set to 64.
+func (s *Scraper) calcMaxQuery() {
+ major, minor, err := parseVersion(s.Version())
+ if err != nil || major < 6 || (major == 6 && minor < 5) {
+ s.maxQuery = 64
+ return
+ }
+ s.maxQuery = 256
+}
+
+func (s *Scraper) ScrapeHosts(hosts rs.Hosts) []performance.EntityMetric {
+ t := time.Now()
+ pqs := newHostsPerfQuerySpecs(hosts)
+ ms := s.scrapeMetrics(pqs)
+ s.Debugf("scraping : scraped metrics for %d/%d hosts, process took %s",
+ len(ms),
+ len(hosts),
+ time.Since(t),
+ )
+ return ms
+}
+
+func (s *Scraper) ScrapeVMs(vms rs.VMs) []performance.EntityMetric {
+ t := time.Now()
+ pqs := newVMsPerfQuerySpecs(vms)
+ ms := s.scrapeMetrics(pqs)
+ s.Debugf("scraping : scraped metrics for %d/%d vms, process took %s",
+ len(ms),
+ len(vms),
+ time.Since(t),
+ )
+ return ms
+}
+
+func (s *Scraper) scrapeMetrics(pqs []types.PerfQuerySpec) []performance.EntityMetric {
+ tc := newThrottledCaller(5)
+ var ms []performance.EntityMetric
+ lock := &sync.Mutex{}
+
+ chunks := chunkify(pqs, s.maxQuery)
+ for _, chunk := range chunks {
+ pqs := chunk
+ job := func() {
+ s.scrape(&ms, lock, pqs)
+ }
+ tc.call(job)
+ }
+ tc.wait()
+
+ return ms
+}
+
+func (s *Scraper) scrape(metrics *[]performance.EntityMetric, lock *sync.Mutex, pqs []types.PerfQuerySpec) {
+ m, err := s.PerformanceMetrics(pqs)
+ if err != nil {
+ s.Error(err)
+ return
+ }
+
+ lock.Lock()
+ *metrics = append(*metrics, m...)
+ lock.Unlock()
+}
+
+func chunkify(pqs []types.PerfQuerySpec, chunkSize int) (chunks [][]types.PerfQuerySpec) {
+ for i := 0; i < len(pqs); i += chunkSize {
+ end := i + chunkSize
+ if end > len(pqs) {
+ end = len(pqs)
+ }
+ chunks = append(chunks, pqs[i:end])
+ }
+ return chunks
+}
+
+const (
+ pqsMaxSample = 1
+ pqsIntervalID = 20
+ pqsFormat = "normal"
+)
+
+func newHostsPerfQuerySpecs(hosts rs.Hosts) []types.PerfQuerySpec {
+ pqs := make([]types.PerfQuerySpec, 0, len(hosts))
+ for _, host := range hosts {
+ pq := types.PerfQuerySpec{
+ Entity: host.Ref,
+ MaxSample: pqsMaxSample,
+ MetricId: host.MetricList,
+ IntervalId: pqsIntervalID,
+ Format: pqsFormat,
+ }
+ pqs = append(pqs, pq)
+ }
+ return pqs
+}
+
+func newVMsPerfQuerySpecs(vms rs.VMs) []types.PerfQuerySpec {
+ pqs := make([]types.PerfQuerySpec, 0, len(vms))
+ for _, vm := range vms {
+ pq := types.PerfQuerySpec{
+ Entity: vm.Ref,
+ MaxSample: pqsMaxSample,
+ MetricId: vm.MetricList,
+ IntervalId: pqsIntervalID,
+ Format: pqsFormat,
+ }
+ pqs = append(pqs, pq)
+ }
+ return pqs
+}
+
+func parseVersion(version string) (major, minor int, err error) {
+ parts := strings.Split(version, ".")
+ if len(parts) < 2 {
+ return 0, 0, fmt.Errorf("unparsable version string : %s", version)
+ }
+ if major, err = strconv.Atoi(parts[0]); err != nil {
+ return 0, 0, err
+ }
+ if minor, err = strconv.Atoi(parts[1]); err != nil {
+ return 0, 0, err
+ }
+ return major, minor, nil
+}
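ScrapeHosts and ScrapeVMs build one PerfQuerySpec per resource, split the specs into chunks of at most maxQuery, and run at most five chunked PerformanceMetrics calls at a time through the throttled caller defined below. A minimal sketch with a stub Client (not part of the patch; the stub behaviour and IDs are invented):

```
// A sketch, not part of the module: a stub Client stands in for the real vSphere client.
package main

import (
	"fmt"

	rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
	"github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/scrape"

	"github.com/vmware/govmomi/performance"
	"github.com/vmware/govmomi/vim25/types"
)

type stubClient struct{}

func (stubClient) Version() string { return "6.5.0" }

// PerformanceMetrics echoes one empty metric back per query spec.
func (stubClient) PerformanceMetrics(pqs []types.PerfQuerySpec) ([]performance.EntityMetric, error) {
	ms := make([]performance.EntityMetric, 0, len(pqs))
	for _, pq := range pqs {
		ms = append(ms, performance.EntityMetric{Entity: pq.Entity})
	}
	return ms, nil
}

func main() {
	hosts := rs.Hosts{}
	for i := 0; i < 3; i++ {
		id := fmt.Sprintf("host-%d", i)
		hosts.Put(&rs.Host{ID: id, Ref: types.ManagedObjectReference{Type: "HostSystem", Value: id}})
	}

	// The embedded logger is left nil here, as in the package's own tests.
	s := scrape.New(stubClient{})
	fmt.Println(len(s.ScrapeHosts(hosts))) // 3
}
```

With the stub reporting version 6.5.0, maxQuery is 256, so the three query specs travel in a single chunk.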
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape_test.go
new file mode 100644
index 000000000..0576850f6
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/scrape_test.go
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package scrape
+
+import (
+ "crypto/tls"
+ "net/url"
+ "testing"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/client"
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/discover"
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/tlscfg"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/vmware/govmomi/simulator"
+)
+
+func TestNew(t *testing.T) {
+}
+
+func TestScraper_ScrapeVMs(t *testing.T) {
+ s, res, teardown := prepareScraper(t)
+ defer teardown()
+
+ metrics := s.ScrapeVMs(res.VMs)
+ assert.Len(t, metrics, len(res.VMs))
+}
+
+func TestScraper_ScrapeHosts(t *testing.T) {
+ s, res, teardown := prepareScraper(t)
+ defer teardown()
+
+ metrics := s.ScrapeHosts(res.Hosts)
+ assert.Len(t, metrics, len(res.Hosts))
+}
+
+func prepareScraper(t *testing.T) (s *Scraper, res *rs.Resources, teardown func()) {
+ model, srv := createSim(t)
+ teardown = func() { model.Remove(); srv.Close() }
+
+ c := newClient(t, srv.URL)
+ d := discover.New(c)
+ res, err := d.Discover()
+ require.NoError(t, err)
+
+ return New(c), res, teardown
+}
+
+func newClient(t *testing.T, vCenterURL *url.URL) *client.Client {
+ c, err := client.New(client.Config{
+ URL: vCenterURL.String(),
+ User: "admin",
+ Password: "password",
+ Timeout: time.Second * 3,
+ TLSConfig: tlscfg.TLSConfig{InsecureSkipVerify: true},
+ })
+ require.NoError(t, err)
+ return c
+}
+
+func createSim(t *testing.T) (*simulator.Model, *simulator.Server) {
+ model := simulator.VPX()
+ err := model.Create()
+ require.NoError(t, err)
+ model.Service.TLS = new(tls.Config)
+ return model, model.Service.NewServer()
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller.go b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller.go
new file mode 100644
index 000000000..5127c28c1
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller.go
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package scrape
+
+import "sync"
+
+type throttledCaller struct {
+ limit chan struct{}
+ wg sync.WaitGroup
+}
+
+func newThrottledCaller(limit int) *throttledCaller {
+ if limit <= 0 {
+ panic("limit must be > 0")
+ }
+ return &throttledCaller{limit: make(chan struct{}, limit)}
+}
+
+func (t *throttledCaller) call(job func()) {
+ t.wg.Add(1)
+ go func() {
+ defer t.wg.Done()
+ t.limit <- struct{}{}
+ defer func() {
+ <-t.limit
+ }()
+ job()
+ }()
+}
+
+func (t *throttledCaller) wait() {
+ t.wg.Wait()
+}
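Note that call never blocks the caller: the slot on the limit channel is acquired inside the spawned goroutine, so all jobs are queued immediately and the buffered channel only bounds how many of them execute at once; wait then joins them all via the WaitGroup.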
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller_test.go
new file mode 100644
index 000000000..545ed1603
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/scrape/throttled_caller_test.go
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package scrape
+
+import (
+ "sync"
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func Test_throttledCaller(t *testing.T) {
+ var current int64
+ var max int64
+ var total int64
+ var mux sync.Mutex
+ limit := 5
+ n := 10000
+ tc := newThrottledCaller(limit)
+
+ for i := 0; i < n; i++ {
+ job := func() {
+ atomic.AddInt64(&total, 1)
+ atomic.AddInt64(&current, 1)
+ time.Sleep(100 * time.Microsecond)
+
+ mux.Lock()
+ defer mux.Unlock()
+ if atomic.LoadInt64(&current) > max {
+ max = atomic.LoadInt64(&current)
+ }
+ atomic.AddInt64(&current, -1)
+ }
+ tc.call(job)
+ }
+ tc.wait()
+
+ assert.Equal(t, int64(n), total)
+ assert.Equal(t, int64(limit), max)
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/task.go b/src/go/collectors/go.d.plugin/modules/vsphere/task.go
new file mode 100644
index 000000000..103ca1ed6
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/task.go
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ "sync"
+ "time"
+)
+
+func newTask(doWork func(), doEvery time.Duration) *task {
+ task := task{
+ done: make(chan struct{}),
+ running: make(chan struct{}),
+ }
+
+ go func() {
+ t := time.NewTicker(doEvery)
+ defer func() {
+ t.Stop()
+ close(task.running)
+ }()
+ for {
+ select {
+ case <-task.done:
+ return
+ case <-t.C:
+ doWork()
+ }
+ }
+ }()
+
+ return &task
+}
+
+type task struct {
+ once sync.Once
+ done chan struct{}
+ running chan struct{}
+}
+
+func (t *task) stop() {
+ t.once.Do(func() { close(t.done) })
+}
+
+func (t *task) isStopped() bool {
+ select {
+ case <-t.done:
+ return true
+ default:
+ return false
+ }
+}
+
+func (t *task) isRunning() bool {
+ select {
+ case <-t.running:
+ return false
+ default:
+ return true
+ }
+}
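stop is safe to call more than once (sync.Once guards the close of done), and isRunning only starts reporting false once the ticker goroutine has actually returned, because running is closed in that goroutine's deferred cleanup; the tests below rely on this by sleeping briefly after stop before asserting.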
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/task_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/task_test.go
new file mode 100644
index 000000000..ed55a28a3
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/task_test.go
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func Test_task(t *testing.T) {
+ var i int64
+ job := func() {
+ atomic.AddInt64(&i, 1)
+ }
+
+ task := newTask(job, time.Millisecond*200)
+ defer task.stop()
+ time.Sleep(time.Second)
+ assert.True(t, atomic.LoadInt64(&i) > 0)
+}
+
+func Test_task_isStopped(t *testing.T) {
+ task := newTask(func() {}, time.Second)
+ assert.False(t, task.isStopped())
+
+ task.stop()
+ time.Sleep(time.Millisecond * 500)
+ assert.True(t, task.isStopped())
+}
+
+func Test_task_isRunning(t *testing.T) {
+ task := newTask(func() {}, time.Second)
+ assert.True(t, task.isRunning())
+
+ task.stop()
+ time.Sleep(time.Millisecond * 500)
+ assert.False(t, task.isRunning())
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.json b/src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.json
new file mode 100644
index 000000000..3e4a77396
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.json
@@ -0,0 +1,27 @@
+{
+ "update_every": 123,
+ "url": "ok",
+ "body": "ok",
+ "method": "ok",
+ "headers": {
+ "ok": "ok"
+ },
+ "username": "ok",
+ "password": "ok",
+ "proxy_url": "ok",
+ "proxy_username": "ok",
+ "proxy_password": "ok",
+ "timeout": 123.123,
+ "not_follow_redirects": true,
+ "tls_ca": "ok",
+ "tls_cert": "ok",
+ "tls_key": "ok",
+ "tls_skip_verify": true,
+ "discovery_interval": 123.123,
+ "host_include": [
+ "ok"
+ ],
+ "vm_include": [
+ "ok"
+ ]
+}
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.yaml
new file mode 100644
index 000000000..d15e2346f
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/testdata/config.yaml
@@ -0,0 +1,22 @@
+update_every: 123
+url: "ok"
+body: "ok"
+method: "ok"
+headers:
+ ok: "ok"
+username: "ok"
+password: "ok"
+proxy_url: "ok"
+proxy_username: "ok"
+proxy_password: "ok"
+timeout: 123.123
+not_follow_redirects: yes
+tls_ca: "ok"
+tls_cert: "ok"
+tls_key: "ok"
+tls_skip_verify: yes
+discovery_interval: 123.123
+host_include:
+ - "ok"
+vm_include:
+ - "ok"
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/vsphere.go b/src/go/collectors/go.d.plugin/modules/vsphere/vsphere.go
new file mode 100644
index 000000000..6473ac58d
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/vsphere.go
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ _ "embed"
+ "sync"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/match"
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/web"
+
+ "github.com/vmware/govmomi/performance"
+)
+
+//go:embed "config_schema.json"
+var configSchema string
+
+func init() {
+ module.Register("vsphere", module.Creator{
+ JobConfigSchema: configSchema,
+ Defaults: module.Defaults{
+ UpdateEvery: 20,
+ },
+ Create: func() module.Module { return New() },
+ Config: func() any { return &Config{} },
+ })
+}
+
+func New() *VSphere {
+ return &VSphere{
+ Config: Config{
+ HTTP: web.HTTP{
+ Client: web.Client{
+ Timeout: web.Duration(time.Second * 20),
+ },
+ },
+ DiscoveryInterval: web.Duration(time.Minute * 5),
+ HostsInclude: []string{"/*"},
+ VMsInclude: []string{"/*"},
+ },
+ collectionLock: &sync.RWMutex{},
+ charts: &module.Charts{},
+ discoveredHosts: make(map[string]int),
+ discoveredVMs: make(map[string]int),
+ charted: make(map[string]bool),
+ }
+}
+
+type Config struct {
+ UpdateEvery int `yaml:"update_every,omitempty" json:"update_every"`
+ web.HTTP `yaml:",inline" json:""`
+ DiscoveryInterval web.Duration `yaml:"discovery_interval,omitempty" json:"discovery_interval"`
+ HostsInclude match.HostIncludes `yaml:"host_include,omitempty" json:"host_include"`
+ VMsInclude match.VMIncludes `yaml:"vm_include,omitempty" json:"vm_include"`
+}
+
+type (
+ VSphere struct {
+ module.Base
+ Config `yaml:",inline" json:""`
+
+ charts *module.Charts
+
+ discoverer
+ scraper
+
+ collectionLock *sync.RWMutex
+ resources *rs.Resources
+ discoveryTask *task
+ discoveredHosts map[string]int
+ discoveredVMs map[string]int
+ charted map[string]bool
+ }
+ discoverer interface {
+ Discover() (*rs.Resources, error)
+ }
+ scraper interface {
+ ScrapeHosts(rs.Hosts) []performance.EntityMetric
+ ScrapeVMs(rs.VMs) []performance.EntityMetric
+ }
+)
+
+func (vs *VSphere) Configuration() any {
+ return vs.Config
+}
+
+func (vs *VSphere) Init() error {
+ if err := vs.validateConfig(); err != nil {
+ vs.Errorf("error on validating config: %v", err)
+ return err
+ }
+
+ vsClient, err := vs.initClient()
+ if err != nil {
+ vs.Errorf("error on creating vsphere client: %v", err)
+ return err
+ }
+
+ if err := vs.initDiscoverer(vsClient); err != nil {
+ vs.Errorf("error on creating vsphere discoverer: %v", err)
+ return err
+ }
+
+ vs.initScraper(vsClient)
+
+ if err := vs.discoverOnce(); err != nil {
+ vs.Errorf("error on discovering: %v", err)
+ return err
+ }
+
+ vs.goDiscovery()
+
+ return nil
+}
+
+func (vs *VSphere) Check() error {
+ return nil
+}
+
+func (vs *VSphere) Charts() *module.Charts {
+ return vs.charts
+}
+
+func (vs *VSphere) Collect() map[string]int64 {
+ mx, err := vs.collect()
+ if err != nil {
+ vs.Error(err)
+ }
+
+ if len(mx) == 0 {
+ return nil
+ }
+ return mx
+}
+
+func (vs *VSphere) Cleanup() {
+ if vs.discoveryTask == nil {
+ return
+ }
+ vs.discoveryTask.stop()
+}
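Putting the pieces together: Init validates the config, builds the client, discoverer, and scraper, runs one synchronous discovery, and then starts the background discovery task; Collect scrapes the most recently discovered hosts and VMs; Cleanup stops the discovery task. A minimal sketch of driving the module directly, mirroring the simulator-based tests below (not part of the patch; URL and credentials are placeholders):

```
// A sketch, not part of the module: URL and credentials are placeholders.
package main

import (
	"fmt"

	"github.com/netdata/netdata/go/go.d.plugin/modules/vsphere"
)

func main() {
	vs := vsphere.New()
	vs.URL = "https://vcenter.example.com"
	vs.Username = "administrator@vsphere.local"
	vs.Password = "password"
	vs.TLSConfig.InsecureSkipVerify = true

	if err := vs.Init(); err != nil {
		panic(err)
	}
	defer vs.Cleanup()

	fmt.Println(len(vs.Collect())) // number of collected metrics
}
```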
diff --git a/src/go/collectors/go.d.plugin/modules/vsphere/vsphere_test.go b/src/go/collectors/go.d.plugin/modules/vsphere/vsphere_test.go
new file mode 100644
index 000000000..8c0045d88
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/vsphere/vsphere_test.go
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package vsphere
+
+import (
+ "crypto/tls"
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/discover"
+ "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/match"
+ rs "github.com/netdata/netdata/go/go.d.plugin/modules/vsphere/resources"
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/web"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/vmware/govmomi/performance"
+ "github.com/vmware/govmomi/simulator"
+)
+
+var (
+ dataConfigJSON, _ = os.ReadFile("testdata/config.json")
+ dataConfigYAML, _ = os.ReadFile("testdata/config.yaml")
+)
+
+func Test_testDataIsValid(t *testing.T) {
+ for name, data := range map[string][]byte{
+ "dataConfigJSON": dataConfigJSON,
+ "dataConfigYAML": dataConfigYAML,
+ } {
+ require.NotNil(t, data, name)
+ }
+}
+
+func TestVSphere_ConfigurationSerialize(t *testing.T) {
+ module.TestConfigurationSerialize(t, &VSphere{}, dataConfigJSON, dataConfigYAML)
+}
+
+func TestVSphere_Init(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+
+ assert.NoError(t, vSphere.Init())
+ assert.NotNil(t, vSphere.discoverer)
+ assert.NotNil(t, vSphere.scraper)
+ assert.NotNil(t, vSphere.resources)
+ assert.NotNil(t, vSphere.discoveryTask)
+ assert.True(t, vSphere.discoveryTask.isRunning())
+}
+
+func TestVSphere_Init_ReturnsErrorIfURLNotSet(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+ vSphere.URL = ""
+
+ assert.Error(t, vSphere.Init())
+}
+
+func TestVSphere_Init_ReturnsErrorIfUsernameNotSet(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+ vSphere.Username = ""
+
+ assert.Error(t, vSphere.Init())
+}
+
+func TestVSphere_Init_ReturnsErrorIfPasswordNotSet(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+ vSphere.Password = ""
+
+ assert.Error(t, vSphere.Init())
+}
+
+func TestVSphere_Init_ReturnsErrorIfClientWrongTLSCA(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+ vSphere.Client.TLSConfig.TLSCA = "testdata/tls"
+
+ assert.Error(t, vSphere.Init())
+}
+
+func TestVSphere_Init_ReturnsErrorIfConnectionRefused(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+ vSphere.URL = "http://127.0.0.1:32001"
+
+ assert.Error(t, vSphere.Init())
+}
+
+func TestVSphere_Init_ReturnsErrorIfInvalidHostVMIncludeFormat(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+
+ vSphere.HostsInclude = match.HostIncludes{"invalid"}
+ assert.Error(t, vSphere.Init())
+
+ vSphere.HostsInclude = vSphere.HostsInclude[:0]
+
+ vSphere.VMsInclude = match.VMIncludes{"invalid"}
+ assert.Error(t, vSphere.Init())
+}
+
+func TestVSphere_Check(t *testing.T) {
+ assert.NoError(t, New().Check())
+}
+
+func TestVSphere_Charts(t *testing.T) {
+ assert.NotNil(t, New().Charts())
+}
+
+func TestVSphere_Cleanup(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+
+ require.NoError(t, vSphere.Init())
+
+ vSphere.Cleanup()
+ time.Sleep(time.Second)
+ assert.True(t, vSphere.discoveryTask.isStopped())
+ assert.False(t, vSphere.discoveryTask.isRunning())
+}
+
+func TestVSphere_Cleanup_NotPanicsIfNotInitialized(t *testing.T) {
+ assert.NotPanics(t, New().Cleanup)
+}
+
+func TestVSphere_Collect(t *testing.T) {
+ vSphere, model, teardown := prepareVSphereSim(t)
+ defer teardown()
+
+ require.NoError(t, vSphere.Init())
+
+ vSphere.scraper = mockScraper{vSphere.scraper}
+
+ expected := map[string]int64{
+ "host-20_cpu.usage.average": 100,
+ "host-20_disk.maxTotalLatency.latest": 100,
+ "host-20_disk.read.average": 100,
+ "host-20_disk.write.average": 100,
+ "host-20_mem.active.average": 100,
+ "host-20_mem.consumed.average": 100,
+ "host-20_mem.granted.average": 100,
+ "host-20_mem.shared.average": 100,
+ "host-20_mem.sharedcommon.average": 100,
+ "host-20_mem.swapinRate.average": 100,
+ "host-20_mem.swapoutRate.average": 100,
+ "host-20_mem.usage.average": 100,
+ "host-20_net.bytesRx.average": 100,
+ "host-20_net.bytesTx.average": 100,
+ "host-20_net.droppedRx.summation": 100,
+ "host-20_net.droppedTx.summation": 100,
+ "host-20_net.errorsRx.summation": 100,
+ "host-20_net.errorsTx.summation": 100,
+ "host-20_net.packetsRx.summation": 100,
+ "host-20_net.packetsTx.summation": 100,
+ "host-20_overall.status.gray": 1,
+ "host-20_overall.status.green": 0,
+ "host-20_overall.status.red": 0,
+ "host-20_overall.status.yellow": 0,
+ "host-20_sys.uptime.latest": 100,
+ "host-34_cpu.usage.average": 100,
+ "host-34_disk.maxTotalLatency.latest": 100,
+ "host-34_disk.read.average": 100,
+ "host-34_disk.write.average": 100,
+ "host-34_mem.active.average": 100,
+ "host-34_mem.consumed.average": 100,
+ "host-34_mem.granted.average": 100,
+ "host-34_mem.shared.average": 100,
+ "host-34_mem.sharedcommon.average": 100,
+ "host-34_mem.swapinRate.average": 100,
+ "host-34_mem.swapoutRate.average": 100,
+ "host-34_mem.usage.average": 100,
+ "host-34_net.bytesRx.average": 100,
+ "host-34_net.bytesTx.average": 100,
+ "host-34_net.droppedRx.summation": 100,
+ "host-34_net.droppedTx.summation": 100,
+ "host-34_net.errorsRx.summation": 100,
+ "host-34_net.errorsTx.summation": 100,
+ "host-34_net.packetsRx.summation": 100,
+ "host-34_net.packetsTx.summation": 100,
+ "host-34_overall.status.gray": 1,
+ "host-34_overall.status.green": 0,
+ "host-34_overall.status.red": 0,
+ "host-34_overall.status.yellow": 0,
+ "host-34_sys.uptime.latest": 100,
+ "host-42_cpu.usage.average": 100,
+ "host-42_disk.maxTotalLatency.latest": 100,
+ "host-42_disk.read.average": 100,
+ "host-42_disk.write.average": 100,
+ "host-42_mem.active.average": 100,
+ "host-42_mem.consumed.average": 100,
+ "host-42_mem.granted.average": 100,
+ "host-42_mem.shared.average": 100,
+ "host-42_mem.sharedcommon.average": 100,
+ "host-42_mem.swapinRate.average": 100,
+ "host-42_mem.swapoutRate.average": 100,
+ "host-42_mem.usage.average": 100,
+ "host-42_net.bytesRx.average": 100,
+ "host-42_net.bytesTx.average": 100,
+ "host-42_net.droppedRx.summation": 100,
+ "host-42_net.droppedTx.summation": 100,
+ "host-42_net.errorsRx.summation": 100,
+ "host-42_net.errorsTx.summation": 100,
+ "host-42_net.packetsRx.summation": 100,
+ "host-42_net.packetsTx.summation": 100,
+ "host-42_overall.status.gray": 1,
+ "host-42_overall.status.green": 0,
+ "host-42_overall.status.red": 0,
+ "host-42_overall.status.yellow": 0,
+ "host-42_sys.uptime.latest": 100,
+ "host-50_cpu.usage.average": 100,
+ "host-50_disk.maxTotalLatency.latest": 100,
+ "host-50_disk.read.average": 100,
+ "host-50_disk.write.average": 100,
+ "host-50_mem.active.average": 100,
+ "host-50_mem.consumed.average": 100,
+ "host-50_mem.granted.average": 100,
+ "host-50_mem.shared.average": 100,
+ "host-50_mem.sharedcommon.average": 100,
+ "host-50_mem.swapinRate.average": 100,
+ "host-50_mem.swapoutRate.average": 100,
+ "host-50_mem.usage.average": 100,
+ "host-50_net.bytesRx.average": 100,
+ "host-50_net.bytesTx.average": 100,
+ "host-50_net.droppedRx.summation": 100,
+ "host-50_net.droppedTx.summation": 100,
+ "host-50_net.errorsRx.summation": 100,
+ "host-50_net.errorsTx.summation": 100,
+ "host-50_net.packetsRx.summation": 100,
+ "host-50_net.packetsTx.summation": 100,
+ "host-50_overall.status.gray": 1,
+ "host-50_overall.status.green": 0,
+ "host-50_overall.status.red": 0,
+ "host-50_overall.status.yellow": 0,
+ "host-50_sys.uptime.latest": 100,
+ "vm-55_cpu.usage.average": 200,
+ "vm-55_disk.maxTotalLatency.latest": 200,
+ "vm-55_disk.read.average": 200,
+ "vm-55_disk.write.average": 200,
+ "vm-55_mem.active.average": 200,
+ "vm-55_mem.consumed.average": 200,
+ "vm-55_mem.granted.average": 200,
+ "vm-55_mem.shared.average": 200,
+ "vm-55_mem.swapinRate.average": 200,
+ "vm-55_mem.swapoutRate.average": 200,
+ "vm-55_mem.swapped.average": 200,
+ "vm-55_mem.usage.average": 200,
+ "vm-55_net.bytesRx.average": 200,
+ "vm-55_net.bytesTx.average": 200,
+ "vm-55_net.droppedRx.summation": 200,
+ "vm-55_net.droppedTx.summation": 200,
+ "vm-55_net.packetsRx.summation": 200,
+ "vm-55_net.packetsTx.summation": 200,
+ "vm-55_overall.status.gray": 0,
+ "vm-55_overall.status.green": 1,
+ "vm-55_overall.status.red": 0,
+ "vm-55_overall.status.yellow": 0,
+ "vm-55_sys.uptime.latest": 200,
+ "vm-58_cpu.usage.average": 200,
+ "vm-58_disk.maxTotalLatency.latest": 200,
+ "vm-58_disk.read.average": 200,
+ "vm-58_disk.write.average": 200,
+ "vm-58_mem.active.average": 200,
+ "vm-58_mem.consumed.average": 200,
+ "vm-58_mem.granted.average": 200,
+ "vm-58_mem.shared.average": 200,
+ "vm-58_mem.swapinRate.average": 200,
+ "vm-58_mem.swapoutRate.average": 200,
+ "vm-58_mem.swapped.average": 200,
+ "vm-58_mem.usage.average": 200,
+ "vm-58_net.bytesRx.average": 200,
+ "vm-58_net.bytesTx.average": 200,
+ "vm-58_net.droppedRx.summation": 200,
+ "vm-58_net.droppedTx.summation": 200,
+ "vm-58_net.packetsRx.summation": 200,
+ "vm-58_net.packetsTx.summation": 200,
+ "vm-58_overall.status.gray": 0,
+ "vm-58_overall.status.green": 1,
+ "vm-58_overall.status.red": 0,
+ "vm-58_overall.status.yellow": 0,
+ "vm-58_sys.uptime.latest": 200,
+ "vm-61_cpu.usage.average": 200,
+ "vm-61_disk.maxTotalLatency.latest": 200,
+ "vm-61_disk.read.average": 200,
+ "vm-61_disk.write.average": 200,
+ "vm-61_mem.active.average": 200,
+ "vm-61_mem.consumed.average": 200,
+ "vm-61_mem.granted.average": 200,
+ "vm-61_mem.shared.average": 200,
+ "vm-61_mem.swapinRate.average": 200,
+ "vm-61_mem.swapoutRate.average": 200,
+ "vm-61_mem.swapped.average": 200,
+ "vm-61_mem.usage.average": 200,
+ "vm-61_net.bytesRx.average": 200,
+ "vm-61_net.bytesTx.average": 200,
+ "vm-61_net.droppedRx.summation": 200,
+ "vm-61_net.droppedTx.summation": 200,
+ "vm-61_net.packetsRx.summation": 200,
+ "vm-61_net.packetsTx.summation": 200,
+ "vm-61_overall.status.gray": 0,
+ "vm-61_overall.status.green": 1,
+ "vm-61_overall.status.red": 0,
+ "vm-61_overall.status.yellow": 0,
+ "vm-61_sys.uptime.latest": 200,
+ "vm-64_cpu.usage.average": 200,
+ "vm-64_disk.maxTotalLatency.latest": 200,
+ "vm-64_disk.read.average": 200,
+ "vm-64_disk.write.average": 200,
+ "vm-64_mem.active.average": 200,
+ "vm-64_mem.consumed.average": 200,
+ "vm-64_mem.granted.average": 200,
+ "vm-64_mem.shared.average": 200,
+ "vm-64_mem.swapinRate.average": 200,
+ "vm-64_mem.swapoutRate.average": 200,
+ "vm-64_mem.swapped.average": 200,
+ "vm-64_mem.usage.average": 200,
+ "vm-64_net.bytesRx.average": 200,
+ "vm-64_net.bytesTx.average": 200,
+ "vm-64_net.droppedRx.summation": 200,
+ "vm-64_net.droppedTx.summation": 200,
+ "vm-64_net.packetsRx.summation": 200,
+ "vm-64_net.packetsTx.summation": 200,
+ "vm-64_overall.status.gray": 0,
+ "vm-64_overall.status.green": 1,
+ "vm-64_overall.status.red": 0,
+ "vm-64_overall.status.yellow": 0,
+ "vm-64_sys.uptime.latest": 200,
+ }
+
+ collected := vSphere.Collect()
+ require.Equal(t, expected, collected)
+
+ count := model.Count()
+ assert.Len(t, vSphere.discoveredHosts, count.Host)
+ assert.Len(t, vSphere.discoveredVMs, count.Machine)
+ assert.Len(t, vSphere.charted, count.Host+count.Machine)
+
+ assert.Len(t, *vSphere.Charts(), count.Host*len(hostChartsTmpl)+count.Machine*len(vmChartsTmpl))
+ ensureCollectedHasAllChartsDimsVarsIDs(t, vSphere, collected)
+}
+
+func TestVSphere_Collect_RemoveHostsVMsInRuntime(t *testing.T) {
+ vSphere, _, teardown := prepareVSphereSim(t)
+ defer teardown()
+
+ require.NoError(t, vSphere.Init())
+ require.NoError(t, vSphere.Check())
+
+ okHostID := "host-50"
+ okVMID := "vm-64"
+ vSphere.discoverer.(*discover.Discoverer).HostMatcher = mockHostMatcher{okHostID}
+ vSphere.discoverer.(*discover.Discoverer).VMMatcher = mockVMMatcher{okVMID}
+
+ require.NoError(t, vSphere.discoverOnce())
+
+ numOfRuns := 5
+ for i := 0; i < numOfRuns; i++ {
+ vSphere.Collect()
+ }
+
+ host := vSphere.resources.Hosts.Get(okHostID)
+ for k, v := range vSphere.discoveredHosts {
+ if k == host.ID {
+ assert.Equal(t, 0, v)
+ } else {
+ assert.Equal(t, numOfRuns, v)
+ }
+ }
+
+ vm := vSphere.resources.VMs.Get(okVMID)
+ for id, fails := range vSphere.discoveredVMs {
+ if id == vm.ID {
+ assert.Equal(t, 0, fails)
+ } else {
+ assert.Equal(t, numOfRuns, fails)
+ }
+
+ }
+
+ for i := numOfRuns; i < failedUpdatesLimit; i++ {
+ vSphere.Collect()
+ }
+
+ assert.Len(t, vSphere.discoveredHosts, 1)
+ assert.Len(t, vSphere.discoveredVMs, 1)
+ assert.Len(t, vSphere.charted, 2)
+
+ for _, c := range *vSphere.Charts() {
+ if strings.HasPrefix(c.ID, okHostID) || strings.HasPrefix(c.ID, okVMID) {
+ assert.False(t, c.Obsolete)
+ } else {
+ assert.True(t, c.Obsolete)
+ }
+ }
+}
+
+func TestVSphere_Collect_Run(t *testing.T) {
+ vSphere, model, teardown := prepareVSphereSim(t)
+ defer teardown()
+
+ vSphere.DiscoveryInterval = web.Duration(time.Second * 2)
+ require.NoError(t, vSphere.Init())
+ require.NoError(t, vSphere.Check())
+
+ runs := 20
+ for i := 0; i < runs; i++ {
+ assert.True(t, len(vSphere.Collect()) > 0)
+ if i < 6 {
+ time.Sleep(time.Second)
+ }
+ }
+
+ count := model.Count()
+ assert.Len(t, vSphere.discoveredHosts, count.Host)
+ assert.Len(t, vSphere.discoveredVMs, count.Machine)
+ assert.Len(t, vSphere.charted, count.Host+count.Machine)
+ assert.Len(t, *vSphere.charts, count.Host*len(hostChartsTmpl)+count.Machine*len(vmChartsTmpl))
+}
+
+func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, vSphere *VSphere, collected map[string]int64) {
+ for _, chart := range *vSphere.Charts() {
+ for _, dim := range chart.Dims {
+ _, ok := collected[dim.ID]
+ assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID)
+ }
+ for _, v := range chart.Vars {
+ _, ok := collected[v.ID]
+ assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID)
+ }
+ }
+}
+
+func prepareVSphereSim(t *testing.T) (vSphere *VSphere, model *simulator.Model, teardown func()) {
+ model, srv := createSim(t)
+ vSphere = New()
+ teardown = func() { model.Remove(); srv.Close(); vSphere.Cleanup() }
+
+ vSphere.Username = "administrator"
+ vSphere.Password = "password"
+ vSphere.URL = srv.URL.String()
+ vSphere.TLSConfig.InsecureSkipVerify = true
+
+ return vSphere, model, teardown
+}
+
+func createSim(t *testing.T) (*simulator.Model, *simulator.Server) {
+ model := simulator.VPX()
+ err := model.Create()
+ require.NoError(t, err)
+ model.Service.TLS = new(tls.Config)
+ return model, model.Service.NewServer()
+}
+
+type mockScraper struct {
+ scraper
+}
+
+func (s mockScraper) ScrapeHosts(hosts rs.Hosts) []performance.EntityMetric {
+ ms := s.scraper.ScrapeHosts(hosts)
+ return populateMetrics(ms, 100)
+}
+func (s mockScraper) ScrapeVMs(vms rs.VMs) []performance.EntityMetric {
+ ms := s.scraper.ScrapeVMs(vms)
+ return populateMetrics(ms, 200)
+}
+
+func populateMetrics(ms []performance.EntityMetric, value int64) []performance.EntityMetric {
+ for i := range ms {
+ for ii := range ms[i].Value {
+ v := &ms[i].Value[ii].Value
+ if *v == nil {
+ *v = append(*v, value)
+ } else {
+ (*v)[0] = value
+ }
+ }
+ }
+ return ms
+}
+
+type mockHostMatcher struct{ name string }
+type mockVMMatcher struct{ name string }
+
+func (m mockHostMatcher) Match(host *rs.Host) bool { return m.name == host.ID }
+func (m mockVMMatcher) Match(vm *rs.VM) bool { return m.name == vm.ID }