| field | value | date |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 11:19:16 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:53:24 +0000 |
| commit | b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 (patch) | |
| tree | d4d31289c39fc00da064a825df13a0b98ce95b10 /src/go/collectors/go.d.plugin/modules/k8s_kubelet/collect.go | |
| parent | Adding upstream version 1.44.3. (diff) | |
| download | netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.tar.xz, netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.zip | |
Adding upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r-- | src/go/collectors/go.d.plugin/modules/k8s_kubelet/collect.go | 348 |
1 file changed, 348 insertions(+), 0 deletions(-)
```
diff --git a/src/go/collectors/go.d.plugin/modules/k8s_kubelet/collect.go b/src/go/collectors/go.d.plugin/modules/k8s_kubelet/collect.go
new file mode 100644
index 000000000..350c0bf45
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/k8s_kubelet/collect.go
@@ -0,0 +1,348 @@
```

```go
// SPDX-License-Identifier: GPL-3.0-or-later

package k8s_kubelet

import (
	"math"

	mtx "github.com/netdata/netdata/go/go.d.plugin/pkg/metrics"
	"github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus"
	"github.com/netdata/netdata/go/go.d.plugin/pkg/stm"

	"github.com/netdata/netdata/go/go.d.plugin/agent/module"
)

func (k *Kubelet) collect() (map[string]int64, error) {
	raw, err := k.prom.ScrapeSeries()
	if err != nil {
		return nil, err
	}

	mx := newMetrics()

	k.collectToken(raw, mx)
	k.collectRESTClientHTTPRequests(raw, mx)
	k.collectAPIServer(raw, mx)
	k.collectKubelet(raw, mx)
	k.collectVolumeManager(raw, mx)

	return stm.ToMap(mx), nil
}

func (k *Kubelet) collectLogsUsagePerPod(raw prometheus.Series, mx *metrics) {
	chart := k.charts.Get("kubelet_pods_log_filesystem_used_bytes")
	seen := make(map[string]bool)

	for _, metric := range raw.FindByName("kubelet_container_log_filesystem_used_bytes") {
		pod := metric.Labels.Get("pod")
		namespace := metric.Labels.Get("namespace")

		if pod == "" || namespace == "" {
			continue
		}

		key := namespace + "_" + pod
		dimID := "kubelet_log_file_system_usage_" + key

		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: pod})
			chart.MarkNotCreated()
		}

		seen[dimID] = true
		v := mx.Kubelet.PodLogFileSystemUsage[key]
		v.Add(metric.Value)
		mx.Kubelet.PodLogFileSystemUsage[key] = v
	}

	for _, dim := range chart.Dims {
		if seen[dim.ID] {
			continue
		}
		_ = chart.MarkDimRemove(dim.ID, false)
		chart.MarkNotCreated()
	}
}

func (k *Kubelet) collectVolumeManager(raw prometheus.Series, mx *metrics) {
	vmPlugins := make(map[string]*volumeManagerPlugin)

	for _, metric := range raw.FindByName("volume_manager_total_volumes") {
		pluginName := metric.Labels.Get("plugin_name")
		state := metric.Labels.Get("state")

		if !k.collectedVMPlugins[pluginName] {
			_ = k.charts.Add(newVolumeManagerChart(pluginName))
			k.collectedVMPlugins[pluginName] = true
		}
		if _, ok := vmPlugins[pluginName]; !ok {
			vmPlugins[pluginName] = &volumeManagerPlugin{}
		}

		switch state {
		case "actual_state_of_world":
			vmPlugins[pluginName].State.Actual.Set(metric.Value)
		case "desired_state_of_world":
			vmPlugins[pluginName].State.Desired.Set(metric.Value)
		}
	}

	mx.VolumeManager.Plugins = vmPlugins
}
```
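`collectLogsUsagePerPod` is the pattern used throughout this module for charts whose dimensions follow the workload: each scrape adds a dimension for any newly seen pod, records which dimensions are still reporting, and removes the rest (`MarkNotCreated` presumably prompts the agent to re-send the chart definition). Below is a minimal, self-contained sketch of that reconcile step; the `chart`/`dim` types are toy stand-ins for netdata's chart API, not the real `module.Chart`:

```go
package main

import "fmt"

// Toy stand-ins for the module's chart types (illustration only).
type dim struct{ ID, Name string }
type chart struct{ Dims []dim }

func (c *chart) hasDim(id string) bool {
	for _, d := range c.Dims {
		if d.ID == id {
			return true
		}
	}
	return false
}

// reconcile adds a dimension per live pod and drops dimensions for pods
// that stopped reporting -- the same add/seen/remove dance as
// collectLogsUsagePerPod. Keys are "namespace_pod".
func reconcile(c *chart, livePods map[string]string) {
	seen := make(map[string]bool)
	for key, pod := range livePods {
		id := "kubelet_log_file_system_usage_" + key
		if !c.hasDim(id) {
			c.Dims = append(c.Dims, dim{ID: id, Name: pod})
		}
		seen[id] = true
	}
	kept := c.Dims[:0] // filter in place
	for _, d := range c.Dims {
		if seen[d.ID] {
			kept = append(kept, d)
		}
	}
	c.Dims = kept
}

func main() {
	c := &chart{}
	reconcile(c, map[string]string{"default_web-0": "web-0"})
	reconcile(c, map[string]string{"default_web-1": "web-1"}) // web-0 went away
	fmt.Println(c.Dims) // [{kubelet_log_file_system_usage_default_web-1 web-1}]
}
```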
```go
func (k *Kubelet) collectKubelet(raw prometheus.Series, mx *metrics) {
	value := raw.FindByName("kubelet_node_config_error").Max()
	mx.Kubelet.NodeConfigError.Set(value)

	/*
		# HELP kubelet_running_containers [ALPHA] Number of containers currently running
		# TYPE kubelet_running_containers gauge
		kubelet_running_containers{container_state="created"} 1
		kubelet_running_containers{container_state="exited"} 13
		kubelet_running_containers{container_state="running"} 42
		kubelet_running_containers{container_state="unknown"} 1
	*/

	// Newer kubelets drop the old count gauge; fall back to the labeled
	// kubelet_running_containers series and pick the "running" state.
	ms := raw.FindByName("kubelet_running_container_count")
	value = ms.Max()
	if ms.Len() == 0 {
		for _, m := range raw.FindByName("kubelet_running_containers") {
			if m.Labels.Get("container_state") == "running" {
				value = m.Value
				break
			}
		}
	}
	mx.Kubelet.RunningContainerCount.Set(value)

	/*
		# HELP kubelet_running_pods [ALPHA] Number of pods currently running
		# TYPE kubelet_running_pods gauge
		kubelet_running_pods 37
	*/
	value = raw.FindByNames("kubelet_running_pod_count", "kubelet_running_pods").Max()
	mx.Kubelet.RunningPodCount.Set(value)

	k.collectRuntimeOperations(raw, mx)
	k.collectRuntimeOperationsErrors(raw, mx)
	k.collectDockerOperations(raw, mx)
	k.collectDockerOperationsErrors(raw, mx)
	k.collectPLEGRelisting(raw, mx)
	k.collectLogsUsagePerPod(raw, mx)
}

func (k *Kubelet) collectAPIServer(raw prometheus.Series, mx *metrics) {
	value := raw.FindByName("apiserver_audit_requests_rejected_total").Max()
	mx.APIServer.Audit.Requests.Rejected.Set(value)

	value = raw.FindByName("apiserver_storage_data_key_generation_failures_total").Max()
	mx.APIServer.Storage.DataKeyGeneration.Failures.Set(value)

	value = raw.FindByName("apiserver_storage_envelope_transformation_cache_misses_total").Max()
	mx.APIServer.Storage.EnvelopeTransformation.CacheMisses.Set(value)

	k.collectStorageDataKeyGenerationLatencies(raw, mx)
}

func (k *Kubelet) collectToken(raw prometheus.Series, mx *metrics) {
	value := raw.FindByName("get_token_count").Max()
	mx.Token.Count.Set(value)

	value = raw.FindByName("get_token_fail_count").Max()
	mx.Token.FailCount.Set(value)
}

func (k *Kubelet) collectPLEGRelisting(raw prometheus.Series, mx *metrics) {
	// Summary
	for _, metric := range raw.FindByName("kubelet_pleg_relist_interval_microseconds") {
		if math.IsNaN(metric.Value) {
			continue
		}
		quantile := metric.Labels.Get("quantile")
		switch quantile {
		case "0.5":
			mx.Kubelet.PLEG.Relist.Interval.Quantile05.Set(metric.Value)
		case "0.9":
			mx.Kubelet.PLEG.Relist.Interval.Quantile09.Set(metric.Value)
		case "0.99":
			mx.Kubelet.PLEG.Relist.Interval.Quantile099.Set(metric.Value)
		}
	}
	for _, metric := range raw.FindByName("kubelet_pleg_relist_latency_microseconds") {
		if math.IsNaN(metric.Value) {
			continue
		}
		quantile := metric.Labels.Get("quantile")
		switch quantile {
		case "0.5":
			mx.Kubelet.PLEG.Relist.Latency.Quantile05.Set(metric.Value)
		case "0.9":
			mx.Kubelet.PLEG.Relist.Latency.Quantile09.Set(metric.Value)
		case "0.99":
			mx.Kubelet.PLEG.Relist.Latency.Quantile099.Set(metric.Value)
		}
	}
}
```
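The `math.IsNaN` guard in `collectPLEGRelisting` is there because summary quantiles, as typically exported by Prometheus client libraries, report `NaN` once the sliding observation window is empty; skipping the sample leaves the previously collected value in place rather than feeding a non-number into the chart. A stdlib-only sketch of the same skip-on-NaN fold (the `sample` type is illustrative, not the module's series type):

```go
package main

import (
	"fmt"
	"math"
)

// sample mimics one scraped summary series: a quantile label and its value.
type sample struct {
	quantile string
	value    float64
}

// latestQuantiles folds samples into a quantile->value map, ignoring NaN
// samples the way collectPLEGRelisting does.
func latestQuantiles(samples []sample) map[string]float64 {
	out := make(map[string]float64)
	for _, s := range samples {
		if math.IsNaN(s.value) {
			continue // empty observation window; keep whatever we had
		}
		out[s.quantile] = s.value
	}
	return out
}

func main() {
	got := latestQuantiles([]sample{
		{"0.5", 12345}, {"0.9", math.NaN()}, {"0.99", 456789},
	})
	fmt.Println(got) // map[0.5:12345 0.99:456789]
}
```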
```go
func (k *Kubelet) collectStorageDataKeyGenerationLatencies(raw prometheus.Series, mx *metrics) {
	latencies := &mx.APIServer.Storage.DataKeyGeneration.Latencies
	metricName := "apiserver_storage_data_key_generation_latencies_microseconds_bucket"

	for _, metric := range raw.FindByName(metricName) {
		value := metric.Value
		bucket := metric.Labels.Get("le")
		switch bucket {
		case "5":
			latencies.LE5.Set(value)
		case "10":
			latencies.LE10.Set(value)
		case "20":
			latencies.LE20.Set(value)
		case "40":
			latencies.LE40.Set(value)
		case "80":
			latencies.LE80.Set(value)
		case "160":
			latencies.LE160.Set(value)
		case "320":
			latencies.LE320.Set(value)
		case "640":
			latencies.LE640.Set(value)
		case "1280":
			latencies.LE1280.Set(value)
		case "2560":
			latencies.LE2560.Set(value)
		case "5120":
			latencies.LE5120.Set(value)
		case "10240":
			latencies.LE10240.Set(value)
		case "20480":
			latencies.LE20480.Set(value)
		case "40960":
			latencies.LE40960.Set(value)
		case "+Inf":
			latencies.LEInf.Set(value)
		}
	}

	// Bucket values are cumulative; subtract from the largest bound down
	// so each dimension holds only its own bucket's count.
	latencies.LEInf.Sub(latencies.LE40960.Value())
	latencies.LE40960.Sub(latencies.LE20480.Value())
	latencies.LE20480.Sub(latencies.LE10240.Value())
	latencies.LE10240.Sub(latencies.LE5120.Value())
	latencies.LE5120.Sub(latencies.LE2560.Value())
	latencies.LE2560.Sub(latencies.LE1280.Value())
	latencies.LE1280.Sub(latencies.LE640.Value())
	latencies.LE640.Sub(latencies.LE320.Value())
	latencies.LE320.Sub(latencies.LE160.Value())
	latencies.LE160.Sub(latencies.LE80.Value())
	latencies.LE80.Sub(latencies.LE40.Value())
	latencies.LE40.Sub(latencies.LE20.Value())
	latencies.LE20.Sub(latencies.LE10.Value())
	latencies.LE10.Sub(latencies.LE5.Value())
}

func (k *Kubelet) collectRESTClientHTTPRequests(raw prometheus.Series, mx *metrics) {
	metricName := "rest_client_requests_total"
	chart := k.charts.Get("rest_client_requests_by_code")

	for _, metric := range raw.FindByName(metricName) {
		code := metric.Labels.Get("code")
		if code == "" {
			continue
		}
		dimID := "rest_client_requests_" + code
		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: code, Algo: module.Incremental})
			chart.MarkNotCreated()
		}
		mx.RESTClient.Requests.ByStatusCode[code] = mtx.Gauge(metric.Value)
	}

	chart = k.charts.Get("rest_client_requests_by_method")

	for _, metric := range raw.FindByName(metricName) {
		method := metric.Labels.Get("method")
		if method == "" {
			continue
		}
		dimID := "rest_client_requests_" + method
		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: method, Algo: module.Incremental})
			chart.MarkNotCreated()
		}
		mx.RESTClient.Requests.ByMethod[method] = mtx.Gauge(metric.Value)
	}
}

func (k *Kubelet) collectRuntimeOperations(raw prometheus.Series, mx *metrics) {
	chart := k.charts.Get("kubelet_runtime_operations")

	// kubelet_runtime_operations_total
	for _, metric := range raw.FindByNames("kubelet_runtime_operations", "kubelet_runtime_operations_total") {
		opType := metric.Labels.Get("operation_type")
		if opType == "" {
			continue
		}
		dimID := "kubelet_runtime_operations_" + opType
		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
			chart.MarkNotCreated()
		}
		mx.Kubelet.Runtime.Operations[opType] = mtx.Gauge(metric.Value)
	}
}

func (k *Kubelet) collectRuntimeOperationsErrors(raw prometheus.Series, mx *metrics) {
	chart := k.charts.Get("kubelet_runtime_operations_errors")

	// kubelet_runtime_operations_errors_total
	for _, metric := range raw.FindByNames("kubelet_runtime_operations_errors", "kubelet_runtime_operations_errors_total") {
		opType := metric.Labels.Get("operation_type")
		if opType == "" {
			continue
		}
		dimID := "kubelet_runtime_operations_errors_" + opType
		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
			chart.MarkNotCreated()
		}
		mx.Kubelet.Runtime.OperationsErrors[opType] = mtx.Gauge(metric.Value)
	}
}
```
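The long chain of `Sub` calls undoes the cumulative encoding of Prometheus histogram buckets: the `le="10240"` series counts every observation that also fell into `le="5120"` and below. Subtracting from the largest bound downward turns cumulative counts into per-bucket counts, and the order matters, since each step must read its lower neighbour while that neighbour is still cumulative. The same transform over a slice, as a minimal sketch:

```go
package main

import "fmt"

// deCumulate converts cumulative Prometheus histogram bucket counts
// (ordered by ascending "le" bound, +Inf last) into per-bucket counts,
// walking from the largest bucket down exactly like the Sub chain above.
func deCumulate(cumulative []float64) []float64 {
	perBucket := append([]float64(nil), cumulative...)
	for i := len(perBucket) - 1; i > 0; i-- {
		perBucket[i] -= perBucket[i-1]
	}
	return perBucket
}

func main() {
	// Cumulative counts for le = 5, 10, 20, +Inf.
	fmt.Println(deCumulate([]float64{4, 7, 9, 10})) // [4 3 2 1]
}
```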
```go
func (k *Kubelet) collectDockerOperations(raw prometheus.Series, mx *metrics) {
	chart := k.charts.Get("kubelet_docker_operations")

	// kubelet_docker_operations_total
	for _, metric := range raw.FindByNames("kubelet_docker_operations", "kubelet_docker_operations_total") {
		opType := metric.Labels.Get("operation_type")
		if opType == "" {
			continue
		}
		dimID := "kubelet_docker_operations_" + opType
		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
			chart.MarkNotCreated()
		}
		mx.Kubelet.Docker.Operations[opType] = mtx.Gauge(metric.Value)
	}
}

func (k *Kubelet) collectDockerOperationsErrors(raw prometheus.Series, mx *metrics) {
	chart := k.charts.Get("kubelet_docker_operations_errors")

	// kubelet_docker_operations_errors_total
	for _, metric := range raw.FindByNames("kubelet_docker_operations_errors", "kubelet_docker_operations_errors_total") {
		opType := metric.Labels.Get("operation_type")
		if opType == "" {
			continue
		}
		dimID := "kubelet_docker_operations_errors_" + opType
		if !chart.HasDim(dimID) {
			_ = chart.AddDim(&Dim{ID: dimID, Name: opType, Algo: module.Incremental})
			chart.MarkNotCreated()
		}
		mx.Kubelet.Docker.OperationsErrors[opType] = mtx.Gauge(metric.Value)
	}
}
```
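All of the `*_total` series handled above are monotonic counters, so the collector stores the raw cumulative values and marks each dimension `module.Incremental`, leaving rate computation to the agent. A sketch of what an incremental dimension amounts to between two scrapes (the `rate` helper is illustrative, not netdata's implementation):

```go
package main

import (
	"fmt"
	"time"
)

// rate derives a per-second rate from two successive counter samples,
// which is what the agent does with a dimension marked module.Incremental.
func rate(prev, curr int64, elapsed time.Duration) float64 {
	return float64(curr-prev) / elapsed.Seconds()
}

func main() {
	// Two scrapes of rest_client_requests_total{code="200"}, 10s apart.
	fmt.Println(rate(1500, 1620, 10*time.Second)) // 12 requests/s
}
```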