Diffstat
-rw-r--r--  src/go/collectors/go.d.plugin/modules/k8s_state/collect.go  264
1 file changed, 264 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/k8s_state/collect.go b/src/go/collectors/go.d.plugin/modules/k8s_state/collect.go
new file mode 100644
index 000000000..033d330ce
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/k8s_state/collect.go
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package k8s_state
+
+import (
+ "errors"
+ "fmt"
+ "strings"
+ "time"
+
+ "github.com/netdata/netdata/go/go.d.plugin/agent/module"
+
+ corev1 "k8s.io/api/core/v1"
+)
+
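+// precision is the fixed-point factor used by calcPercentage so that
+// percentages keep three decimal places when stored as int64.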
+const precision = 1000
+
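+// collect lazily starts the discovery machinery on the first call and then
+// snapshots the shared cluster state into a flat metrics map.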
+func (ks *KubeState) collect() (map[string]int64, error) {
+ if ks.discoverer == nil {
+ return nil, errors.New("nil discoverer")
+ }
+
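+ // One-time setup: start the state-update and discovery goroutines and
+ // attach the cluster identity labels to the discovery status chart.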
+ ks.once.Do(func() {
+ ks.startTime = time.Now()
+ in := make(chan resource)
+
+ ks.wg.Add(1)
+ go func() { defer ks.wg.Done(); ks.runUpdateState(in) }()
+
+ ks.wg.Add(1)
+ go func() { defer ks.wg.Done(); ks.discoverer.run(ks.ctx, in) }()
+
+ ks.kubeClusterID = ks.getKubeClusterID()
+ ks.kubeClusterName = ks.getKubeClusterName()
+ if chart := ks.Charts().Get(discoveryStatusChart.ID); chart != nil {
+ chart.Labels = []module.Label{
+ {Key: labelKeyClusterID, Value: ks.kubeClusterID, Source: module.LabelSourceK8s},
+ {Key: labelKeyClusterName, Value: ks.kubeClusterName, Source: module.LabelSourceK8s},
+ }
+ }
+ })
+
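+ // Discoverer state dimensions are reported on every run so the discovery
+ // status chart has data even before the first discovery completes.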
+ mx := map[string]int64{
+ "discovery_node_discoverer_state": 1,
+ "discovery_pod_discoverer_state": 1,
+ }
+
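+ // Hold off on cluster state metrics until discovery is ready and the
+ // initial delay has elapsed.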
+ if !ks.discoverer.ready() || time.Since(ks.startTime) < ks.initDelay {
+ return mx, nil
+ }
+
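+ // The update goroutine mutates the state concurrently; hold the lock for
+ // the duration of the snapshot.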
+ ks.state.Lock()
+ defer ks.state.Unlock()
+
+ ks.collectKubeState(mx)
+
+ return mx, nil
+}
+
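+// collectKubeState resets the per-node aggregates, then collects pods before
+// nodes so the node-level rollups reflect the current pass.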
+func (ks *KubeState) collectKubeState(mx map[string]int64) {
+ for _, ns := range ks.state.nodes {
+ ns.resetStats()
+ }
+ ks.collectPodsState(mx)
+ ks.collectNodesState(mx)
+}
+
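+// collectPodsState prunes deleted pods, registers charts for new ones, and
+// emits per-pod metrics while aggregating pod stats into the owning node.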
+func (ks *KubeState) collectPodsState(mx map[string]int64) {
+ now := time.Now()
+ for _, ps := range ks.state.pods {
+ if ps.deleted {
+ delete(ks.state.pods, podSource(ps.namespace, ps.name))
+ ks.removePodCharts(ps)
+ continue
+ }
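+ // A pod seen for the first time may not be scheduled yet; its charts get
+ // the node label once a node is assigned.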
+ if ps.new {
+ ps.new = false
+ ks.addPodCharts(ps)
+ ps.unscheduled = ps.nodeName == ""
+ } else if ps.unscheduled && ps.nodeName != "" {
+ ps.unscheduled = false
+ ks.updatePodChartsNodeLabel(ps)
+ }
+
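+ // Roll this pod's resources, conditions, and container states up into its
+ // node's stats; ns is nil while the pod is unscheduled.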
+ ns := ks.state.nodes[nodeSource(ps.nodeName)]
+ if ns != nil {
+ ns.stats.pods++
+ ns.stats.reqCPU += ps.reqCPU
+ ns.stats.limitCPU += ps.limitCPU
+ ns.stats.reqMem += ps.reqMem
+ ns.stats.limitMem += ps.limitMem
+ ns.stats.podsCondPodReady += condStatusToInt(ps.condPodReady)
+ ns.stats.podsCondPodScheduled += condStatusToInt(ps.condPodScheduled)
+ ns.stats.podsCondPodInitialized += condStatusToInt(ps.condPodInitialized)
+ ns.stats.podsCondContainersReady += condStatusToInt(ps.condContainersReady)
+ ns.stats.podsReadinessReady += boolToInt(ps.condPodReady == corev1.ConditionTrue)
+ ns.stats.podsReadinessUnready += boolToInt(ps.condPodReady != corev1.ConditionTrue)
+ ns.stats.podsPhasePending += boolToInt(ps.phase == corev1.PodPending)
+ ns.stats.podsPhaseRunning += boolToInt(ps.phase == corev1.PodRunning)
+ ns.stats.podsPhaseSucceeded += boolToInt(ps.phase == corev1.PodSucceeded)
+ ns.stats.podsPhaseFailed += boolToInt(ps.phase == corev1.PodFailed)
+ for _, cs := range ps.initContainers {
+ ns.stats.initContainers++
+ ns.stats.initContStateRunning += boolToInt(cs.stateRunning)
+ ns.stats.initContStateWaiting += boolToInt(cs.stateWaiting)
+ ns.stats.initContStateTerminated += boolToInt(cs.stateTerminated)
+ }
+ for _, cs := range ps.containers {
+ ns.stats.containers++
+ ns.stats.contStateRunning += boolToInt(cs.stateRunning)
+ ns.stats.contStateWaiting += boolToInt(cs.stateWaiting)
+ ns.stats.contStateTerminated += boolToInt(cs.stateTerminated)
+ }
+ }
+
+ px := fmt.Sprintf("pod_%s_", ps.id())
+
+ mx[px+"cond_podready"] = condStatusToInt(ps.condPodReady)
+ mx[px+"cond_podscheduled"] = condStatusToInt(ps.condPodScheduled)
+ mx[px+"cond_podinitialized"] = condStatusToInt(ps.condPodInitialized)
+ mx[px+"cond_containersready"] = condStatusToInt(ps.condContainersReady)
+ mx[px+"phase_running"] = boolToInt(ps.phase == corev1.PodRunning)
+ mx[px+"phase_failed"] = boolToInt(ps.phase == corev1.PodFailed)
+ mx[px+"phase_succeeded"] = boolToInt(ps.phase == corev1.PodSucceeded)
+ mx[px+"phase_pending"] = boolToInt(ps.phase == corev1.PodPending)
+ mx[px+"age"] = int64(now.Sub(ps.creationTime).Seconds())
+ mx[px+"cpu_requests_used"] = ps.reqCPU
+ mx[px+"cpu_limits_used"] = ps.limitCPU
+ mx[px+"mem_requests_used"] = ps.reqMem
+ mx[px+"mem_limits_used"] = ps.limitMem
+
+ mx[px+"init_containers"] = int64(len(ps.initContainers))
+ mx[px+"containers"] = int64(len(ps.containers))
+
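+ // Zero the state counters first so pods with no containers still report
+ // all dimensions.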
+ mx[px+"init_containers_state_running"] = 0
+ mx[px+"init_containers_state_waiting"] = 0
+ mx[px+"init_containers_state_terminated"] = 0
+ for _, cs := range ps.initContainers {
+ mx[px+"init_containers_state_running"] += boolToInt(cs.stateRunning)
+ mx[px+"init_containers_state_waiting"] += boolToInt(cs.stateWaiting)
+ mx[px+"init_containers_state_terminated"] += boolToInt(cs.stateTerminated)
+ }
+ mx[px+"containers_state_running"] = 0
+ mx[px+"containers_state_waiting"] = 0
+ mx[px+"containers_state_terminated"] = 0
+ for _, cs := range ps.containers {
+ if cs.new {
+ cs.new = false
+ ks.addContainerCharts(ps, cs)
+ }
+ mx[px+"containers_state_running"] += boolToInt(cs.stateRunning)
+ mx[px+"containers_state_waiting"] += boolToInt(cs.stateWaiting)
+ mx[px+"containers_state_terminated"] += boolToInt(cs.stateTerminated)
+
+ ppx := fmt.Sprintf("%scontainer_%s_", px, cs.name)
+ mx[ppx+"state_running"] = boolToInt(cs.stateRunning)
+ mx[ppx+"state_waiting"] = boolToInt(cs.stateWaiting)
+ mx[ppx+"state_terminated"] = boolToInt(cs.stateTerminated)
+ mx[ppx+"readiness"] = boolToInt(cs.ready)
+ mx[ppx+"restarts"] = cs.restarts
+ for _, r := range cs.stateWaitingReasons {
+ if r.new {
+ r.new = false
+ ks.addContainerWaitingStateReasonToChart(ps, cs, r.reason)
+ }
+ mx[ppx+"state_waiting_reason_"+r.reason] = boolToInt(r.active)
+ }
+ for _, r := range cs.stateTerminatedReasons {
+ if r.new {
+ r.new = false
+ ks.addContainerTerminatedStateReasonToChart(ps, cs, r.reason)
+ }
+ mx[ppx+"state_terminated_reason_"+r.reason] = boolToInt(r.active)
+ }
+ }
+ }
+}
+
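+// collectNodesState prunes deleted nodes, registers charts for new ones, and
+// emits per-node metrics, including the aggregates built in collectPodsState.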
+func (ks *KubeState) collectNodesState(mx map[string]int64) {
+ now := time.Now()
+ for _, ns := range ks.state.nodes {
+ if ns.deleted {
+ delete(ks.state.nodes, nodeSource(ns.name))
+ ks.removeNodeCharts(ns)
+ continue
+ }
+ if ns.new {
+ ns.new = false
+ ks.addNodeCharts(ns)
+ }
+
+ px := fmt.Sprintf("node_%s_", ns.id())
+
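+ // Condition types (e.g. Ready, DiskPressure) are discovered dynamically;
+ // each new type is added to the node's condition chart on first sight.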
+ for typ, cond := range ns.conditions {
+ if cond.new {
+ cond.new = false
+ ks.addNodeConditionToCharts(ns, typ)
+ }
+ mx[px+"cond_"+strings.ToLower(typ)] = condStatusToInt(cond.status)
+ }
+
+ mx[px+"age"] = int64(now.Sub(ns.creationTime).Seconds())
+ mx[px+"alloc_pods_util"] = calcPercentage(ns.stats.pods, ns.allocatablePods)
+ mx[px+"pods_readiness_ready"] = ns.stats.podsReadinessReady
+ mx[px+"pods_readiness_unready"] = ns.stats.podsReadinessUnready
+ mx[px+"pods_readiness"] = calcPercentage(ns.stats.podsReadinessReady, ns.stats.pods)
+ mx[px+"pods_phase_running"] = ns.stats.podsPhaseRunning
+ mx[px+"pods_phase_failed"] = ns.stats.podsPhaseFailed
+ mx[px+"pods_phase_succeeded"] = ns.stats.podsPhaseSucceeded
+ mx[px+"pods_phase_pending"] = ns.stats.podsPhasePending
+ mx[px+"pods_cond_podready"] = ns.stats.podsCondPodReady
+ mx[px+"pods_cond_podscheduled"] = ns.stats.podsCondPodScheduled
+ mx[px+"pods_cond_podinitialized"] = ns.stats.podsCondPodInitialized
+ mx[px+"pods_cond_containersready"] = ns.stats.podsCondContainersReady
+ mx[px+"pods_cond_containersready"] = ns.stats.podsCondContainersReady
+ mx[px+"schedulability_schedulable"] = boolToInt(!ns.unSchedulable)
+ mx[px+"schedulability_unschedulable"] = boolToInt(ns.unSchedulable)
+ mx[px+"alloc_pods_available"] = ns.allocatablePods - ns.stats.pods
+ mx[px+"alloc_pods_allocated"] = ns.stats.pods
+ mx[px+"alloc_cpu_requests_util"] = calcPercentage(ns.stats.reqCPU, ns.allocatableCPU)
+ mx[px+"alloc_cpu_limits_util"] = calcPercentage(ns.stats.limitCPU, ns.allocatableCPU)
+ mx[px+"alloc_mem_requests_util"] = calcPercentage(ns.stats.reqMem, ns.allocatableMem)
+ mx[px+"alloc_mem_limits_util"] = calcPercentage(ns.stats.limitMem, ns.allocatableMem)
+ mx[px+"alloc_cpu_requests_used"] = ns.stats.reqCPU
+ mx[px+"alloc_cpu_limits_used"] = ns.stats.limitCPU
+ mx[px+"alloc_mem_requests_used"] = ns.stats.reqMem
+ mx[px+"alloc_mem_limits_used"] = ns.stats.limitMem
+ mx[px+"init_containers"] = ns.stats.initContainers
+ mx[px+"containers"] = ns.stats.containers
+ mx[px+"containers_state_running"] = ns.stats.contStateRunning
+ mx[px+"containers_state_waiting"] = ns.stats.contStateWaiting
+ mx[px+"containers_state_terminated"] = ns.stats.contStateTerminated
+ mx[px+"init_containers_state_running"] = ns.stats.initContStateRunning
+ mx[px+"init_containers_state_waiting"] = ns.stats.initContStateWaiting
+ mx[px+"init_containers_state_terminated"] = ns.stats.initContStateTerminated
+ }
+}
+
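+// boolToInt converts a bool to a metric value: 1 for true, 0 for false.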
+func boolToInt(v bool) int64 {
+ if v {
+ return 1
+ }
+ return 0
+}
+
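+// condStatusToInt converts a condition status to a metric value: only
+// ConditionTrue maps to 1; ConditionFalse and ConditionUnknown map to 0.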
+func condStatusToInt(cs corev1.ConditionStatus) int64 {
+ switch cs {
+ case corev1.ConditionFalse:
+ return 0
+ case corev1.ConditionTrue:
+ return 1
+ case corev1.ConditionUnknown:
+ return 0
+ default:
+ return 0
+ }
+}
+
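+// calcPercentage returns value/total as a percentage scaled by precision
+// (fixed point); a zero total yields 0 to avoid division by zero.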
+func calcPercentage(value, total int64) int64 {
+ if total == 0 {
+ return 0
+ }
+ return int64(float64(value) / float64(total) * 100 * precision)
+}