summaryrefslogtreecommitdiffstats
path: root/src/go/collectors/go.d.plugin/modules/envoy/collect.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/go/collectors/go.d.plugin/modules/envoy/collect.go423
1 files changed, 423 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/envoy/collect.go b/src/go/collectors/go.d.plugin/modules/envoy/collect.go
new file mode 100644
index 000000000..a7c74379d
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/envoy/collect.go
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package envoy
+
+import (
+ "strconv"
+ "strings"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus"
+
+ "github.com/prometheus/prometheus/model/labels"
+)
+
+// Server stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/statistics#
+// Server state: https://www.envoyproxy.io/docs/envoy/latest/api-v3/admin/v3/server_info.proto#enum-admin-v3-serverinfo-state
+// Listener stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/listeners/stats
+
+// collect scrapes the Prometheus endpoint once and flattens the result into a
+// metric map by running every stats collector over the scraped families.
+// A scrape error is returned as-is together with a nil map.
+func (e *Envoy) collect() (map[string]int64, error) {
+	families, scrapeErr := e.prom.Scrape()
+	if scrapeErr != nil {
+		return nil, scrapeErr
+	}
+
+	mx := make(map[string]int64)
+
+	// The collectors run in this fixed order; each one writes into mx.
+	steps := []func(map[string]int64, prometheus.MetricFamilies){
+		e.collectServerStats,
+		e.collectClusterManagerStats,
+		e.collectClusterUpstreamStats,
+		e.collectListenerManagerStats,
+		e.collectListenerAdminDownstreamStats,
+		e.collectListenerDownstreamStats,
+	}
+	for _, step := range steps {
+		step(mx, families)
+	}
+
+	return mx, nil
+}
+
+func (e *Envoy) collectServerStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
+ seen := make(map[string]bool)
+ for _, n := range []string{
+ "envoy_server_uptime",
+ "envoy_server_memory_allocated",
+ "envoy_server_memory_heap_size",
+ "envoy_server_memory_physical_size",
+ "envoy_server_parent_connections",
+ "envoy_server_total_connections",
+ } {
+ e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
+ id := e.joinLabels(m.Labels())
+ seen[id] = true
+
+ if !e.servers[id] {
+ e.servers[id] = true
+ e.addServerCharts(id, m.Labels())
+ }
+
+ mx[join(name, id)] += int64(m.Gauge().Value())
+ })
+ }
+
+ e.collectGauge(mfs, "envoy_server_state", func(name string, m prometheus.Metric) {
+ id := e.joinLabels(m.Labels())
+ for _, v := range []string{"live", "draining", "pre_initializing", "initializing"} {
+ mx[join(name, v, id)] = 0
+ }
+
+ switch m.Gauge().Value() {
+ case 0:
+ mx[join(name, "live", id)] = 1
+ case 1:
+ mx[join(name, "draining", id)] = 1
+ case 2:
+ mx[join(name, "pre_initializing", id)] = 1
+ case 3:
+ mx[join(name, "initializing", id)] = 1
+ }
+ })
+
+ for id := range e.servers {
+ if id != "" && !seen[id] {
+ delete(e.servers, id)
+ e.removeCharts(id)
+ }
+ }
+}
+
+// collectClusterManagerStats writes the cluster manager counters and gauges
+// into mx. Only the counters register new label sets (creating charts) and
+// mark them as present; the gauges just contribute values. Label sets tracked
+// in e.clusterMgrs that no counter reported in this pass are dropped together
+// with their charts.
+func (e *Envoy) collectClusterManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
+	present := make(map[string]bool)
+
+	counterNames := []string{
+		"envoy_cluster_manager_cluster_added",
+		"envoy_cluster_manager_cluster_modified",
+		"envoy_cluster_manager_cluster_removed",
+		"envoy_cluster_manager_cluster_updated",
+		"envoy_cluster_manager_cluster_updated_via_merge",
+		"envoy_cluster_manager_update_merge_cancelled",
+		"envoy_cluster_manager_update_out_of_merge_window",
+	}
+	for _, counterName := range counterNames {
+		e.collectCounter(mfs, counterName, func(name string, m prometheus.Metric) {
+			id := e.joinLabels(m.Labels())
+			present[id] = true
+
+			if known := e.clusterMgrs[id]; !known {
+				e.clusterMgrs[id] = true
+				e.addClusterManagerCharts(id, m.Labels())
+			}
+
+			mx[join(name, id)] += int64(m.Counter().Value())
+		})
+	}
+
+	gaugeNames := []string{
+		"envoy_cluster_manager_active_clusters",
+		"envoy_cluster_manager_warming_clusters",
+	}
+	for _, gaugeName := range gaugeNames {
+		e.collectGauge(mfs, gaugeName, func(name string, m prometheus.Metric) {
+			id := e.joinLabels(m.Labels())
+			mx[join(name, id)] += int64(m.Gauge().Value())
+		})
+	}
+
+	// Evict label sets that disappeared; the empty id is never evicted.
+	for id := range e.clusterMgrs {
+		if id == "" || present[id] {
+			continue
+		}
+		delete(e.clusterMgrs, id)
+		e.removeCharts(id)
+	}
+}
+
+// collectListenerAdminDownstreamStats writes the admin listener downstream
+// counters and gauges into mx. Every sample registers its label set (creating
+// charts on first sight); label sets tracked in e.listenerAdminDownstream that
+// were not reported in this pass are dropped together with their charts.
+func (e *Envoy) collectListenerAdminDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
+	present := make(map[string]bool)
+
+	// track marks the sample's label set as present, creates its charts the
+	// first time it is seen, and returns the id derived from the labels.
+	track := func(m prometheus.Metric) string {
+		id := e.joinLabels(m.Labels())
+		present[id] = true
+
+		if known := e.listenerAdminDownstream[id]; !known {
+			e.listenerAdminDownstream[id] = true
+			e.addListenerAdminDownstreamCharts(id, m.Labels())
+		}
+		return id
+	}
+
+	counterNames := []string{
+		"envoy_listener_admin_downstream_cx_total",
+		"envoy_listener_admin_downstream_cx_destroy",
+		"envoy_listener_admin_downstream_cx_transport_socket_connect_timeout",
+		"envoy_listener_admin_downstream_cx_overflow",
+		"envoy_listener_admin_downstream_cx_overload_reject",
+		"envoy_listener_admin_downstream_global_cx_overflow",
+		"envoy_listener_admin_downstream_pre_cx_timeout",
+		"envoy_listener_admin_downstream_listener_filter_remote_close",
+		"envoy_listener_admin_downstream_listener_filter_error",
+	}
+	for _, counterName := range counterNames {
+		e.collectCounter(mfs, counterName, func(name string, m prometheus.Metric) {
+			id := track(m)
+			mx[join(name, id)] += int64(m.Counter().Value())
+		})
+	}
+
+	gaugeNames := []string{
+		"envoy_listener_admin_downstream_cx_active",
+		"envoy_listener_admin_downstream_pre_cx_active",
+	}
+	for _, gaugeName := range gaugeNames {
+		e.collectGauge(mfs, gaugeName, func(name string, m prometheus.Metric) {
+			id := track(m)
+			mx[join(name, id)] += int64(m.Gauge().Value())
+		})
+	}
+
+	// Evict label sets that disappeared; the empty id is never evicted.
+	for id := range e.listenerAdminDownstream {
+		if id == "" || present[id] {
+			continue
+		}
+		delete(e.listenerAdminDownstream, id)
+		e.removeCharts(id)
+	}
+}
+
+// collectListenerDownstreamStats writes the listener downstream counters and
+// gauges into mx. Every sample registers its label set (creating charts on
+// first sight); label sets tracked in e.listenerDownstream that were not
+// reported in this pass are dropped together with their charts.
+func (e *Envoy) collectListenerDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
+	present := make(map[string]bool)
+
+	// track marks the sample's label set as present, creates its charts the
+	// first time it is seen, and returns the id derived from the labels.
+	track := func(m prometheus.Metric) string {
+		id := e.joinLabels(m.Labels())
+		present[id] = true
+
+		if known := e.listenerDownstream[id]; !known {
+			e.listenerDownstream[id] = true
+			e.addListenerDownstreamCharts(id, m.Labels())
+		}
+		return id
+	}
+
+	counterNames := []string{
+		"envoy_listener_downstream_cx_total",
+		"envoy_listener_downstream_cx_destroy",
+		"envoy_listener_downstream_cx_transport_socket_connect_timeout",
+		"envoy_listener_downstream_cx_overflow",
+		"envoy_listener_downstream_cx_overload_reject",
+		"envoy_listener_downstream_global_cx_overflow",
+		"envoy_listener_downstream_pre_cx_timeout",
+		"envoy_listener_downstream_listener_filter_remote_close",
+		"envoy_listener_downstream_listener_filter_error",
+	}
+	for _, counterName := range counterNames {
+		e.collectCounter(mfs, counterName, func(name string, m prometheus.Metric) {
+			id := track(m)
+			mx[join(name, id)] += int64(m.Counter().Value())
+		})
+	}
+
+	gaugeNames := []string{
+		"envoy_listener_downstream_cx_active",
+		"envoy_listener_downstream_pre_cx_active",
+	}
+	for _, gaugeName := range gaugeNames {
+		e.collectGauge(mfs, gaugeName, func(name string, m prometheus.Metric) {
+			id := track(m)
+			mx[join(name, id)] += int64(m.Gauge().Value())
+		})
+	}
+
+	// Evict label sets that disappeared; the empty id is never evicted.
+	for id := range e.listenerDownstream {
+		if id == "" || present[id] {
+			continue
+		}
+		delete(e.listenerDownstream, id)
+		e.removeCharts(id)
+	}
+}
+
+// collectClusterUpstreamStats writes the per-cluster upstream counters and
+// gauges into mx. Every sample registers its label set (creating charts on
+// first sight); label sets tracked in e.clusterUpstream that were not reported
+// in this pass are dropped together with their charts.
+func (e *Envoy) collectClusterUpstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
+	seen := make(map[string]bool)
+
+	// Each name must be listed exactly once: values are accumulated into mx
+	// with "+=", so a repeated name would have its family processed twice and
+	// its value reported doubled.
+	for _, n := range []string{
+		"envoy_cluster_upstream_cx_total",
+		"envoy_cluster_upstream_cx_http1_total",
+		"envoy_cluster_upstream_cx_http2_total",
+		"envoy_cluster_upstream_cx_http3_total",
+		"envoy_cluster_upstream_cx_connect_fail",
+		"envoy_cluster_upstream_cx_connect_timeout",
+		"envoy_cluster_upstream_cx_idle_timeout",
+		"envoy_cluster_upstream_cx_max_duration_reached",
+		"envoy_cluster_upstream_cx_connect_attempts_exceeded",
+		"envoy_cluster_upstream_cx_overflow",
+		"envoy_cluster_upstream_cx_destroy",
+		"envoy_cluster_upstream_cx_destroy_local",
+		"envoy_cluster_upstream_cx_destroy_remote",
+		"envoy_cluster_upstream_cx_rx_bytes_total",
+		"envoy_cluster_upstream_cx_tx_bytes_total",
+		"envoy_cluster_upstream_rq_total",
+		"envoy_cluster_upstream_rq_pending_total",
+		"envoy_cluster_upstream_rq_pending_overflow",
+		"envoy_cluster_upstream_rq_pending_failure_eject",
+		"envoy_cluster_upstream_rq_cancelled",
+		"envoy_cluster_upstream_rq_maintenance_mode",
+		"envoy_cluster_upstream_rq_timeout",
+		"envoy_cluster_upstream_rq_max_duration_reached",
+		"envoy_cluster_upstream_rq_per_try_timeout",
+		"envoy_cluster_upstream_rq_rx_reset",
+		"envoy_cluster_upstream_rq_tx_reset",
+		"envoy_cluster_upstream_rq_retry",
+		"envoy_cluster_upstream_rq_retry_backoff_exponential",
+		"envoy_cluster_upstream_rq_retry_backoff_ratelimited",
+		"envoy_cluster_upstream_rq_retry_success",
+		"envoy_cluster_membership_change",
+		"envoy_cluster_update_success",
+		"envoy_cluster_update_failure",
+		"envoy_cluster_update_empty",
+		"envoy_cluster_update_no_rebuild",
+	} {
+		e.collectCounter(mfs, n, func(name string, m prometheus.Metric) {
+			id := e.joinLabels(m.Labels())
+			seen[id] = true
+
+			if !e.clusterUpstream[id] {
+				e.clusterUpstream[id] = true
+				e.addClusterUpstreamCharts(id, m.Labels())
+			}
+
+			// Samples that map to the same id are summed.
+			mx[join(name, id)] += int64(m.Counter().Value())
+		})
+	}
+
+	for _, n := range []string{
+		"envoy_cluster_upstream_cx_active",
+		"envoy_cluster_upstream_cx_rx_bytes_buffered",
+		"envoy_cluster_upstream_cx_tx_bytes_buffered",
+		"envoy_cluster_upstream_rq_active",
+		"envoy_cluster_upstream_rq_pending_active",
+		"envoy_cluster_membership_healthy",
+		"envoy_cluster_membership_degraded",
+		"envoy_cluster_membership_excluded",
+	} {
+		e.collectGauge(mfs, n, func(name string, m prometheus.Metric) {
+			id := e.joinLabels(m.Labels())
+			seen[id] = true
+
+			if !e.clusterUpstream[id] {
+				e.clusterUpstream[id] = true
+				e.addClusterUpstreamCharts(id, m.Labels())
+			}
+
+			mx[join(name, id)] += int64(m.Gauge().Value())
+		})
+	}
+
+	// Evict label sets that disappeared; the empty id is never evicted.
+	for id := range e.clusterUpstream {
+		if id != "" && !seen[id] {
+			delete(e.clusterUpstream, id)
+			e.removeCharts(id)
+		}
+	}
+}
+
+// collectListenerManagerStats writes the listener manager counters and gauges
+// into mx. Every sample registers its label set (creating charts on first
+// sight); label sets tracked in e.listenerMgrs that were not reported in this
+// pass are dropped together with their charts.
+func (e *Envoy) collectListenerManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) {
+	present := make(map[string]bool)
+
+	// track marks the sample's label set as present, creates its charts the
+	// first time it is seen, and returns the id derived from the labels.
+	track := func(m prometheus.Metric) string {
+		id := e.joinLabels(m.Labels())
+		present[id] = true
+
+		if known := e.listenerMgrs[id]; !known {
+			e.listenerMgrs[id] = true
+			e.addListenerManagerCharts(id, m.Labels())
+		}
+		return id
+	}
+
+	counterNames := []string{
+		"envoy_listener_manager_listener_added",
+		"envoy_listener_manager_listener_modified",
+		"envoy_listener_manager_listener_removed",
+		"envoy_listener_manager_listener_stopped",
+		"envoy_listener_manager_listener_create_success",
+		"envoy_listener_manager_listener_create_failure",
+		"envoy_listener_manager_listener_in_place_updated",
+	}
+	for _, counterName := range counterNames {
+		e.collectCounter(mfs, counterName, func(name string, m prometheus.Metric) {
+			id := track(m)
+			mx[join(name, id)] += int64(m.Counter().Value())
+		})
+	}
+
+	gaugeNames := []string{
+		"envoy_listener_manager_total_listeners_warming",
+		"envoy_listener_manager_total_listeners_active",
+		"envoy_listener_manager_total_listeners_draining",
+	}
+	for _, gaugeName := range gaugeNames {
+		e.collectGauge(mfs, gaugeName, func(name string, m prometheus.Metric) {
+			id := track(m)
+			mx[join(name, id)] += int64(m.Gauge().Value())
+		})
+	}
+
+	// Evict label sets that disappeared; the empty id is never evicted.
+	for id := range e.listenerMgrs {
+		if id == "" || present[id] {
+			continue
+		}
+		delete(e.listenerMgrs, id)
+		e.removeCharts(id)
+	}
+}
+
+// collectGauge looks up the gauge family called metric in mfs and invokes
+// process once per metric in it, passing the family name. It does nothing
+// when the lookup returns nil.
+func (e *Envoy) collectGauge(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) {
+	family := mfs.GetGauge(metric)
+	if family == nil {
+		return
+	}
+	for _, sample := range family.Metrics() {
+		process(family.Name(), sample)
+	}
+}
+
+// collectCounter looks up the counter family called metric in mfs and invokes
+// process once per metric in it, passing the family name. It does nothing
+// when the lookup returns nil.
+func (e *Envoy) collectCounter(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) {
+	family := mfs.GetCounter(metric)
+	if family == nil {
+		return
+	}
+	for _, sample := range family.Metrics() {
+		process(family.Name(), sample)
+	}
+}
+
+// joinLabels builds an identifier from the label values, in label order,
+// joined with "_". Labels with an empty value are skipped. Spaces in a value
+// become "_"; a value containing a backslash is first run through
+// decodeLabelValue, and any backslash still left afterwards becomes "_".
+// Label names are not part of the result.
+func (e *Envoy) joinLabels(labels labels.Labels) string {
+	var parts []string
+	for _, lbl := range labels {
+		val := lbl.Value
+		if val == "" {
+			continue
+		}
+
+		if strings.Contains(val, " ") {
+			val = spaceReplacer.Replace(val)
+		}
+
+		if strings.Contains(val, `\`) {
+			val = decodeLabelValue(val)
+			if strings.Contains(val, `\`) {
+				val = backslashReplacer.Replace(val)
+			}
+		}
+
+		parts = append(parts, val)
+	}
+	return strings.Join(parts, "_")
+}
+
+// spaceReplacer rewrites every space to an underscore.
+var spaceReplacer = strings.NewReplacer(" ", "_")
+
+// backslashReplacer rewrites every backslash to an underscore.
+var backslashReplacer = strings.NewReplacer(`\`, "_")
+
+// decodeLabelValue interprets value as the body of a double-quoted Go string
+// literal and returns it with the escape sequences resolved. When value is
+// not valid in that form (for example an unknown escape or a bare double
+// quote), it is returned unchanged.
+func decodeLabelValue(value string) string {
+	if decoded, err := strconv.Unquote(`"` + value + `"`); err == nil {
+		return decoded
+	}
+	return value
+}
+
+// join appends every non-empty element of elems to name, each preceded by an
+// underscore, and returns the result. Empty elements are skipped and add no
+// separator.
+func join(name string, elems ...string) string {
+	out := []byte(name)
+	for _, el := range elems {
+		if el == "" {
+			continue
+		}
+		out = append(out, '_')
+		out = append(out, el...)
+	}
+	return string(out)
+}