// SPDX-License-Identifier: GPL-3.0-or-later package envoy import ( "strconv" "strings" "github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus" "github.com/prometheus/prometheus/model/labels" ) // Server stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/observability/statistics# // Server state: https://www.envoyproxy.io/docs/envoy/latest/api-v3/admin/v3/server_info.proto#enum-admin-v3-serverinfo-state // Listener stats: https://www.envoyproxy.io/docs/envoy/latest/configuration/listeners/stats func (e *Envoy) collect() (map[string]int64, error) { mfs, err := e.prom.Scrape() if err != nil { return nil, err } mx := make(map[string]int64) e.collectServerStats(mx, mfs) e.collectClusterManagerStats(mx, mfs) e.collectClusterUpstreamStats(mx, mfs) e.collectListenerManagerStats(mx, mfs) e.collectListenerAdminDownstreamStats(mx, mfs) e.collectListenerDownstreamStats(mx, mfs) return mx, nil } func (e *Envoy) collectServerStats(mx map[string]int64, mfs prometheus.MetricFamilies) { seen := make(map[string]bool) for _, n := range []string{ "envoy_server_uptime", "envoy_server_memory_allocated", "envoy_server_memory_heap_size", "envoy_server_memory_physical_size", "envoy_server_parent_connections", "envoy_server_total_connections", } { e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.servers[id] { e.servers[id] = true e.addServerCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Gauge().Value()) }) } e.collectGauge(mfs, "envoy_server_state", func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) for _, v := range []string{"live", "draining", "pre_initializing", "initializing"} { mx[join(name, v, id)] = 0 } switch m.Gauge().Value() { case 0: mx[join(name, "live", id)] = 1 case 1: mx[join(name, "draining", id)] = 1 case 2: mx[join(name, "pre_initializing", id)] = 1 case 3: mx[join(name, "initializing", id)] = 1 } }) for id := range e.servers { if id != "" && !seen[id] { delete(e.servers, id) e.removeCharts(id) } } } func (e *Envoy) collectClusterManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) { seen := make(map[string]bool) for _, n := range []string{ "envoy_cluster_manager_cluster_added", "envoy_cluster_manager_cluster_modified", "envoy_cluster_manager_cluster_removed", "envoy_cluster_manager_cluster_updated", "envoy_cluster_manager_cluster_updated_via_merge", "envoy_cluster_manager_update_merge_cancelled", "envoy_cluster_manager_update_out_of_merge_window", } { e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.clusterMgrs[id] { e.clusterMgrs[id] = true e.addClusterManagerCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Counter().Value()) }) } for _, n := range []string{ "envoy_cluster_manager_active_clusters", "envoy_cluster_manager_warming_clusters", } { e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) mx[join(name, id)] += int64(m.Gauge().Value()) }) } for id := range e.clusterMgrs { if id != "" && !seen[id] { delete(e.clusterMgrs, id) e.removeCharts(id) } } } func (e *Envoy) collectListenerAdminDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) { seen := make(map[string]bool) for _, n := range []string{ "envoy_listener_admin_downstream_cx_total", "envoy_listener_admin_downstream_cx_destroy", "envoy_listener_admin_downstream_cx_transport_socket_connect_timeout", "envoy_listener_admin_downstream_cx_overflow", "envoy_listener_admin_downstream_cx_overload_reject", "envoy_listener_admin_downstream_global_cx_overflow", "envoy_listener_admin_downstream_pre_cx_timeout", "envoy_listener_admin_downstream_listener_filter_remote_close", "envoy_listener_admin_downstream_listener_filter_error", } { e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.listenerAdminDownstream[id] { e.listenerAdminDownstream[id] = true e.addListenerAdminDownstreamCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Counter().Value()) }) } for _, n := range []string{ "envoy_listener_admin_downstream_cx_active", "envoy_listener_admin_downstream_pre_cx_active", } { e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.listenerAdminDownstream[id] { e.listenerAdminDownstream[id] = true e.addListenerAdminDownstreamCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Gauge().Value()) }) } for id := range e.listenerAdminDownstream { if id != "" && !seen[id] { delete(e.listenerAdminDownstream, id) e.removeCharts(id) } } } func (e *Envoy) collectListenerDownstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) { seen := make(map[string]bool) for _, n := range []string{ "envoy_listener_downstream_cx_total", "envoy_listener_downstream_cx_destroy", "envoy_listener_downstream_cx_transport_socket_connect_timeout", "envoy_listener_downstream_cx_overflow", "envoy_listener_downstream_cx_overload_reject", "envoy_listener_downstream_global_cx_overflow", "envoy_listener_downstream_pre_cx_timeout", "envoy_listener_downstream_listener_filter_remote_close", "envoy_listener_downstream_listener_filter_error", } { e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.listenerDownstream[id] { e.listenerDownstream[id] = true e.addListenerDownstreamCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Counter().Value()) }) } for _, n := range []string{ "envoy_listener_downstream_cx_active", "envoy_listener_downstream_pre_cx_active", } { e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.listenerDownstream[id] { e.listenerDownstream[id] = true e.addListenerDownstreamCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Gauge().Value()) }) } for id := range e.listenerDownstream { if id != "" && !seen[id] { delete(e.listenerDownstream, id) e.removeCharts(id) } } } func (e *Envoy) collectClusterUpstreamStats(mx map[string]int64, mfs prometheus.MetricFamilies) { seen := make(map[string]bool) for _, n := range []string{ "envoy_cluster_upstream_cx_total", "envoy_cluster_upstream_cx_http1_total", "envoy_cluster_upstream_cx_http2_total", "envoy_cluster_upstream_cx_http3_total", "envoy_cluster_upstream_cx_http3_total", "envoy_cluster_upstream_cx_connect_fail", "envoy_cluster_upstream_cx_connect_timeout", "envoy_cluster_upstream_cx_idle_timeout", "envoy_cluster_upstream_cx_max_duration_reached", "envoy_cluster_upstream_cx_connect_attempts_exceeded", "envoy_cluster_upstream_cx_overflow", "envoy_cluster_upstream_cx_destroy", "envoy_cluster_upstream_cx_destroy_local", "envoy_cluster_upstream_cx_destroy_remote", "envoy_cluster_upstream_cx_rx_bytes_total", "envoy_cluster_upstream_cx_tx_bytes_total", "envoy_cluster_upstream_rq_total", "envoy_cluster_upstream_rq_pending_total", "envoy_cluster_upstream_rq_pending_overflow", "envoy_cluster_upstream_rq_pending_failure_eject", "envoy_cluster_upstream_rq_cancelled", "envoy_cluster_upstream_rq_maintenance_mode", "envoy_cluster_upstream_rq_timeout", "envoy_cluster_upstream_rq_max_duration_reached", "envoy_cluster_upstream_rq_per_try_timeout", "envoy_cluster_upstream_rq_rx_reset", "envoy_cluster_upstream_rq_tx_reset", "envoy_cluster_upstream_rq_retry", "envoy_cluster_upstream_rq_retry_backoff_exponential", "envoy_cluster_upstream_rq_retry_backoff_ratelimited", "envoy_cluster_upstream_rq_retry_success", "envoy_cluster_membership_change", "envoy_cluster_update_success", "envoy_cluster_update_failure", "envoy_cluster_update_empty", "envoy_cluster_update_no_rebuild", } { e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.clusterUpstream[id] { e.clusterUpstream[id] = true e.addClusterUpstreamCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Counter().Value()) }) } for _, n := range []string{ "envoy_cluster_upstream_cx_active", "envoy_cluster_upstream_cx_rx_bytes_buffered", "envoy_cluster_upstream_cx_tx_bytes_buffered", "envoy_cluster_upstream_rq_active", "envoy_cluster_upstream_rq_pending_active", "envoy_cluster_membership_healthy", "envoy_cluster_membership_degraded", "envoy_cluster_membership_excluded", } { e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.clusterUpstream[id] { e.clusterUpstream[id] = true e.addClusterUpstreamCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Gauge().Value()) }) } for id := range e.clusterUpstream { if id != "" && !seen[id] { delete(e.clusterUpstream, id) e.removeCharts(id) } } } func (e *Envoy) collectListenerManagerStats(mx map[string]int64, mfs prometheus.MetricFamilies) { seen := make(map[string]bool) for _, n := range []string{ "envoy_listener_manager_listener_added", "envoy_listener_manager_listener_modified", "envoy_listener_manager_listener_removed", "envoy_listener_manager_listener_stopped", "envoy_listener_manager_listener_create_success", "envoy_listener_manager_listener_create_failure", "envoy_listener_manager_listener_in_place_updated", } { e.collectCounter(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.listenerMgrs[id] { e.listenerMgrs[id] = true e.addListenerManagerCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Counter().Value()) }) } for _, n := range []string{ "envoy_listener_manager_total_listeners_warming", "envoy_listener_manager_total_listeners_active", "envoy_listener_manager_total_listeners_draining", } { e.collectGauge(mfs, n, func(name string, m prometheus.Metric) { id := e.joinLabels(m.Labels()) seen[id] = true if !e.listenerMgrs[id] { e.listenerMgrs[id] = true e.addListenerManagerCharts(id, m.Labels()) } mx[join(name, id)] += int64(m.Gauge().Value()) }) } for id := range e.listenerMgrs { if id != "" && !seen[id] { delete(e.listenerMgrs, id) e.removeCharts(id) } } } func (e *Envoy) collectGauge(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) { if mf := mfs.GetGauge(metric); mf != nil { for _, m := range mf.Metrics() { process(mf.Name(), m) } } } func (e *Envoy) collectCounter(mfs prometheus.MetricFamilies, metric string, process func(name string, m prometheus.Metric)) { if mf := mfs.GetCounter(metric); mf != nil { for _, m := range mf.Metrics() { process(mf.Name(), m) } } } func (e *Envoy) joinLabels(labels labels.Labels) string { var buf strings.Builder first := true for _, lbl := range labels { v := lbl.Value if v == "" { continue } if strings.IndexByte(v, ' ') != -1 { v = spaceReplacer.Replace(v) } if strings.IndexByte(v, '\\') != -1 { if v = decodeLabelValue(v); strings.IndexByte(v, '\\') != -1 { v = backslashReplacer.Replace(v) } } if first { buf.WriteString(v) first = false } else { buf.WriteString("_" + v) } } return buf.String() } var ( spaceReplacer = strings.NewReplacer(" ", "_") backslashReplacer = strings.NewReplacer(`\`, "_") ) func decodeLabelValue(value string) string { v, err := strconv.Unquote("\"" + value + "\"") if err != nil { return value } return v } func join(name string, elems ...string) string { for _, v := range elems { if v != "" { name += "_" + v } } return name }