From 5da14042f70711ea5cf66e034699730335462f66 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 5 May 2024 14:08:03 +0200 Subject: Merging upstream version 1.45.3+dfsg. Signed-off-by: Daniel Baumann --- .../modules/envoy/integrations/envoy.md | 271 +++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 src/go/collectors/go.d.plugin/modules/envoy/integrations/envoy.md (limited to 'src/go/collectors/go.d.plugin/modules/envoy/integrations') diff --git a/src/go/collectors/go.d.plugin/modules/envoy/integrations/envoy.md b/src/go/collectors/go.d.plugin/modules/envoy/integrations/envoy.md new file mode 100644 index 000000000..52c6406e8 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/envoy/integrations/envoy.md @@ -0,0 +1,271 @@ + + +# Envoy + + + + + +Plugin: go.d.plugin +Module: envoy + + + +## Overview + +This collector monitors Envoy proxies. It collects server, cluster, and listener metrics. + + + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +By default, it detects Envoy instances running on localhost. + + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Envoy instance + +Envoy exposes metrics in Prometheus format. All metric labels are added to charts. + +This scope has no labels. 
+ +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| envoy.server_state | live, draining, pre_initializing, initializing | state | +| envoy.server_connections_count | connections | connections | +| envoy.server_parent_connections_count | connections | connections | +| envoy.server_memory_allocated_size | allocated | bytes | +| envoy.server_memory_heap_size | heap | bytes | +| envoy.server_memory_physical_size | physical | bytes | +| envoy.server_uptime | uptime | seconds | +| envoy.cluster_manager_cluster_count | active, not_active | clusters | +| envoy.cluster_manager_cluster_changes_rate | added, modified, removed | clusters/s | +| envoy.cluster_manager_cluster_updates_rate | cluster | updates/s | +| envoy.cluster_manager_cluster_updated_via_merge_rate | via_merge | updates/s | +| envoy.cluster_manager_update_merge_cancelled_rate | merge_cancelled | updates/s | +| envoy.cluster_manager_update_out_of_merge_window_rate | out_of_merge_window | updates/s | +| envoy.cluster_membership_endpoints_count | healthy, degraded, excluded | endpoints | +| envoy.cluster_membership_changes_rate | membership | changes/s | +| envoy.cluster_membership_updates_rate | success, failure, empty, no_rebuild | updates/s | +| envoy.cluster_upstream_cx_active_count | active | connections | +| envoy.cluster_upstream_cx_rate | created | connections/s | +| envoy.cluster_upstream_cx_http_rate | http1, http2, http3 | connections/s | +| envoy.cluster_upstream_cx_destroy_rate | local, remote | connections/s | +| envoy.cluster_upstream_cx_connect_fail_rate | failed | connections/s | +| envoy.cluster_upstream_cx_connect_timeout_rate | timeout | connections/s | +| envoy.cluster_upstream_cx_bytes_rate | received, sent | bytes/s | +| envoy.cluster_upstream_cx_bytes_buffered_size | received, send | bytes | +| envoy.cluster_upstream_rq_active_count | active | requests | +| envoy.cluster_upstream_rq_rate | requests | requests/s | +| envoy.cluster_upstream_rq_failed_rate | 
cancelled, maintenance_mode, timeout, max_duration_reached, per_try_timeout, reset_local, reset_remote | requests/s | +| envoy.cluster_upstream_rq_pending_active_count | active_pending | requests | +| envoy.cluster_upstream_rq_pending_rate | pending | requests/s | +| envoy.cluster_upstream_rq_pending_failed_rate | overflow, failure_eject | requests/s | +| envoy.cluster_upstream_rq_retry_rate | request | retries/s | +| envoy.cluster_upstream_rq_retry_success_rate | success | retries/s | +| envoy.cluster_upstream_rq_retry_backoff_rate | exponential, ratelimited | retries/s | +| envoy.listener_manager_listeners_count | active, warming, draining | listeners | +| envoy.listener_manager_listener_changes_rate | added, modified, removed, stopped | listeners/s | +| envoy.listener_manager_listener_object_events_rate | create_success, create_failure, in_place_updated | objects/s | +| envoy.listener_admin_downstream_cx_active_count | active | connections | +| envoy.listener_admin_downstream_cx_rate | created | connections/s | +| envoy.listener_admin_downstream_cx_destroy_rate | destroyed | connections/s | +| envoy.listener_admin_downstream_cx_transport_socket_connect_timeout_rate | timeout | connections/s | +| envoy.listener_admin_downstream_cx_rejected_rate | overflow, overload, global_overflow | connections/s | +| envoy.listener_admin_downstream_listener_filter_remote_close_rate | closed | connections/s | +| envoy.listener_admin_downstream_listener_filter_error_rate | read | errors/s | +| envoy.listener_admin_downstream_pre_cx_active_count | active | sockets | +| envoy.listener_admin_downstream_pre_cx_timeout_rate | timeout | sockets/s | +| envoy.listener_downstream_cx_active_count | active | connections | +| envoy.listener_downstream_cx_rate | created | connections/s | +| envoy.listener_downstream_cx_destroy_rate | destroyed | connections/s | +| envoy.listener_downstream_cx_transport_socket_connect_timeout_rate | timeout | connections/s | +| 
envoy.listener_downstream_cx_rejected_rate | overflow, overload, global_overflow | connections/s | +| envoy.listener_downstream_listener_filter_remote_close_rate | closed | connections/s | +| envoy.listener_downstream_listener_filter_error_rate | read | errors/s | +| envoy.listener_downstream_pre_cx_active_count | active | sockets | +| envoy.listener_downstream_pre_cx_timeout_rate | timeout | sockets/s | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +No action required. + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/envoy.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/envoy.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +
<details><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| url | Server URL. | http://127.0.0.1:9091/stats/prometheus | yes | +| timeout | HTTP request timeout. | 1 | no | +| username | Username for basic HTTP authentication. | | no | +| password | Password for basic HTTP authentication. | | no | +| proxy_url | Proxy URL. | | no | +| proxy_username | Username for proxy basic HTTP authentication. | | no | +| proxy_password | Password for proxy basic HTTP authentication. | | no | +| method | HTTP request method. | GET | no | +| body | HTTP request body. | | no | +| headers | HTTP request headers. | | no | +| not_follow_redirects | Redirect handling policy. Controls whether the client follows redirects. | no | no | +| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no | +| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no | +| tls_cert | Client TLS certificate. | | no | +| tls_key | Client TLS key. | | no | + +
</details> + +#### Examples + +##### Basic + +A basic example configuration. + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9901/stats/prometheus + +``` +##### HTTP authentication + +Basic HTTP authentication. + +
<details><summary>Config</summary> + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9901/stats/prometheus + username: username + password: password + +``` +
</details> + +##### HTTPS with self-signed certificate + +Do not validate server certificate chain and hostname. + + +
<details><summary>Config</summary> + +```yaml +jobs: + - name: local + url: https://127.0.0.1:9901/stats/prometheus + tls_skip_verify: yes + +``` +
</details> + +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +
<details><summary>Config</summary> + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9901/stats/prometheus + + - name: remote + url: http://192.0.2.1:9901/stats/prometheus + +``` +
</details> + + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `envoy` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m envoy + ``` + + -- cgit v1.2.3