From 483926a283e118590da3f9ecfa75a8a4d62143ce Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 1 Dec 2021 07:15:11 +0100 Subject: Merging upstream version 1.32.0. Signed-off-by: Daniel Baumann --- exporting/prometheus/README.md | 70 ++++++++++++------------ exporting/prometheus/prometheus.c | 8 ++- exporting/prometheus/remote_write/README.md | 7 +++ exporting/prometheus/remote_write/remote_write.c | 2 + 4 files changed, 49 insertions(+), 38 deletions(-) (limited to 'exporting/prometheus') diff --git a/exporting/prometheus/README.md b/exporting/prometheus/README.md index d718a366e..ef6f61358 100644 --- a/exporting/prometheus/README.md +++ b/exporting/prometheus/README.md @@ -128,46 +128,46 @@ scrape_configs: #### Install nodes.yml -The following is completely optional, it will enable Prometheus to generate alerts from some NetData sources. Tweak the +The following is completely optional, it will enable Prometheus to generate alerts from some Netdata sources. Tweak the values to your own needs. We will use the following `nodes.yml` file below. Save it at `/opt/prometheus/nodes.yml`, and add a _- "nodes.yml"_ entry under the _rule_files:_ section in the example prometheus.yml file above. ```yaml groups: -- name: nodes - - rules: - - alert: node_high_cpu_usage_70 - expr: avg(rate(netdata_cpu_cpu_percentage_average{dimension="idle"}[1m])) by (job) > 70 - for: 1m - annotations: - description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.' - summary: CPU alert for container node '{{ $labels.job }}' - - - alert: node_high_memory_usage_70 - expr: 100 / sum(netdata_system_ram_MB_average) by (job) - * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30 - for: 1m - annotations: - description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.' - summary: Memory alert for container node '{{ $labels.job }}' - - - alert: node_low_root_filesystem_space_20 - expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job) - * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20 - for: 1m - annotations: - description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.' - summary: Root filesystem alert for container node '{{ $labels.job }}' - - - alert: node_root_filesystem_fill_rate_6h - expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0 - for: 1h - labels: - severity: critical - annotations: - description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h. - summary: Disk fill alert for Swarm node '{{ $labels.job }}' + - name: nodes + + rules: + - alert: node_high_cpu_usage_70 + expr: sum(sum_over_time(netdata_system_cpu_percentage_average{dimension=~"(user|system|softirq|irq|guest)"}[10m])) by (job) / sum(count_over_time(netdata_system_cpu_percentage_average{dimension="idle"}[10m])) by (job) > 70 + for: 1m + annotations: + description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.' + summary: CPU alert for container node '{{ $labels.job }}' + + - alert: node_high_memory_usage_70 + expr: 100 / sum(netdata_system_ram_MB_average) by (job) + * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30 + for: 1m + annotations: + description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.' + summary: Memory alert for container node '{{ $labels.job }}' + + - alert: node_low_root_filesystem_space_20 + expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job) + * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20 + for: 1m + annotations: + description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.' + summary: Root filesystem alert for container node '{{ $labels.job }}' + + - alert: node_root_filesystem_fill_rate_6h + expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0 + for: 1h + labels: + severity: critical + annotations: + description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h. + summary: Disk fill alert for Swarm node '{{ $labels.job }}' ``` #### Install prometheus.service diff --git a/exporting/prometheus/prometheus.c b/exporting/prometheus/prometheus.c index 6759313c3..0a3190074 100644 --- a/exporting/prometheus/prometheus.c +++ b/exporting/prometheus/prometheus.c @@ -16,7 +16,9 @@ */ inline int can_send_rrdset(struct instance *instance, RRDSET *st) { +#ifdef NETDATA_INTERNAL_CHECKS RRDHOST *host = st->rrdhost; +#endif if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_EXPORTING_IGNORE))) return 0; @@ -136,7 +138,7 @@ static inline time_t prometheus_server_last_access(const char *server, RRDHOST * * Copy and sanitize name. * * @param d a destination string. - * @param s a source sting. + * @param s a source string. * @param usable the number of characters to copy. * @return Returns the length of the copied string. */ @@ -161,7 +163,7 @@ inline size_t prometheus_name_copy(char *d, const char *s, size_t usable) * Copy and sanitize label. * * @param d a destination string. - * @param s a source sting. + * @param s a source string. * @param usable the number of characters to copy. * @return Returns the length of the copied string. */ @@ -190,7 +192,7 @@ inline size_t prometheus_label_copy(char *d, const char *s, size_t usable) * Copy and sanitize units. * * @param d a destination string. - * @param s a source sting. + * @param s a source string. * @param usable the number of characters to copy. * @param showoldunits set this flag to 1 to show old (before v1.12) units. * @return Returns the destination string. diff --git a/exporting/prometheus/remote_write/README.md b/exporting/prometheus/remote_write/README.md index fe901024b..ce379063e 100644 --- a/exporting/prometheus/remote_write/README.md +++ b/exporting/prometheus/remote_write/README.md @@ -41,6 +41,13 @@ For example, if your endpoint is `http://example.domain:example_port/storage/rea remote write URL path = /storage/read ``` +You can set basic HTTP authentication credentials using + +```conf + username = my_username + password = my_password +``` + `buffered` and `lost` dimensions in the Netdata Exporting Connector Data Size operation monitoring chart estimate uncompressed buffer size on failures. diff --git a/exporting/prometheus/remote_write/remote_write.c b/exporting/prometheus/remote_write/remote_write.c index 986ad9f0e..8339712eb 100644 --- a/exporting/prometheus/remote_write/remote_write.c +++ b/exporting/prometheus/remote_write/remote_write.c @@ -25,6 +25,7 @@ void prometheus_remote_write_prepare_header(struct instance *instance) "POST %s HTTP/1.1\r\n" "Host: %s\r\n" "Accept: */*\r\n" + "%s" "Content-Encoding: snappy\r\n" "Content-Type: application/x-protobuf\r\n" "X-Prometheus-Remote-Write-Version: 0.1.0\r\n" @@ -32,6 +33,7 @@ void prometheus_remote_write_prepare_header(struct instance *instance) "\r\n", connector_specific_config->remote_write_path, simple_connector_data->connected_to, + simple_connector_data->auth_string ? simple_connector_data->auth_string : "", buffer_strlen(simple_connector_data->last_buffer->buffer)); } -- cgit v1.2.3