Diffstat (limited to 'exporting/prometheus')
 exporting/prometheus/README.md                   | 70
 exporting/prometheus/prometheus.c                |  8
 exporting/prometheus/remote_write/README.md      |  7
 exporting/prometheus/remote_write/remote_write.c |  2
 4 files changed, 49 insertions(+), 38 deletions(-)
diff --git a/exporting/prometheus/README.md b/exporting/prometheus/README.md
index d718a366e..ef6f61358 100644
--- a/exporting/prometheus/README.md
+++ b/exporting/prometheus/README.md
@@ -128,46 +128,46 @@ scrape_configs:
#### Install nodes.yml
-The following is completely optional, it will enable Prometheus to generate alerts from some NetData sources. Tweak the
+The following is completely optional; it will enable Prometheus to generate alerts from some Netdata sources. Tweak the
values to your own needs. We will use the following `nodes.yml` file below. Save it at `/opt/prometheus/nodes.yml`, and
add a _- "nodes.yml"_ entry under the _rule_files:_ section in the example prometheus.yml file above.
```yaml
groups:
-- name: nodes
-
- rules:
- - alert: node_high_cpu_usage_70
- expr: avg(rate(netdata_cpu_cpu_percentage_average{dimension="idle"}[1m])) by (job) > 70
- for: 1m
- annotations:
- description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.'
- summary: CPU alert for container node '{{ $labels.job }}'
-
- - alert: node_high_memory_usage_70
- expr: 100 / sum(netdata_system_ram_MB_average) by (job)
- * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30
- for: 1m
- annotations:
- description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.'
- summary: Memory alert for container node '{{ $labels.job }}'
-
- - alert: node_low_root_filesystem_space_20
- expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job)
- * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20
- for: 1m
- annotations:
- description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.'
- summary: Root filesystem alert for container node '{{ $labels.job }}'
-
- - alert: node_root_filesystem_fill_rate_6h
- expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0
- for: 1h
- labels:
- severity: critical
- annotations:
- description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h.
- summary: Disk fill alert for Swarm node '{{ $labels.job }}'
+ - name: nodes
+
+ rules:
+ - alert: node_high_cpu_usage_70
+ expr: sum(sum_over_time(netdata_system_cpu_percentage_average{dimension=~"(user|system|softirq|irq|guest)"}[10m])) by (job) / sum(count_over_time(netdata_system_cpu_percentage_average{dimension="idle"}[10m])) by (job) > 70
+ for: 1m
+ annotations:
+ description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.'
+ summary: CPU alert for container node '{{ $labels.job }}'
+
+ - alert: node_high_memory_usage_70
+ expr: 100 / sum(netdata_system_ram_MB_average) by (job)
+ * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30
+ for: 1m
+ annotations:
+ description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.'
+ summary: Memory alert for container node '{{ $labels.job }}'
+
+ - alert: node_low_root_filesystem_space_20
+ expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job)
+ * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20
+ for: 1m
+ annotations:
+ description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.'
+ summary: Root filesystem alert for container node '{{ $labels.job }}'
+
+ - alert: node_root_filesystem_fill_rate_6h
+ expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0
+ for: 1h
+ labels:
+ severity: critical
+ annotations:
+ description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h.
+ summary: Disk fill alert for Swarm node '{{ $labels.job }}'
```
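With `nodes.yml` saved at `/opt/prometheus/nodes.yml`, the `rule_files` entry mentioned above would look roughly like this in the example `prometheus.yml` (a minimal excerpt, assuming both files sit side by side in `/opt/prometheus`):

```yaml
rule_files:
  - "nodes.yml"
```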
#### Install prometheus.service
diff --git a/exporting/prometheus/prometheus.c b/exporting/prometheus/prometheus.c
index 6759313c3..0a3190074 100644
--- a/exporting/prometheus/prometheus.c
+++ b/exporting/prometheus/prometheus.c
@@ -16,7 +16,9 @@
*/
inline int can_send_rrdset(struct instance *instance, RRDSET *st)
{
+#ifdef NETDATA_INTERNAL_CHECKS
RRDHOST *host = st->rrdhost;
+#endif
if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_EXPORTING_IGNORE)))
return 0;
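A plausible reading of the hunk above (an assumption, not stated in the diff): `host` is only consumed by debug output that is itself compiled only under `NETDATA_INTERNAL_CHECKS`, so guarding the declaration avoids an unused-variable warning in release builds. A minimal sketch of that pattern, with illustrative names:

```c
#include <stdio.h>

/* Debug macro that compiles to nothing unless internal checks are enabled,
 * mirroring the NETDATA_INTERNAL_CHECKS guard in the hunk above. */
#ifdef NETDATA_INTERNAL_CHECKS
#define internal_debug(...) fprintf(stderr, __VA_ARGS__)
#else
#define internal_debug(...) do { } while (0)
#endif

static int can_send(int ignore_flag, const char *hostname)
{
#ifdef NETDATA_INTERNAL_CHECKS
    const char *host = hostname;   /* only referenced by the debug call below */
#else
    (void)hostname;                /* silence unused-parameter warnings */
#endif

    if (ignore_flag) {
        internal_debug("chart is ignored for export on host '%s'\n", host);
        return 0;
    }
    return 1;
}

int main(void)
{
    printf("can_send: %d\n", can_send(1, "example-host"));
    return 0;
}
```

Built with `-DNETDATA_INTERNAL_CHECKS` the debug line is emitted; without it, neither the variable nor the call exists, so no warning is raised.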
@@ -136,7 +138,7 @@ static inline time_t prometheus_server_last_access(const char *server, RRDHOST *
* Copy and sanitize name.
*
* @param d a destination string.
- * @param s a source sting.
+ * @param s a source string.
* @param usable the number of characters to copy.
* @return Returns the length of the copied string.
*/
@@ -161,7 +163,7 @@ inline size_t prometheus_name_copy(char *d, const char *s, size_t usable)
* Copy and sanitize label.
*
* @param d a destination string.
- * @param s a source sting.
+ * @param s a source string.
* @param usable the number of characters to copy.
* @return Returns the length of the copied string.
*/
@@ -190,7 +192,7 @@ inline size_t prometheus_label_copy(char *d, const char *s, size_t usable)
* Copy and sanitize units.
*
* @param d a destination string.
- * @param s a source sting.
+ * @param s a source string.
* @param usable the number of characters to copy.
* @param showoldunits set this flag to 1 to show old (before v1.12) units.
* @return Returns the destination string.
diff --git a/exporting/prometheus/remote_write/README.md b/exporting/prometheus/remote_write/README.md
index fe901024b..ce379063e 100644
--- a/exporting/prometheus/remote_write/README.md
+++ b/exporting/prometheus/remote_write/README.md
@@ -41,6 +41,13 @@ For example, if your endpoint is `http://example.domain:example_port/storage/rea
remote write URL path = /storage/read
```
+You can set basic HTTP authentication credentials using
+
+```conf
+ username = my_username
+ password = my_password
+```
+
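For context, these options sit next to the destination and URL path in the connector's `exporting.conf` section; a rough sketch (the section and instance names, like the example values, are illustrative):

```conf
[prometheus_remote_write:my_instance]
    enabled = yes
    destination = example.domain:example_port
    remote write URL path = /storage/read
    username = my_username
    password = my_password
```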
`buffered` and `lost` dimensions in the Netdata Exporting Connector Data Size operation monitoring chart estimate uncompressed
buffer size on failures.
diff --git a/exporting/prometheus/remote_write/remote_write.c b/exporting/prometheus/remote_write/remote_write.c
index 986ad9f0e..8339712eb 100644
--- a/exporting/prometheus/remote_write/remote_write.c
+++ b/exporting/prometheus/remote_write/remote_write.c
@@ -25,6 +25,7 @@ void prometheus_remote_write_prepare_header(struct instance *instance)
"POST %s HTTP/1.1\r\n"
"Host: %s\r\n"
"Accept: */*\r\n"
+ "%s"
"Content-Encoding: snappy\r\n"
"Content-Type: application/x-protobuf\r\n"
"X-Prometheus-Remote-Write-Version: 0.1.0\r\n"
@@ -32,6 +33,7 @@ void prometheus_remote_write_prepare_header(struct instance *instance)
"\r\n",
connector_specific_config->remote_write_path,
simple_connector_data->connected_to,
+ simple_connector_data->auth_string ? simple_connector_data->auth_string : "",
buffer_strlen(simple_connector_data->last_buffer->buffer));
}
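The added `%s` above is filled from `simple_connector_data->auth_string`. Given the basic-auth options introduced in the README change, that string would typically be an `Authorization: Basic <base64(username:password)>` header line. The sketch below shows one way such a line could be built; it is an illustration only, not the actual Netdata implementation, and all function names are assumed.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char b64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Base64-encode src into a newly allocated NUL-terminated string. */
static char *base64_encode(const unsigned char *src, size_t len)
{
    size_t out_len = 4 * ((len + 2) / 3);
    char *out = malloc(out_len + 1);
    if (!out)
        return NULL;

    size_t i, j;
    for (i = 0, j = 0; i < len; i += 3) {
        unsigned v = (unsigned)src[i] << 16;
        if (i + 1 < len) v |= (unsigned)src[i + 1] << 8;
        if (i + 2 < len) v |= (unsigned)src[i + 2];

        out[j++] = b64_table[(v >> 18) & 0x3F];
        out[j++] = b64_table[(v >> 12) & 0x3F];
        out[j++] = (i + 1 < len) ? b64_table[(v >> 6) & 0x3F] : '=';
        out[j++] = (i + 2 < len) ? b64_table[v & 0x3F] : '=';
    }
    out[j] = '\0';
    return out;
}

/* Build "Authorization: Basic <base64(user:pass)>\r\n"; caller frees the result. */
static char *build_basic_auth_header(const char *username, const char *password)
{
    char credentials[256];
    int n = snprintf(credentials, sizeof(credentials), "%s:%s", username, password);
    if (n < 0 || (size_t)n >= sizeof(credentials))
        return NULL;

    char *encoded = base64_encode((const unsigned char *)credentials, (size_t)n);
    if (!encoded)
        return NULL;

    size_t header_len = strlen("Authorization: Basic ") + strlen(encoded) + 3;
    char *header = malloc(header_len);
    if (header)
        snprintf(header, header_len, "Authorization: Basic %s\r\n", encoded);

    free(encoded);
    return header;
}

int main(void)
{
    char *auth = build_basic_auth_header("my_username", "my_password");
    if (auth) {
        /* prints: Authorization: Basic bXlfdXNlcm5hbWU6bXlfcGFzc3dvcmQ= */
        printf("%s", auth);
        free(auth);
    }
    return 0;
}
```

Because the header template above ends each line with `\r\n`, a string shaped like this drops straight into the `%s` slot without disturbing the rest of the request header.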