summaryrefslogtreecommitdiffstats
path: root/exporting/prometheus
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 14:31:17 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 14:31:17 +0000
commit8020f71afd34d7696d7933659df2d763ab05542f (patch)
tree2fdf1b5447ffd8bdd61e702ca183e814afdcb4fc /exporting/prometheus
parentInitial commit. (diff)
downloadnetdata-8020f71afd34d7696d7933659df2d763ab05542f.tar.xz
netdata-8020f71afd34d7696d7933659df2d763ab05542f.zip
Adding upstream version 1.37.1.upstream/1.37.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'exporting/prometheus')
-rw-r--r--exporting/prometheus/Makefile.am12
-rw-r--r--exporting/prometheus/README.md461
-rw-r--r--exporting/prometheus/prometheus.c902
-rw-r--r--exporting/prometheus/prometheus.h41
-rw-r--r--exporting/prometheus/remote_write/Makefile.am14
-rw-r--r--exporting/prometheus/remote_write/README.md58
-rw-r--r--exporting/prometheus/remote_write/remote_write.c406
-rw-r--r--exporting/prometheus/remote_write/remote_write.h32
-rw-r--r--exporting/prometheus/remote_write/remote_write.proto29
-rw-r--r--exporting/prometheus/remote_write/remote_write_request.cc258
-rw-r--r--exporting/prometheus/remote_write/remote_write_request.h42
11 files changed, 2255 insertions, 0 deletions
diff --git a/exporting/prometheus/Makefile.am b/exporting/prometheus/Makefile.am
new file mode 100644
index 0000000..334fca8
--- /dev/null
+++ b/exporting/prometheus/Makefile.am
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+SUBDIRS = \
+ remote_write \
+ $(NULL)
+
+dist_noinst_DATA = \
+ README.md \
+ $(NULL)
diff --git a/exporting/prometheus/README.md b/exporting/prometheus/README.md
new file mode 100644
index 0000000..ae94867
--- /dev/null
+++ b/exporting/prometheus/README.md
@@ -0,0 +1,461 @@
+<!--
+title: "Export metrics to Prometheus"
+description: "Export Netdata metrics to Prometheus for archiving and further analysis."
+custom_edit_url: https://github.com/netdata/netdata/edit/master/exporting/prometheus/README.md
+sidebar_label: "Using Netdata with Prometheus"
+-->
+import { OneLineInstallWget, OneLineInstallCurl } from '@site/src/components/OneLineInstall/'
+
+# Using Netdata with Prometheus
+
+Prometheus is a distributed monitoring system which offers a very simple setup along with a robust data model. Recently
+Netdata added support for Prometheus. I'm going to quickly show you how to install both Netdata and Prometheus on the
+same server. We can then use Grafana pointed at Prometheus to obtain long term metrics Netdata offers. I'm assuming we
+are starting at a fresh ubuntu shell (whether you'd like to follow along in a VM or a cloud instance is up to you).
+
+## Installing Netdata and Prometheus
+
+### Installing Netdata
+
+There are a number of ways to install Netdata according to [Installation](/packaging/installer/README.md). The suggested way
+is to install the latest Netdata and keep it upgraded automatically.
+
+<!-- candidate for reuse -->
+
+To install Netdata, run the following as your normal user:
+
+<OneLineInstallWget/>
+
+Or, if you have cURL but not wget (such as on macOS):
+
+<OneLineInstallCurl/>
+
+At this point we should have Netdata listening on port 19999. Attempt to take your browser here:
+
+```sh
+http://your.netdata.ip:19999
+```
+
+_(replace `your.netdata.ip` with the IP or hostname of the server running Netdata)_
+
+### Installing Prometheus
+
+In order to install Prometheus we are going to introduce our own systemd startup script along with an example of
+prometheus.yaml configuration. Prometheus needs to be pointed to your server at a specific target url for it to scrape
+Netdata's api. Prometheus is always a pull model meaning Netdata is the passive client within this architecture.
+Prometheus always initiates the connection with Netdata.
+
+#### Download Prometheus
+
+```sh
+cd /tmp && curl -s https://api.github.com/repos/prometheus/prometheus/releases/latest \
+| grep "browser_download_url.*linux-amd64.tar.gz" \
+| cut -d '"' -f 4 \
+| wget -qi -
+```
+
+#### Create prometheus system user
+
+```sh
+sudo useradd -r prometheus
+```
+
+#### Create prometheus directory
+
+```sh
+sudo mkdir /opt/prometheus
+sudo chown prometheus:prometheus /opt/prometheus
+```
+
+#### Untar prometheus directory
+
+```sh
+sudo tar -xvf /tmp/prometheus-*linux-amd64.tar.gz -C /opt/prometheus --strip=1
+```
+
+#### Install prometheus.yml
+
+We will use the following `prometheus.yml` file. Save it at `/opt/prometheus/prometheus.yml`.
+
+Make sure to replace `your.netdata.ip` with the IP or hostname of the host running Netdata.
+
+```yaml
+# my global config
+global:
+ scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute.
+ evaluation_interval: 5s # Evaluate rules every 5 seconds. The default is every 1 minute.
+ # scrape_timeout is set to the global default (10s).
+
+ # Attach these labels to any time series or alerts when communicating with
+ # external systems (federation, remote storage, Alertmanager).
+ external_labels:
+ monitor: 'codelab-monitor'
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:
+ # - "first.rules"
+ # - "second.rules"
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+ # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+ - job_name: 'prometheus'
+
+ # metrics_path defaults to '/metrics'
+ # scheme defaults to 'http'.
+
+ static_configs:
+ - targets: ['0.0.0.0:9090']
+
+ - job_name: 'netdata-scrape'
+
+ metrics_path: '/api/v1/allmetrics'
+ params:
+ # format: prometheus | prometheus_all_hosts
+ # You can use `prometheus_all_hosts` if you want Prometheus to set the `instance` to your hostname instead of IP
+ format: [prometheus]
+ #
+ # sources: as-collected | raw | average | sum | volume
+ # default is: average
+ #source: [as-collected]
+ #
+ # server name for this prometheus - the default is the client IP
+ # for Netdata to uniquely identify it
+ #server: ['prometheus1']
+ honor_labels: true
+
+ static_configs:
+ - targets: ['{your.netdata.ip}:19999']
+```
+
+#### Install nodes.yml
+
+The following is completely optional, it will enable Prometheus to generate alerts from some Netdata sources. Tweak the
+values to your own needs. We will use the following `nodes.yml` file below. Save it at `/opt/prometheus/nodes.yml`, and
+add a _- "nodes.yml"_ entry under the _rule_files:_ section in the example prometheus.yml file above.
+
+```yaml
+groups:
+ - name: nodes
+
+ rules:
+ - alert: node_high_cpu_usage_70
+ expr: sum(sum_over_time(netdata_system_cpu_percentage_average{dimension=~"(user|system|softirq|irq|guest)"}[10m])) by (job) / sum(count_over_time(netdata_system_cpu_percentage_average{dimension="idle"}[10m])) by (job) > 70
+ for: 1m
+ annotations:
+ description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.'
+ summary: CPU alert for container node '{{ $labels.job }}'
+
+ - alert: node_high_memory_usage_70
+ expr: 100 / sum(netdata_system_ram_MB_average) by (job)
+ * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30
+ for: 1m
+ annotations:
+ description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.'
+ summary: Memory alert for container node '{{ $labels.job }}'
+
+ - alert: node_low_root_filesystem_space_20
+ expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job)
+ * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20
+ for: 1m
+ annotations:
+ description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.'
+ summary: Root filesystem alert for container node '{{ $labels.job }}'
+
+ - alert: node_root_filesystem_fill_rate_6h
+ expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0
+ for: 1h
+ labels:
+ severity: critical
+ annotations:
+ description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h.
+ summary: Disk fill alert for Swarm node '{{ $labels.job }}'
+```
+
+#### Install prometheus.service
+
+Save this service file as `/etc/systemd/system/prometheus.service`:
+
+```sh
+[Unit]
+Description=Prometheus Server
+AssertPathExists=/opt/prometheus
+
+[Service]
+Type=simple
+WorkingDirectory=/opt/prometheus
+User=prometheus
+Group=prometheus
+ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --log.level=info
+ExecReload=/bin/kill -SIGHUP $MAINPID
+ExecStop=/bin/kill -SIGINT $MAINPID
+
+[Install]
+WantedBy=multi-user.target
+```
+
+##### Start Prometheus
+
+```sh
+sudo systemctl start prometheus
+sudo systemctl enable prometheus
+```
+
+Prometheus should now start and listen on port 9090. Attempt to head there with your browser.
+
+If everything is working correctly when you fetch `http://your.prometheus.ip:9090` you will see a 'Status' tab. Click
+this and click on 'targets'. We should see the Netdata host as a scraped target.
+
+---
+
+## Netdata support for Prometheus
+
+Before explaining the changes, we have to understand the key differences between Netdata and Prometheus.
+
+### understanding Netdata metrics
+
+#### charts
+
+Each chart in Netdata has several properties (common to all its metrics):
+
+- `chart_id` - uniquely identifies a chart.
+
+- `chart_name` - a more human friendly name for `chart_id`, also unique.
+
+- `context` - this is the template of the chart. All disk I/O charts have the same context, all mysql requests charts
+ have the same context, etc. This is used for alarm templates to match all the charts they should be attached to.
+
+- `family` groups a set of charts together. It is used as the submenu of the dashboard.
+
+- `units` is the units for all the metrics attached to the chart.
+
+#### dimensions
+
+Then each Netdata chart contains metrics called `dimensions`. All the dimensions of a chart have the same units of
+measurement, and are contextually in the same category (ie. the metrics for disk bandwidth are `read` and `write` and
+they are both in the same chart).
+
+### Netdata data source
+
+Netdata can send metrics to Prometheus from 3 data sources:
+
+- `as collected` or `raw` - this data source sends the metrics to Prometheus as they are collected. No conversion is
+ done by Netdata. The latest value for each metric is just given to Prometheus. This is the most preferred method by
+ Prometheus, but it is also the hardest to work with. To work with this data source, you will need to understand how
+ to get meaningful values out of them.
+
+ The format of the metrics is: `CONTEXT{chart="CHART",family="FAMILY",dimension="DIMENSION"}`.
+
+ If the metric is a counter (`incremental` in Netdata lingo), `_total` is appended to the context.
+
+ Unlike Prometheus, Netdata allows each dimension of a chart to have a different algorithm and conversion constants
+ (`multiplier` and `divisor`). In this case, where the dimensions of a chart are heterogeneous, Netdata will use this
+ format: `CONTEXT_DIMENSION{chart="CHART",family="FAMILY"}`
+
+- `average` - this data source uses the Netdata database to send the metrics to Prometheus as they are presented on
+ the Netdata dashboard. So, all the metrics are sent as gauges, at the units they are presented in the Netdata
+ dashboard charts. This is the easiest to work with.
+
+ The format of the metrics is: `CONTEXT_UNITS_average{chart="CHART",family="FAMILY",dimension="DIMENSION"}`.
+
+ When this source is used, Netdata keeps track of the last access time for each Prometheus server fetching the
+ metrics. This last access time is used at the subsequent queries of the same Prometheus server to identify the
+ time-frame the `average` will be calculated.
+
+ So, no matter how frequently Prometheus scrapes Netdata, it will get all the database data.
+ To identify each Prometheus server, Netdata uses by default the IP of the client fetching the metrics.
+
+ If there are multiple Prometheus servers fetching data from the same Netdata, using the same IP, each Prometheus
+ server can append `server=NAME` to the URL. Netdata will use this `NAME` to uniquely identify the Prometheus server.
+
+- `sum` or `volume`, is like `average` but instead of averaging the values, it sums them.
+
+ The format of the metrics is: `CONTEXT_UNITS_sum{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. All the
+ other operations are the same with `average`.
+
+ To change the data source to `sum` or `as-collected` you need to provide the `source` parameter in the request URL.
+ e.g.: `http://your.netdata.ip:19999/api/v1/allmetrics?format=prometheus&help=yes&source=as-collected`
+
+ Keep in mind that early versions of Netdata were sending the metrics as: `CHART_DIMENSION{}`.
+
+### Querying Metrics
+
+Fetch with your web browser this URL:
+
+`http://your.netdata.ip:19999/api/v1/allmetrics?format=prometheus&help=yes`
+
+_(replace `your.netdata.ip` with the ip or hostname of your Netdata server)_
+
+Netdata will respond with all the metrics it sends to Prometheus.
+
+If you search that page for `"system.cpu"` you will find all the metrics Netdata is exporting to Prometheus for this
+chart. `system.cpu` is the chart name on the Netdata dashboard (on the Netdata dashboard all charts have a text heading
+such as: `Total CPU utilization (system.cpu)`. What we are interested in here is the chart name: `system.cpu`).
+
+Searching for `"system.cpu"` reveals:
+
+```sh
+# COMMENT homogeneous chart "system.cpu", context "system.cpu", family "cpu", units "percentage"
+# COMMENT netdata_system_cpu_percentage_average: dimension "guest_nice", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="guest_nice"} 0.0000000 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "guest", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="guest"} 1.7837326 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "steal", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="steal"} 0.0000000 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "softirq", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="softirq"} 0.5275442 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "irq", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="irq"} 0.2260836 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "user", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="user"} 2.3362762 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "system", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="system"} 1.7961062 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "nice", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="nice"} 0.0000000 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "iowait", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="iowait"} 0.9671802 1500066662000
+# COMMENT netdata_system_cpu_percentage_average: dimension "idle", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive
+netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="idle"} 92.3630770 1500066662000
+```
+
+_(Netdata response for `system.cpu` with source=`average`)_
+
+In `average` or `sum` data sources, all values are normalized and are reported to Prometheus as gauges. Now, use the
+'expression' text form in Prometheus. Begin to type the metrics we are looking for: `netdata_system_cpu`. You should see
+that the text form begins to auto-fill as Prometheus knows about this metric.
+
+If the data source was `as collected`, the response would be:
+
+```sh
+# COMMENT homogeneous chart "system.cpu", context "system.cpu", family "cpu", units "percentage"
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "guest_nice", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="guest_nice"} 0 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "guest", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="guest"} 63945 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "steal", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="steal"} 0 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "softirq", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="softirq"} 8295 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "irq", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="irq"} 4079 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "user", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="user"} 116488 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "system", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="system"} 35084 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "nice", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="nice"} 505 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "iowait", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="iowait"} 23314 1500066716438
+# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "idle", value * 1 / 1 delta gives percentage (counter)
+netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="idle"} 918470 1500066716438
+```
+
+_(Netdata response for `system.cpu` with source=`as-collected`)_
+
+For more information check Prometheus documentation.
+
+### Streaming data from upstream hosts
+
+The `format=prometheus` parameter only exports the host's Netdata metrics. If you are using the parent-child
+functionality of Netdata this ignores any upstream hosts - so you should consider using the below in your
+**prometheus.yml**:
+
+```yaml
+ metrics_path: '/api/v1/allmetrics'
+ params:
+ format: [prometheus_all_hosts]
+ honor_labels: true
+```
+
+This will report all upstream host data, and `honor_labels` will make Prometheus take note of the instance names
+provided.
+
+### Timestamps
+
+To pass the metrics through Prometheus pushgateway, Netdata supports the option `&timestamps=no` to send the metrics
+without timestamps.
+
+## Netdata host variables
+
+Netdata collects various system configuration metrics, like the max number of TCP sockets supported, the max number of
+files allowed system-wide, various IPC sizes, etc. These metrics are not exposed to Prometheus by default.
+
+To expose them, append `variables=yes` to the Netdata URL.
+
+### TYPE and HELP
+
+To save bandwidth, and because Prometheus does not use them anyway, `# TYPE` and `# HELP` lines are suppressed. If
+wanted they can be re-enabled via `types=yes` and `help=yes`, e.g.
+`/api/v1/allmetrics?format=prometheus&types=yes&help=yes`
+
+Note that if enabled, the `# TYPE` and `# HELP` lines are repeated for every occurrence of a metric, which goes against the Prometheus documentation's [specification for these lines](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#comments-help-text-and-type-information).
+
+### Names and IDs
+
+Netdata supports names and IDs for charts and dimensions. Usually IDs are unique identifiers as read by the system and
+names are human friendly labels (also unique).
+
+Most charts and metrics have the same ID and name, but in several cases they are different: disks with device-mapper,
+interrupts, QoS classes, statsd synthetic charts, etc.
+
+The default is controlled in `exporting.conf`:
+
+```conf
+[prometheus:exporter]
+ send names instead of ids = yes | no
+```
+
+You can overwrite it from Prometheus, by appending to the URL:
+
+- `&names=no` to get IDs (the old behaviour)
+- `&names=yes` to get names
+
+### Filtering metrics sent to Prometheus
+
+Netdata can filter the metrics it sends to Prometheus with this setting:
+
+```conf
+[prometheus:exporter]
+ send charts matching = *
+```
+
+This setting accepts a space separated list of [simple patterns](/libnetdata/simple_pattern/README.md) to match the
+**charts** to be sent to Prometheus. Each pattern can use `*` as wildcard, any number of times (e.g `*a*b*c*` is valid).
+Patterns starting with `!` give a negative match (e.g `!*.bad users.* groups.*` will send all the users and groups
+except `bad` user and `bad` group). The order is important: the first match (positive or negative) left to right, is
+used.
+
+### Changing the prefix of Netdata metrics
+
+Netdata sends all metrics prefixed with `netdata_`. You can change this in `netdata.conf`, like this:
+
+```conf
+[prometheus:exporter]
+ prefix = netdata
+```
+
+It can also be changed from the URL, by appending `&prefix=netdata`.
+
+### Metric Units
+
+The default source `average` adds the unit of measurement to the name of each metric (e.g. `_KiB_persec`). To hide the
+units and get the same metric names as with the other sources, append to the URL `&hideunits=yes`.
+
+The units were standardized in v1.12, with the effect of changing the metric names. To get the metric names as they were
+before v1.12, append to the URL `&oldunits=yes`
+
+### Accuracy of `average` and `sum` data sources
+
+When the data source is set to `average` or `sum`, Netdata remembers the last access of each client accessing Prometheus
+metrics and uses this last access time to respond with the `average` or `sum` of all the entries in the database since
+then. This means that Prometheus servers are not losing data when they access Netdata with data source = `average` or
+`sum`.
+
+To uniquely identify each Prometheus server, Netdata uses the IP of the client accessing the metrics. If however the IP
+is not good enough for identifying a single Prometheus server (e.g. when Prometheus servers are accessing Netdata
+through a web proxy, or when multiple Prometheus servers are NATed to a single IP), each Prometheus may append
+`&server=NAME` to the URL. This `NAME` is used by Netdata to uniquely identify each Prometheus server and keep track of
+its last access time.
+
+
diff --git a/exporting/prometheus/prometheus.c b/exporting/prometheus/prometheus.c
new file mode 100644
index 0000000..294d8ec
--- /dev/null
+++ b/exporting/prometheus/prometheus.c
@@ -0,0 +1,902 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#define EXPORTINGS_INTERNALS
+#include "prometheus.h"
+
+// ----------------------------------------------------------------------------
+// PROMETHEUS
+// /api/v1/allmetrics?format=prometheus and /api/v1/allmetrics?format=prometheus_all_hosts
+
+/**
+ * Match a chart against a simple pattern.
+ *
+ * The chart name is matched when the instance has EXPORTING_OPTION_SEND_NAMES set,
+ * otherwise the chart id is matched.
+ *
+ * @param instance an instance data structure.
+ * @param st a chart.
+ * @param filter a simple pattern to match against.
+ * @return Returns the result of simple_pattern_matches() for the selected chart identifier.
+ */
+static int is_matches_rrdset(struct instance *instance, RRDSET *st, SIMPLE_PATTERN *filter) {
+    const char *chart =
+        (instance->config.options & EXPORTING_OPTION_SEND_NAMES) ? rrdset_name(st) : rrdset_id(st);
+
+    return simple_pattern_matches(filter, chart);
+}
+
+/**
+ * Check if a chart can be sent to Prometheus
+ *
+ * A chart is rejected when:
+ *  - it carries RRDSET_FLAG_EXPORTING_IGNORE,
+ *  - it does not match the pattern in use (the given filter, or the instance charts pattern
+ *    when no filter is given),
+ *  - rrdset_is_available_for_exporting_and_alarms() reports it as unavailable, or
+ *  - its memory mode is RRD_MEMORY_MODE_NONE while the data source is not "as collected".
+ *
+ * When no filter is given, the decision taken with the instance charts pattern is stored in the
+ * chart flags (RRDSET_FLAG_EXPORTING_SEND / RRDSET_FLAG_EXPORTING_IGNORE), so the pattern is not
+ * evaluated again for that chart.
+ *
+ * @param instance an instance data structure.
+ * @param st a chart.
+ * @param filter a simple pattern to match against.
+ * @return Returns 1 if the chart can be sent, 0 otherwise.
+ */
+inline int can_send_rrdset(struct instance *instance, RRDSET *st, SIMPLE_PATTERN *filter)
+{
+#ifdef NETDATA_INTERNAL_CHECKS
+    // host is referenced only by the debug() calls below
+    // NOTE(review): presumably debug() expands to nothing without NETDATA_INTERNAL_CHECKS - confirm
+    RRDHOST *host = st->rrdhost;
+#endif
+
+    if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_EXPORTING_IGNORE)))
+        return 0;
+
+    if (filter) {
+        // an explicit filter is evaluated every time and never cached in the chart flags
+        if (!is_matches_rrdset(instance, st, filter))
+            return 0;
+    } else if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_EXPORTING_SEND))) {
+        // we have not checked this chart
+        if (is_matches_rrdset(instance, st, instance->config.charts_pattern)) {
+            rrdset_flag_set(st, RRDSET_FLAG_EXPORTING_SEND);
+        } else {
+            rrdset_flag_set(st, RRDSET_FLAG_EXPORTING_IGNORE);
+            debug(
+                D_EXPORTING,
+                "EXPORTING: not sending chart '%s' of host '%s', because it is disabled for exporting.",
+                rrdset_id(st),
+                rrdhost_hostname(host));
+            return 0;
+        }
+    }
+
+    if (unlikely(!rrdset_is_available_for_exporting_and_alarms(st))) {
+        debug(
+            D_EXPORTING,
+            "EXPORTING: not sending chart '%s' of host '%s', because it is not available for exporting.",
+            rrdset_id(st),
+            rrdhost_hostname(host));
+        return 0;
+    }
+
+    if (unlikely(
+            st->rrd_memory_mode == RRD_MEMORY_MODE_NONE &&
+            !(EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AS_COLLECTED))) {
+        // report the memory mode of the chart, which is the value the condition above tested
+        debug(
+            D_EXPORTING,
+            "EXPORTING: not sending chart '%s' of host '%s' because its memory mode is '%s' and the exporting connector requires database access.",
+            rrdset_id(st),
+            rrdhost_hostname(host),
+            rrd_memory_mode_name(st->rrd_memory_mode));
+        return 0;
+    }
+
+    return 1;
+}
+
+static struct prometheus_server {
+ const char *server; // name of the Prometheus server; an owned copy that prometheus_clean_server_root() frees
+ uint32_t hash; // simple_hash() of server, compared before the string itself
+ RRDHOST *host; // host this entry was recorded for
+ time_t last_access; // time of the most recent prometheus_server_last_access() call for this entry
+ struct prometheus_server *next; // next entry of the singly linked list
+} *prometheus_server_root = NULL; // head of the list; new entries are added at the front
+
+static netdata_mutex_t prometheus_server_root_mutex = NETDATA_MUTEX_INITIALIZER; // held while the list is walked or modified
+
+/**
+ * Clean server root local structure
+ *
+ * Frees every entry of the prometheus_server list, together with its server name, and resets
+ * the list head to NULL. Calling it on an empty list is a no-op.
+ */
+void prometheus_clean_server_root()
+{
+    // take the lock before looking at the list head, so the check cannot race with
+    // prometheus_server_last_access() adding an entry
+    netdata_mutex_lock(&prometheus_server_root_mutex);
+
+    struct prometheus_server *ps = prometheus_server_root;
+    while (ps) {
+        struct prometheus_server *current = ps;
+        ps = ps->next; // advance first, current is about to be freed
+
+        if (current->server)
+            freez((void *)current->server);
+
+        freez(current);
+    }
+    prometheus_server_root = NULL;
+
+    netdata_mutex_unlock(&prometheus_server_root_mutex);
+}
+
+/**
+ * Get the last time when a Prometheus server scraped the Netdata Prometheus exporter.
+ *
+ * The (server, host) pair is looked up in the prometheus_server list. A known pair gets its
+ * access time replaced by now; an unknown pair is added at the front of the list.
+ * When built with UNIT_TESTING the function returns 0 without touching the list.
+ *
+ * @param server the name of the Prometheus server.
+ * @param host a data collecting host.
+ * @param now actual time.
+ * @return Returns the last time when the server accessed Netdata, or 0 if it is the first occurrence.
+ */
+static inline time_t prometheus_server_last_access(const char *server, RRDHOST *host, time_t now)
+{
+#ifdef UNIT_TESTING
+    return 0;
+#endif
+    uint32_t server_hash = simple_hash(server);
+    time_t previous = 0;
+
+    netdata_mutex_lock(&prometheus_server_root_mutex);
+
+    // cheap comparisons (host pointer, hash) come first, the string compare last
+    struct prometheus_server *entry = prometheus_server_root;
+    while (entry && !(host == entry->host && server_hash == entry->hash && strcmp(server, entry->server) == 0))
+        entry = entry->next;
+
+    if (entry) {
+        previous = entry->last_access;
+        entry->last_access = now;
+    } else {
+        // first request of this server for this host: remember it
+        entry = callocz(1, sizeof(struct prometheus_server));
+        entry->server = strdupz(server);
+        entry->hash = server_hash;
+        entry->host = host;
+        entry->last_access = now;
+        entry->next = prometheus_server_root;
+        prometheus_server_root = entry;
+    }
+
+    netdata_mutex_unlock(&prometheus_server_root_mutex);
+    return previous;
+}
+
+/**
+ * Copy and sanitize name.
+ *
+ * Every character that is not alphanumeric is replaced by '_'.
+ *
+ * @param d a destination string. It needs room for usable characters plus the terminating NUL.
+ * @param s a source string.
+ * @param usable the number of characters to copy.
+ * @return Returns the length of the copied string.
+ */
+inline size_t prometheus_name_copy(char *d, const char *s, size_t usable)
+{
+    size_t n;
+
+    for (n = 0; *s && n < usable; d++, s++, n++) {
+        // isalnum() is only defined for values representable as unsigned char (and EOF), so
+        // bytes >= 0x80 must not reach it as negative ints where plain char is signed
+        unsigned char c = (unsigned char)*s;
+
+        *d = isalnum(c) ? (char)c : '_';
+    }
+    *d = '\0';
+
+    return n;
+}
+
+/**
+ * Copy and sanitize label.
+ *
+ * Double quotes, backslashes and line feeds are escaped as \", \\ and \n, which is the escaping
+ * the Prometheus text exposition format requires for label values.
+ *
+ * @param d a destination string. It needs room for usable characters plus the terminating NUL.
+ * @param s a source string.
+ * @param usable the number of characters to copy.
+ * @return Returns the length of the copied string.
+ */
+inline size_t prometheus_label_copy(char *d, const char *s, size_t usable)
+{
+    size_t n = 0;
+
+    // make sure we can escape one character without overflowing the buffer;
+    // the decrement is guarded so that usable == 0 cannot wrap around to SIZE_MAX
+    if (usable)
+        usable--;
+
+    while (*s && n < usable) {
+        char c = *s++;
+        int escape = 0;
+
+        switch (c) {
+            case '\n':
+                // a raw line feed would end the exposition line, emit the two characters \n instead
+                c = 'n';
+                escape = 1;
+                break;
+            case '"':
+            case '\\':
+                escape = 1;
+                break;
+            default:
+                break;
+        }
+
+        if (escape) {
+            *d++ = '\\';
+            n++;
+        }
+        *d++ = c;
+        n++;
+    }
+    *d = '\0';
+
+    return n;
+}
+
+/**
+ * Copy and sanitize units.
+ *
+ * The result is '_' followed by the units, with every non-alphanumeric character replaced by '_'.
+ * Two spellings are rewritten afterwards: a lone "%" becomes "_percent" and a trailing "/s"
+ * becomes "_persec".
+ *
+ * @param d a destination string. It needs room for usable characters plus the terminating NUL
+ *          (and never less than 2 bytes, because the leading '_' is always written).
+ * @param s a source string.
+ * @param usable the number of characters to copy.
+ * @param showoldunits set this flag to 1 to show old (before v1.12) units.
+ * @return Returns the destination string.
+ */
+inline char *prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits)
+{
+    const char *sorig = s;
+    char *ret = d;
+    size_t n;
+
+    // Fix for issue 5227
+    if (unlikely(showoldunits)) {
+        static struct {
+            const char *newunit;
+            uint32_t hash;
+            const char *oldunit;
+        } units[] = { { "KiB/s", 0, "kilobytes/s" },
+                      { "MiB/s", 0, "MB/s" },
+                      { "GiB/s", 0, "GB/s" },
+                      { "KiB", 0, "KB" },
+                      { "MiB", 0, "MB" },
+                      { "GiB", 0, "GB" },
+                      { "inodes", 0, "Inodes" },
+                      { "percentage", 0, "percent" },
+                      { "faults/s", 0, "page faults/s" },
+                      { "KiB/operation", 0, "kilobytes per operation" },
+                      { "milliseconds/operation", 0, "ms per operation" },
+                      { NULL, 0, NULL } };
+        static int initialized = 0;
+        int i;
+
+        // the hashes of the new unit names are computed once, on first use
+        // NOTE(review): this lazy initialization is not synchronized; concurrent first calls
+        // would store the same values
+        if (unlikely(!initialized)) {
+            for (i = 0; units[i].newunit; i++)
+                units[i].hash = simple_hash(units[i].newunit);
+            initialized = 1;
+        }
+
+        uint32_t hash = simple_hash(s);
+        for (i = 0; units[i].newunit; i++) {
+            if (unlikely(hash == units[i].hash && !strcmp(s, units[i].newunit))) {
+                // continue with the pre-v1.12 spelling; the suffix checks below look at it as well
+                s = units[i].oldunit;
+                sorig = s;
+                break;
+            }
+        }
+    }
+
+    // n counts the characters written to d, including the leading '_'
+    *d++ = '_';
+    for (n = 1; *s && n < usable; d++, s++, n++) {
+        // isalnum() is only defined for values representable as unsigned char (and EOF), so
+        // bytes >= 0x80 must not reach it as negative ints where plain char is signed
+        unsigned char c = (unsigned char)*s;
+
+        *d = isalnum(c) ? (char)c : '_';
+    }
+
+    if (n == 2 && sorig[0] == '%') {
+        // exactly one source character was copied and the units start with '%': start over
+        n = 0;
+        d = ret;
+        s = "_percent";
+        for (; *s && n < usable; n++)
+            *d++ = *s++;
+    } else if (n > 3 && sorig[n - 3] == '/' && sorig[n - 2] == 's') {
+        // the copied part ends in "/s" (written as "_s"): overwrite those two characters
+        n = n - 2;
+        d -= 2;
+        s = "_persec";
+        for (; *s && n < usable; n++)
+            *d++ = *s++;
+    }
+
+    *d = '\0';
+
+    return ret;
+}
+
+/**
+ * Format host labels for the Prometheus exporter
+ *
+ * @param instance an instance data structure.
+ * @param host a data collecting host.
+ */
+
+struct format_prometheus_label_callback {
+ struct instance *instance;
+ size_t count;
+};
+
+/**
+ * Label walk callback: append one host label to the labels buffer of the instance.
+ *
+ * The key is copied with prometheus_name_copy() and the value with prometheus_label_copy();
+ * the pair is written as key="value", comma separated from any pair written before it.
+ *
+ * @param name the label key.
+ * @param value the label value.
+ * @param ls the source of the label, checked with should_send_label().
+ * @param data a pointer to a struct format_prometheus_label_callback.
+ * @return Returns 0 when should_send_label() rejects the label, 1 otherwise.
+ */
+static int format_prometheus_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
+    struct format_prometheus_label_callback *state = (struct format_prometheus_label_callback *)data;
+
+    if (!should_send_label(state->instance, ls))
+        return 0;
+
+    char key[PROMETHEUS_ELEMENT_MAX + 1];
+    char val[PROMETHEUS_ELEMENT_MAX + 1];
+
+    prometheus_name_copy(key, name, PROMETHEUS_ELEMENT_MAX);
+    prometheus_label_copy(val, value, PROMETHEUS_ELEMENT_MAX);
+
+    // a pair with an empty key or an empty value is not written, but the label still counts as handled
+    if (key[0] == '\0' || val[0] == '\0')
+        return 1;
+
+    if (state->count > 0)
+        buffer_strcat(state->instance->labels_buffer, ",");
+
+    buffer_sprintf(state->instance->labels_buffer, "%s=\"%s\"", key, val);
+    state->count++;
+
+    return 1;
+}
+
+/**
+ * Append the labels of a host to the labels buffer of an exporting instance.
+ *
+ * Nothing is done when sending labels is not configured for the instance. The buffer is created
+ * on first use; this function only appends to it and does not clear it.
+ *
+ * @param instance an instance data structure.
+ * @param host a data collecting host.
+ */
+void format_host_labels_prometheus(struct instance *instance, RRDHOST *host)
+{
+    if (unlikely(!sending_labels_configured(instance)))
+        return;
+
+    if (instance->labels_buffer == NULL)
+        instance->labels_buffer = buffer_create(1024);
+
+    struct format_prometheus_label_callback walk_state;
+    walk_state.instance = instance;
+    walk_state.count = 0;
+
+    rrdlabels_walkthrough_read(host->rrdlabels, format_prometheus_label_callback, &walk_state);
+}
+
+struct host_variables_callback_options { // options and scratch state passed to print_host_variables_callback() via its data pointer
+ RRDHOST *host; // host whose variables are being walked
+ BUFFER *wb; // output buffer the variable lines are appended to
+ EXPORTING_OPTIONS exporting_options; // exporting options of the request; not read by the callback itself
+ PROMETHEUS_OUTPUT_OPTIONS output_options; // HELP enables the comment lines, TIMESTAMPS appends a timestamp to each sample
+ const char *prefix; // metric name prefix; samples are named prefix_name
+ const char *labels; // label list without surrounding braces; braces are added only when it is non-empty
+ time_t now; // request time in seconds; multiplied by 1000 when written as a sample timestamp
+ int host_header_printed; // set to 1 on the first exported variable so the section header comment is emitted at most once
+ char name[PROMETHEUS_VARIABLE_MAX + 1]; // scratch buffer receiving the sanitized variable name
+};
+
+/**
+ * Print host variables.
+ *
+ * Dictionary walk callback that writes one custom host or custom chart variable to the output
+ * buffer as a sample named <prefix>_<sanitized variable name>, optionally followed by a
+ * millisecond timestamp. Variables without one of the two custom flags, and variables whose
+ * value is NAN or INF, are skipped.
+ *
+ * @param item the dictionary item; it is used as the acquired variable handle.
+ * @param rv_ptr unused.
+ * @param data callback options (struct host_variables_callback_options).
+ * @return Returns 1 if the variable was written, 0 if it was skipped.
+ */
+static int print_host_variables_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rv_ptr __maybe_unused, void *data) {
+    const RRDVAR_ACQUIRED *rv = (const RRDVAR_ACQUIRED *)item;
+
+    struct host_variables_callback_options *opts = data;
+
+    if (rrdvar_flags(rv) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_CUSTOM_CHART_VAR)) {
+        // the section header is handled once per walk, whether or not help output is enabled
+        if (!opts->host_header_printed) {
+            opts->host_header_printed = 1;
+
+            if (opts->output_options & PROMETHEUS_OUTPUT_HELP) {
+                buffer_sprintf(opts->wb, "\n# COMMENT global host and chart variables\n");
+            }
+        }
+
+        NETDATA_DOUBLE value = rrdvar2number(rv);
+        if (isnan(value) || isinf(value)) {
+            if (opts->output_options & PROMETHEUS_OUTPUT_HELP)
+                buffer_sprintf(
+                    opts->wb, "# COMMENT variable \"%s\" is %s. Skipped.\n", rrdvar_name(rv), (isnan(value)) ? "NAN" : "INF");
+
+            return 0;
+        }
+
+        // braces are written only when there are labels to put between them
+        const char *label_pre = "";
+        const char *label_post = "";
+        if (opts->labels && *opts->labels) {
+            label_pre = "{";
+            label_post = "}";
+        }
+
+        // Every other caller in this file passes the number of usable characters (MAX) for a
+        // buffer of MAX + 1 bytes, i.e. `usable` excludes the terminating NUL. Passing the full
+        // array size would let the terminator land one byte past the end of opts->name for a
+        // variable name that fills the buffer, so one byte is reserved for it here.
+        prometheus_name_copy(opts->name, rrdvar_name(rv), sizeof(opts->name) - 1);
+
+        if (opts->output_options & PROMETHEUS_OUTPUT_TIMESTAMPS)
+            buffer_sprintf(
+                opts->wb,
+                "%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT " %llu\n",
+                opts->prefix,
+                opts->name,
+                label_pre,
+                opts->labels,
+                label_post,
+                value,
+                opts->now * 1000ULL);
+        else
+            buffer_sprintf(
+                opts->wb,
+                "%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT "\n",
+                opts->prefix,
+                opts->name,
+                label_pre,
+                opts->labels,
+                label_post,
+                value);
+
+        return 1;
+    }
+
+    return 0;
+}
+
+struct gen_parameters { // inputs shared by generate_as_collected_prom_help() and generate_as_collected_prom_metric()
+ const char *prefix; // first part of the metric name
+ char *context; // sanitized chart context, second part of the metric name
+ char *suffix; // appended after the name; the caller uses an empty string or _total
+
+ char *chart; // value of the chart label
+ char *dimension; // a metric name component for heterogeneous charts, the dimension label value for homogeneous ones
+ char *family; // value of the family label
+ char *labels; // extra label text appended verbatim before the closing brace
+
+ PROMETHEUS_OUTPUT_OPTIONS output_options; // NAMES selects names over ids in the help text, TIMESTAMPS adds a sample timestamp
+ RRDSET *st; // chart being exported
+ RRDDIM *rd; // dimension being exported
+
+ const char *relation; // wording used in the help comment (gives / delta gives)
+ const char *type; // metric type shown in the help comment (gauge / counter)
+};
+
+/**
+ * Write an as-collected help comment to a buffer.
+ *
+ * The comment names the metric, describes where it comes from (chart, context, family, dimension)
+ * and shows the multiplier / divisor that relate the collected value to the chart units.
+ *
+ * @param wb the buffer to write the comment to.
+ * @param p parameters for generating the comment string.
+ * @param homogeneous a flag for homogeneous charts.
+ * @param prometheus_collector a flag for metrics from prometheus collector.
+ */
+static void generate_as_collected_prom_help(BUFFER *wb, struct gen_parameters *p, int homogeneous, int prometheus_collector)
+{
+    // heterogeneous charts carry the dimension as part of the metric name
+    if (homogeneous)
+        buffer_sprintf(wb, "# COMMENT %s_%s", p->prefix, p->context);
+    else
+        buffer_sprintf(wb, "# COMMENT %s_%s_%s", p->prefix, p->context, p->dimension);
+
+    // names are shown only when requested and available, ids otherwise
+    const char *chart_shown = rrdset_id(p->st);
+    if ((p->output_options & PROMETHEUS_OUTPUT_NAMES) && p->st->name)
+        chart_shown = rrdset_name(p->st);
+
+    const char *dimension_shown = rrddim_id(p->rd);
+    if ((p->output_options & PROMETHEUS_OUTPUT_NAMES) && p->rd->name)
+        dimension_shown = rrddim_name(p->rd);
+
+    buffer_sprintf(
+        wb,
+        "%s: chart \"%s\", context \"%s\", family \"%s\", dimension \"%s\", value * ",
+        p->suffix,
+        chart_shown,
+        rrdset_context(p->st),
+        rrdset_family(p->st),
+        dimension_shown);
+
+    // the help text for metrics of the prometheus collector always shows a 1 / 1 ratio
+    if (!prometheus_collector)
+        buffer_sprintf(wb, COLLECTED_NUMBER_FORMAT " / " COLLECTED_NUMBER_FORMAT, p->rd->multiplier, p->rd->divisor);
+    else
+        buffer_sprintf(wb, "1 / 1");
+
+    buffer_sprintf(wb, " %s %s (%s)\n", p->relation, rrdset_units(p->st), p->type);
+}
+
+/**
+ * Write an as-collected metric to a buffer.
+ *
+ * Homogeneous charts put the dimension in a label; heterogeneous charts put it in the metric name.
+ *
+ * @param wb the buffer to write the metric to.
+ * @param p parameters for generating the metric string.
+ * @param homogeneous a flag for homogeneous charts.
+ * @param prometheus_collector a flag for metrics from prometheus collector.
+ */
+static void generate_as_collected_prom_metric(BUFFER *wb, struct gen_parameters *p, int homogeneous, int prometheus_collector)
+{
+    RRDDIM *dim = p->rd;
+
+    // metric name
+    if (homogeneous)
+        buffer_sprintf(wb, "%s_%s%s", p->prefix, p->context, p->suffix);
+    else
+        buffer_sprintf(wb, "%s_%s_%s%s", p->prefix, p->context, p->dimension, p->suffix);
+
+    // label set
+    buffer_sprintf(wb, "{chart=\"%s\",family=\"%s\"", p->chart, p->family);
+    if (homogeneous)
+        buffer_sprintf(wb, ",dimension=\"%s\"", p->dimension);
+    buffer_sprintf(wb, "%s} ", p->labels);
+
+    // value: metrics of the prometheus collector are scaled by multiplier / divisor,
+    // everything else is written as the raw collected number
+    if (!prometheus_collector) {
+        buffer_sprintf(wb, COLLECTED_NUMBER_FORMAT, dim->last_collected_value);
+    } else {
+        buffer_sprintf(
+            wb,
+            NETDATA_DOUBLE_FORMAT,
+            (NETDATA_DOUBLE)dim->last_collected_value * (NETDATA_DOUBLE)dim->multiplier /
+                (NETDATA_DOUBLE)dim->divisor);
+    }
+
+    // optional timestamp of the last collection, then the end of the line
+    if (!(p->output_options & PROMETHEUS_OUTPUT_TIMESTAMPS))
+        buffer_sprintf(wb, "\n");
+    else
+        buffer_sprintf(wb, " %llu\n", timeval_msec(&dim->last_collected_time));
+}
+
+/**
+ * Write metrics in Prometheus format to a buffer.
+ *
+ * The output starts with a netdata_info line carrying the host name, program name, program version
+ * and the host labels. Custom host variables follow when requested, and then one sample per
+ * collected, non-obsolete dimension of every chart accepted by can_send_rrdset().
+ *
+ * @param instance an instance data structure.
+ * @param host a data collecting host.
+ * @param filter_string a simple pattern filter.
+ * @param wb the buffer to fill with metrics.
+ * @param prefix a prefix for every metric.
+ * @param exporting_options options to configure what data is exported.
+ * @param allhosts set to 1 if host instance should be in the output for tags.
+ * @param output_options options to configure the format of the output.
+ */
+static void rrd_stats_api_v1_charts_allmetrics_prometheus(
+    struct instance *instance,
+    RRDHOST *host,
+    const char *filter_string,
+    BUFFER *wb,
+    const char *prefix,
+    EXPORTING_OPTIONS exporting_options,
+    int allhosts,
+    PROMETHEUS_OUTPUT_OPTIONS output_options)
+{
+    SIMPLE_PATTERN *filter = simple_pattern_create(filter_string, NULL, SIMPLE_PATTERN_EXACT);
+
+    char hostname[PROMETHEUS_ELEMENT_MAX + 1];
+    prometheus_label_copy(hostname, rrdhost_hostname(host), PROMETHEUS_ELEMENT_MAX);
+
+    // fills instance->labels_buffer with the host labels (when sending labels is configured)
+    format_host_labels_prometheus(instance, host);
+
+    buffer_sprintf(
+        wb,
+        "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"",
+        hostname,
+        rrdhost_program_name(host),
+        rrdhost_program_version(host));
+
+    if (instance->labels_buffer && *buffer_tostring(instance->labels_buffer)) {
+        buffer_sprintf(wb, ",%s", buffer_tostring(instance->labels_buffer));
+    }
+
+    if (output_options & PROMETHEUS_OUTPUT_TIMESTAMPS)
+        buffer_sprintf(wb, "} 1 %llu\n", now_realtime_usec() / USEC_PER_MS);
+    else
+        buffer_sprintf(wb, "} 1\n");
+
+    // extra label text appended to every metric; it starts with a comma because it follows other labels
+    char labels[PROMETHEUS_LABELS_MAX + 1] = "";
+    if (allhosts) {
+        snprintfz(labels, PROMETHEUS_LABELS_MAX, ",instance=\"%s\"", hostname);
+    }
+
+    // the host labels were only needed for netdata_info; empty the buffer for the next host
+    if (instance->labels_buffer)
+        buffer_flush(instance->labels_buffer);
+
+    // send custom variables set for the host
+    if (output_options & PROMETHEUS_OUTPUT_VARIABLES) {
+
+        struct host_variables_callback_options opts = {
+            .host = host,
+            .wb = wb,
+            // variables have no other labels, so the leading comma is dropped
+            .labels = (labels[0] == ',') ? &labels[1] : labels,
+            .exporting_options = exporting_options,
+            .output_options = output_options,
+            .prefix = prefix,
+            .now = now_realtime_sec(),
+            .host_header_printed = 0
+        };
+
+        rrdvar_walkthrough_read(host->rrdvars, print_host_variables_callback, &opts);
+    }
+
+    // for each chart
+    RRDSET *st;
+    rrdset_foreach_read(st, host) {
+
+        if (likely(can_send_rrdset(instance, st, filter))) {
+            char chart[PROMETHEUS_ELEMENT_MAX + 1];
+            char context[PROMETHEUS_ELEMENT_MAX + 1];
+            char family[PROMETHEUS_ELEMENT_MAX + 1];
+            char units[PROMETHEUS_ELEMENT_MAX + 1] = "";
+
+            prometheus_label_copy(chart, (output_options & PROMETHEUS_OUTPUT_NAMES && st->name) ? rrdset_name(st) : rrdset_id(st), PROMETHEUS_ELEMENT_MAX);
+            prometheus_label_copy(family, rrdset_family(st), PROMETHEUS_ELEMENT_MAX);
+            prometheus_name_copy(context, rrdset_context(st), PROMETHEUS_ELEMENT_MAX);
+
+            int as_collected = (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AS_COLLECTED);
+            int homogeneous = 1;
+            int prometheus_collector = 0;
+            if (as_collected) {
+                if (rrdset_flag_check(st, RRDSET_FLAG_HOMOGENEOUS_CHECK))
+                    rrdset_update_heterogeneous_flag(st);
+
+                if (rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS))
+                    homogeneous = 0;
+
+                if (!strcmp(rrdset_module_name(st), "prometheus"))
+                    prometheus_collector = 1;
+            } else {
+                // units become part of the metric name only for averaged data, unless hidden
+                if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AVERAGE &&
+                    !(output_options & PROMETHEUS_OUTPUT_HIDEUNITS))
+                    prometheus_units_copy(
+                        units, rrdset_units(st), PROMETHEUS_ELEMENT_MAX, output_options & PROMETHEUS_OUTPUT_OLDUNITS);
+            }
+
+            if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP))
+                buffer_sprintf(
+                    wb,
+                    "\n# COMMENT %s chart \"%s\", context \"%s\", family \"%s\", units \"%s\"\n",
+                    (homogeneous) ? "homogeneous" : "heterogeneous",
+                    (output_options & PROMETHEUS_OUTPUT_NAMES && st->name) ? rrdset_name(st) : rrdset_id(st),
+                    rrdset_context(st),
+                    rrdset_family(st),
+                    rrdset_units(st));
+
+            // for each dimension
+            RRDDIM *rd;
+            rrddim_foreach_read(rd, st) {
+                if (rd->collections_counter && !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) {
+                    char dimension[PROMETHEUS_ELEMENT_MAX + 1];
+                    char *suffix = "";
+
+                    if (as_collected) {
+                        // we need as-collected / raw data
+
+                        struct gen_parameters p;
+                        p.prefix = prefix;
+                        p.context = context;
+                        p.suffix = suffix;
+                        p.chart = chart;
+                        p.dimension = dimension; // filled in below, before p is used
+                        p.family = family;
+                        p.labels = labels;
+                        p.output_options = output_options;
+                        p.st = st;
+                        p.rd = rd;
+
+                        // skip dimensions that have not been collected since the last request
+                        if (unlikely(rd->last_collected_time.tv_sec < instance->after))
+                            continue;
+
+                        p.type = "gauge";
+                        p.relation = "gives";
+                        if (rd->algorithm == RRD_ALGORITHM_INCREMENTAL ||
+                            rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL) {
+                            p.type = "counter";
+                            p.relation = "delta gives";
+                            if (!prometheus_collector)
+                                p.suffix = "_total";
+                        }
+
+                        if (homogeneous) {
+                            // all the dimensions of the chart, has the same algorithm, multiplier and divisor
+                            // we add all dimensions as labels
+
+                            prometheus_label_copy(
+                                dimension,
+                                (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd),
+                                PROMETHEUS_ELEMENT_MAX);
+
+                            if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP))
+                                generate_as_collected_prom_help(wb, &p, homogeneous, prometheus_collector);
+
+                            // the TYPE line must name the metric exactly as the sample below does,
+                            // so it uses p.suffix (which may be "_total") and not the local suffix
+                            if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES))
+                                buffer_sprintf(wb, "# TYPE %s_%s%s %s\n", prefix, context, p.suffix, p.type);
+
+                            generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector);
+                        }
+                        else {
+                            // the dimensions of the chart, do not have the same algorithm, multiplier or divisor
+                            // we create a metric per dimension
+
+                            prometheus_name_copy(
+                                dimension,
+                                (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd),
+                                PROMETHEUS_ELEMENT_MAX);
+
+                            if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP))
+                                generate_as_collected_prom_help(wb, &p, homogeneous, prometheus_collector);
+
+                            // same as above: keep the TYPE line name identical to the sample name
+                            if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES))
+                                buffer_sprintf(
+                                    wb, "# TYPE %s_%s_%s%s %s\n", prefix, context, dimension, p.suffix, p.type);
+
+                            generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector);
+                        }
+                    }
+                    else {
+                        // we need average or sum of the data
+
+                        time_t first_time = instance->after;
+                        time_t last_time = instance->before;
+                        NETDATA_DOUBLE value = exporting_calculate_value_from_stored_data(instance, rd, &last_time);
+
+                        if (!isnan(value) && !isinf(value)) {
+                            if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AVERAGE)
+                                suffix = "_average";
+                            else if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_SUM)
+                                suffix = "_sum";
+
+                            prometheus_label_copy(
+                                dimension,
+                                (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd),
+                                PROMETHEUS_ELEMENT_MAX);
+
+                            if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP))
+                                buffer_sprintf(
+                                    wb,
+                                    "# COMMENT %s_%s%s%s: dimension \"%s\", value is %s, gauge, dt %llu to %llu inclusive\n",
+                                    prefix,
+                                    context,
+                                    units,
+                                    suffix,
+                                    (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd),
+                                    rrdset_units(st),
+                                    (unsigned long long)first_time,
+                                    (unsigned long long)last_time);
+
+                            if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES))
+                                buffer_sprintf(wb, "# TYPE %s_%s%s%s gauge\n", prefix, context, units, suffix);
+
+                            if (output_options & PROMETHEUS_OUTPUT_TIMESTAMPS)
+                                buffer_sprintf(
+                                    wb,
+                                    "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " NETDATA_DOUBLE_FORMAT
+                                    " %llu\n",
+                                    prefix,
+                                    context,
+                                    units,
+                                    suffix,
+                                    chart,
+                                    family,
+                                    dimension,
+                                    labels,
+                                    value,
+                                    last_time * MSEC_PER_SEC);
+                            else
+                                buffer_sprintf(
+                                    wb,
+                                    "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " NETDATA_DOUBLE_FORMAT
+                                    "\n",
+                                    prefix,
+                                    context,
+                                    units,
+                                    suffix,
+                                    chart,
+                                    family,
+                                    dimension,
+                                    labels,
+                                    value);
+                        }
+                    }
+                }
+            }
+            rrddim_foreach_done(rd);
+        }
+    }
+    rrdset_foreach_done(st);
+
+    simple_pattern_free(filter);
+}
+
+/**
+ * Get the last time a server accessed Netdata. Write information about an API request to a buffer.
+ *
+ * When the server has not been seen before, or the recorded time lies after `now`, the returned
+ * time is one update interval of the instance before `now`.
+ *
+ * @param instance an instance data structure.
+ * @param host a data collecting host.
+ * @param wb the buffer to write to.
+ * @param exporting_options options to configure what data is exported.
+ * @param server the name of a Prometheus server.
+ * @param now actual time.
+ * @param output_options options to configure the format of the output.
+ * @return Returns the last time when the server accessed Netdata.
+ */
+static inline time_t prometheus_preparation(
+    struct instance *instance,
+    RRDHOST *host,
+    BUFFER *wb,
+    EXPORTING_OPTIONS exporting_options,
+    const char *server,
+    time_t now,
+    PROMETHEUS_OUTPUT_OPTIONS output_options)
+{
+#ifndef UNIT_TESTING
+    analytics_log_prometheus();
+#endif
+    // requests without a server name are all accounted to the same server
+    if (!server || !*server)
+        server = "default";
+
+    time_t after = prometheus_server_last_access(server, host, now);
+
+    // a zero last access time means this server has not been seen before
+    const int first_seen = (after == 0);
+
+    // never seen, or a last access in the future (which should never happen):
+    // fall back to one update interval before now
+    if (first_seen || after > now)
+        after = now - instance->config.update_every;
+
+    if (!(output_options & PROMETHEUS_OUTPUT_HELP))
+        return after;
+
+    const char *mode = "unknown";
+    if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AS_COLLECTED)
+        mode = "as collected";
+    else if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AVERAGE)
+        mode = "average";
+    else if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_SUM)
+        mode = "sum";
+
+    buffer_sprintf(
+        wb,
+        "# COMMENT netdata \"%s\" to %sprometheus \"%s\", source \"%s\", last seen %lu %s, time range %lu to %lu\n\n",
+        rrdhost_hostname(host),
+        (first_seen) ? "FIRST SEEN " : "",
+        server,
+        mode,
+        (unsigned long)((first_seen) ? 0 : (now - after)),
+        (first_seen) ? "never" : "seconds ago",
+        (unsigned long)after,
+        (unsigned long)now);
+
+    return after;
+}
+
+/**
+ * Write metrics and auxiliary information for one host to a buffer.
+ *
+ * Nothing is written when the Prometheus exporter instance does not exist or is not initialized.
+ *
+ * @param host a data collecting host.
+ * @param filter_string a simple pattern filter.
+ * @param wb the buffer to write to.
+ * @param server the name of a Prometheus server.
+ * @param prefix a prefix for every metric.
+ * @param exporting_options options to configure what data is exported.
+ * @param output_options options to configure the format of the output.
+ */
+void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(
+    RRDHOST *host,
+    const char *filter_string,
+    BUFFER *wb,
+    const char *server,
+    const char *prefix,
+    EXPORTING_OPTIONS exporting_options,
+    PROMETHEUS_OUTPUT_OPTIONS output_options)
+{
+    struct instance *exporter = prometheus_exporter_instance;
+
+    if (unlikely(!exporter || !exporter->config.initialized))
+        return;
+
+    // the exported time frame ends now ...
+    exporter->before = now_realtime_sec();
+
+    // ... and starts at the point we had stopped before
+    exporter->after =
+        prometheus_preparation(exporter, host, wb, exporting_options, server, exporter->before, output_options);
+
+    // 0: a single host needs no instance label on its metrics
+    rrd_stats_api_v1_charts_allmetrics_prometheus(
+        exporter, host, filter_string, wb, prefix, exporting_options, 0, output_options);
+}
+
+/**
+ * Write metrics and auxiliary information for all hosts to a buffer.
+ *
+ * Nothing is written when the Prometheus exporter instance does not exist or is not initialized.
+ *
+ * @param host a data collecting host; it is used for the request preamble and then reused as the
+ *             iteration variable over all hosts.
+ * @param filter_string a simple pattern filter.
+ * @param wb the buffer to write to.
+ * @param server the name of a Prometheus server.
+ * @param prefix a prefix for every metric.
+ * @param exporting_options options to configure what data is exported.
+ * @param output_options options to configure the format of the output.
+ */
+void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(
+    RRDHOST *host,
+    const char *filter_string,
+    BUFFER *wb,
+    const char *server,
+    const char *prefix,
+    EXPORTING_OPTIONS exporting_options,
+    PROMETHEUS_OUTPUT_OPTIONS output_options)
+{
+    struct instance *exporter = prometheus_exporter_instance;
+
+    if (unlikely(!exporter || !exporter->config.initialized))
+        return;
+
+    // the exported time frame ends now ...
+    exporter->before = now_realtime_sec();
+
+    // ... and starts at the point we had stopped before
+    exporter->after =
+        prometheus_preparation(exporter, host, wb, exporting_options, server, exporter->before, output_options);
+
+    // the hosts are walked under the read lock; 1 asks for an instance label on every metric
+    rrd_rdlock();
+    rrdhost_foreach_read(host)
+    {
+        rrd_stats_api_v1_charts_allmetrics_prometheus(
+            exporter, host, filter_string, wb, prefix, exporting_options, 1, output_options);
+    }
+    rrd_unlock();
+}
diff --git a/exporting/prometheus/prometheus.h b/exporting/prometheus/prometheus.h
new file mode 100644
index 0000000..e80b682
--- /dev/null
+++ b/exporting/prometheus/prometheus.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EXPORTING_PROMETHEUS_H
+#define NETDATA_EXPORTING_PROMETHEUS_H 1
+
+#include "exporting/exporting_engine.h"
+
+#define PROMETHEUS_ELEMENT_MAX 256 // maximum characters copied into chart, family, context, dimension, hostname and label buffers (declared one byte larger for the NUL)
+#define PROMETHEUS_LABELS_MAX 1024 // maximum length of the formatted labels string
+#define PROMETHEUS_VARIABLE_MAX 256 // maximum length of a sanitized host variable name
+
+#define PROMETHEUS_LABELS_MAX_NUMBER 128 // NOTE(review): presumably a cap on the number of labels; not referenced in the code shown here - confirm
+
+typedef enum prometheus_output_flags { // bit flags selecting what the Prometheus text output contains
+ PROMETHEUS_OUTPUT_NONE = 0, // no optional output
+ PROMETHEUS_OUTPUT_HELP = (1 << 0), // emit the explanatory # COMMENT lines
+ PROMETHEUS_OUTPUT_TYPES = (1 << 1), // emit # TYPE lines
+ PROMETHEUS_OUTPUT_NAMES = (1 << 2), // use chart and dimension names instead of ids when a name is set
+ PROMETHEUS_OUTPUT_TIMESTAMPS = (1 << 3), // append a millisecond timestamp to every sample
+ PROMETHEUS_OUTPUT_VARIABLES = (1 << 4), // also export the custom host and chart variables
+ PROMETHEUS_OUTPUT_OLDUNITS = (1 << 5), // passed to prometheus_units_copy() as showoldunits
+ PROMETHEUS_OUTPUT_HIDEUNITS = (1 << 6) // do not add the units to metric names
+} PROMETHEUS_OUTPUT_OPTIONS;
+
+void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host( // writes the metrics of one host to wb in Prometheus text format
+ RRDHOST *host, const char *filter_string, BUFFER *wb, const char *server, const char *prefix,
+ EXPORTING_OPTIONS exporting_options, PROMETHEUS_OUTPUT_OPTIONS output_options);
+void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts( // writes the metrics of every host, adding an instance label to each metric
+ RRDHOST *host, const char *filter_string, BUFFER *wb, const char *server, const char *prefix,
+ EXPORTING_OPTIONS exporting_options, PROMETHEUS_OUTPUT_OPTIONS output_options);
+
+int can_send_rrdset(struct instance *instance, RRDSET *st, SIMPLE_PATTERN *filter); // a non-zero result means the chart is exported
+size_t prometheus_name_copy(char *d, const char *s, size_t usable); // used for metric name parts and label keys; callers pass a buffer of usable + 1 bytes
+size_t prometheus_label_copy(char *d, const char *s, size_t usable); // used for label values; callers pass a buffer of usable + 1 bytes
+char *prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits); // builds the units part of a metric name; showoldunits maps new unit names back to the old ones
+
+void format_host_labels_prometheus(struct instance *instance, RRDHOST *host); // appends the host labels to instance->labels_buffer
+
+void prometheus_clean_server_root(); // NOTE(review): defined outside the code shown here; purpose not verified
+
+#endif //NETDATA_EXPORTING_PROMETHEUS_H
diff --git a/exporting/prometheus/remote_write/Makefile.am b/exporting/prometheus/remote_write/Makefile.am
new file mode 100644
index 0000000..d049ef4
--- /dev/null
+++ b/exporting/prometheus/remote_write/Makefile.am
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+CLEANFILES = \
+ remote_write.pb.cc \
+ remote_write.pb.h \
+ $(NULL)
+
+dist_noinst_DATA = \
+ remote_write.proto \
+ README.md \
+ $(NULL)
diff --git a/exporting/prometheus/remote_write/README.md b/exporting/prometheus/remote_write/README.md
new file mode 100644
index 0000000..54c5d65
--- /dev/null
+++ b/exporting/prometheus/remote_write/README.md
@@ -0,0 +1,58 @@
+<!--
+title: "Export metrics to Prometheus remote write providers"
+description: "Send Netdata metrics to your choice of more than 20 external storage providers for long-term archiving and further analysis."
+custom_edit_url: https://github.com/netdata/netdata/edit/master/exporting/prometheus/remote_write/README.md
+sidebar_label: Prometheus remote write
+-->
+
+# Prometheus remote write exporting connector
+
+The Prometheus remote write exporting connector uses the exporting engine to send Netdata metrics to your choice of more
+than 20 external storage providers for long-term archiving and further analysis.
+
+## Prerequisites
+
+To use the Prometheus remote write API with [storage
+providers](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage), install
+[protobuf](https://developers.google.com/protocol-buffers/) and [snappy](https://github.com/google/snappy) libraries.
+Next, [reinstall Netdata](/packaging/installer/REINSTALL.md), which detects that the required libraries and utilities
+are now available.
+
+## Configuration
+
+To enable data exporting to a storage provider using the Prometheus remote write API, run `./edit-config exporting.conf`
+in the Netdata configuration directory and set the following options:
+
+```conf
+[prometheus_remote_write:my_instance]
+ enabled = yes
+ destination = example.domain:example_port
+ remote write URL path = /receive
+```
+
+You can also add the `:https` modifier to the connector type if you need to use the TLS/SSL protocol. For example:
+`prometheus_remote_write:https:my_instance`.
+
+`remote write URL path` is used to set an endpoint path for the remote write protocol. The default value is `/receive`.
+For example, if your endpoint is `http://example.domain:example_port/storage/read`:
+
+```conf
+ destination = example.domain:example_port
+ remote write URL path = /storage/read
+```
+
+You can set basic HTTP authentication credentials using:
+
+```conf
+ username = my_username
+ password = my_password
+```
+
+The `buffered` and `lost` dimensions in the Netdata Exporting Connector Data Size operation monitoring chart estimate the
+uncompressed buffer size on failures.
+
+## Notes
+
+The remote write exporting connector does not support the `buffer on failures` option.
+
+
diff --git a/exporting/prometheus/remote_write/remote_write.c b/exporting/prometheus/remote_write/remote_write.c
new file mode 100644
index 0000000..2e2fa3c
--- /dev/null
+++ b/exporting/prometheus/remote_write/remote_write.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "remote_write.h"
+
+static int as_collected; // 1 when the instance data source is as-collected; set per chart by format_chart_prometheus_remote_write()
+static int homogeneous; // 0 when the current chart is flagged heterogeneous (only evaluated for as-collected data), 1 otherwise
+char context[PROMETHEUS_ELEMENT_MAX + 1]; // context of the chart being formatted, copied with prometheus_name_copy()
+char chart[PROMETHEUS_ELEMENT_MAX + 1]; // name or id of the chart being formatted, copied with prometheus_label_copy()
+char family[PROMETHEUS_ELEMENT_MAX + 1]; // family of the chart being formatted, copied with prometheus_label_copy()
+char units[PROMETHEUS_ELEMENT_MAX + 1] = ""; // units part of the metric name; written only for the average data source and never cleared afterwards
+
+/**
+ * Prepare HTTP header
+ *
+ * Appends the POST request header for the last buffer of the connector to that buffer's header.
+ * Content-Length is the current length of the buffer's data.
+ *
+ * @param instance an instance data structure.
+ */
+void prometheus_remote_write_prepare_header(struct instance *instance)
+{
+    struct simple_connector_data *conn = instance->connector_specific_data;
+    struct prometheus_remote_write_specific_config *cfg = instance->config.connector_specific_config;
+
+    // an absent authentication string contributes nothing to the header
+    const char *auth = "";
+    if (conn->auth_string)
+        auth = conn->auth_string;
+
+    buffer_sprintf(
+        conn->last_buffer->header,
+        "POST %s HTTP/1.1\r\n"
+        "Host: %s\r\n"
+        "Accept: */*\r\n"
+        "%s"
+        "Content-Encoding: snappy\r\n"
+        "Content-Type: application/x-protobuf\r\n"
+        "X-Prometheus-Remote-Write-Version: 0.1.0\r\n"
+        "Content-Length: %zu\r\n"
+        "\r\n",
+        cfg->remote_write_path,
+        conn->connected_to,
+        auth,
+        buffer_strlen(conn->last_buffer->buffer));
+}
+
+/**
+ * Process a response received after Prometheus remote write connector had sent data
+ *
+ * The first whitespace-delimited token of the response is skipped and the text that follows is
+ * compared with the status codes 200 and 204. Any other response is handed to
+ * exporting_discard_response().
+ *
+ * @param buffer a response from a remote service.
+ * @param instance an instance data structure.
+ * @return Returns 0 on success, 1 on failure.
+ */
+int process_prometheus_remote_write_response(BUFFER *buffer, struct instance *instance)
+{
+    if (unlikely(!buffer))
+        return 1;
+
+    const char *s = buffer_tostring(buffer);
+    size_t len = buffer_strlen(buffer);
+
+    // skip the first token of the response; the cast keeps isspace() defined for bytes
+    // that are negative when char is signed
+    while (len && !isspace((unsigned char)*s)) {
+        s++;
+        len--;
+    }
+
+    // step over the separator; if the response ended first there is nothing left to inspect
+    if (len) {
+        s++;
+        len--;
+    }
+
+    // do nothing with HTTP responses 200 or 204
+    if (likely(len > 4 && (!strncmp(s, "200 ", 4) || !strncmp(s, "204 ", 4))))
+        return 0;
+
+    return exporting_discard_response(buffer, instance);
+}
+
+/**
+ * Release specific data allocated.
+ *
+ * Frees the connector specific data hanging off the simple connector data, and the remote write
+ * path stored in the connector specific configuration.
+ *
+ * NOTE(review): the write request created by init_write_request() is not released here -
+ * confirm it is released elsewhere.
+ *
+ * @param instance an instance data structure.
+ */
+void clean_prometheus_remote_write(struct instance *instance)
+{
+    struct simple_connector_data *conn = instance->connector_specific_data;
+    struct prometheus_remote_write_specific_config *cfg = instance->config.connector_specific_config;
+
+    freez(conn->connector_specific_data);
+    freez(cfg->remote_write_path);
+}
+
+/**
+ * Initialize Prometheus Remote Write connector instance
+ *
+ * Installs the worker and the formatting, header and response callbacks on the instance, creates
+ * its buffer, mutex and condition variable, allocates the connector data and creates the write
+ * request.
+ *
+ * @param instance an instance data structure.
+ * @return Returns 0 on success, 1 on failure.
+ */
+int init_prometheus_remote_write_instance(struct instance *instance)
+{
+ instance->worker = simple_connector_worker;
+
+ instance->start_batch_formatting = NULL; // unused hooks are left as NULL
+ instance->start_host_formatting = format_host_prometheus_remote_write;
+ instance->start_chart_formatting = format_chart_prometheus_remote_write;
+ instance->metric_formatting = format_dimension_prometheus_remote_write;
+ instance->end_chart_formatting = NULL;
+ instance->variables_formatting = format_variables_prometheus_remote_write;
+ instance->end_host_formatting = NULL;
+ instance->end_batch_formatting = format_batch_prometheus_remote_write;
+
+ instance->prepare_header = prometheus_remote_write_prepare_header;
+ instance->check_response = process_prometheus_remote_write_response;
+
+ instance->buffer = (void *)buffer_create(0);
+
+ if (uv_mutex_init(&instance->mutex))
+ return 1; // NOTE(review): instance->buffer created above is not released on this path - confirm the caller cleans up
+ if (uv_cond_init(&instance->cond_var))
+ return 1; // NOTE(review): the mutex initialized above is not destroyed on this path - confirm the caller cleans up
+
+ struct simple_connector_data *simple_connector_data = callocz(1, sizeof(struct simple_connector_data));
+ instance->connector_specific_data = simple_connector_data;
+
+#ifdef ENABLE_HTTPS
+ simple_connector_data->flags = NETDATA_SSL_START;
+ simple_connector_data->conn = NULL;
+ if (instance->config.options & EXPORTING_OPTION_USE_TLS) {
+ security_start_ssl(NETDATA_SSL_CONTEXT_EXPORTING);
+ }
+#endif
+
+ struct prometheus_remote_write_specific_data *connector_specific_data =
+ callocz(1, sizeof(struct prometheus_remote_write_specific_data));
+ simple_connector_data->connector_specific_data = (void *)connector_specific_data; // nested: instance -> simple connector data -> remote write data
+
+ simple_connector_init(instance); // called after both data structures are attached to the instance
+
+ connector_specific_data->write_request = init_write_request();
+
+ instance->engine->protocol_buffers_initialized = 1; // records on the engine that protocol buffers are in use
+
+ return 0;
+}
+
+struct format_remote_write_label_callback { // walk state handed to format_remote_write_label_callback() through its data pointer
+ struct instance *instance; // exporting instance, passed to should_send_label()
+ void *write_request; // write request the labels are added to with add_label()
+};
+
+/**
+ * Label walk callback: add one host label to the write request.
+ *
+ * The key is copied with prometheus_name_copy() and the value with prometheus_label_copy()
+ * before the pair is passed to add_label().
+ *
+ * @param name the label key.
+ * @param value the label value.
+ * @param ls the source of the label, checked with should_send_label().
+ * @param data a pointer to a struct format_remote_write_label_callback.
+ * @return Returns 0 when should_send_label() rejects the label, 1 otherwise.
+ */
+static int format_remote_write_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
+    struct format_remote_write_label_callback *state = (struct format_remote_write_label_callback *)data;
+
+    if (!should_send_label(state->instance, ls))
+        return 0;
+
+    char key[PROMETHEUS_ELEMENT_MAX + 1];
+    prometheus_name_copy(key, name, PROMETHEUS_ELEMENT_MAX);
+
+    char val[PROMETHEUS_ELEMENT_MAX + 1];
+    prometheus_label_copy(val, value, PROMETHEUS_ELEMENT_MAX);
+
+    add_label(state->write_request, key, val);
+
+    return 1;
+}
+
+/**
+ * Format host data for Prometheus Remote Write connector
+ *
+ * Adds the netdata_info entry of the host to the write request and, when sending labels is
+ * configured, the host labels. For localhost the hostname configured for the instance is used
+ * instead of the hostname of the host.
+ *
+ * @param instance an instance data structure.
+ * @param host a data collecting host.
+ * @return Always returns 0.
+ */
+int format_host_prometheus_remote_write(struct instance *instance, RRDHOST *host)
+{
+    struct simple_connector_data *conn = (struct simple_connector_data *)instance->connector_specific_data;
+    struct prometheus_remote_write_specific_data *remote_write =
+        (struct prometheus_remote_write_specific_data *)conn->connector_specific_data;
+
+    char hostname[PROMETHEUS_ELEMENT_MAX + 1];
+    if (host == localhost)
+        prometheus_label_copy(hostname, instance->config.hostname, PROMETHEUS_ELEMENT_MAX);
+    else
+        prometheus_label_copy(hostname, rrdhost_hostname(host), PROMETHEUS_ELEMENT_MAX);
+
+    // the last argument is the current time in milliseconds
+    add_host_info(
+        remote_write->write_request,
+        "netdata_info",
+        hostname,
+        rrdhost_program_name(host),
+        rrdhost_program_version(host),
+        now_realtime_usec() / USEC_PER_MS);
+
+    if (unlikely(sending_labels_configured(instance))) {
+        struct format_remote_write_label_callback walk_state;
+        walk_state.instance = instance;
+        walk_state.write_request = remote_write->write_request;
+
+        rrdlabels_walkthrough_read(host->rrdlabels, format_remote_write_label_callback, &walk_state);
+    }
+
+    return 0;
+}
+
+/**
+ * Format chart data for Prometheus Remote Write connector
+ *
+ * Stores the chart, family and context strings and the as_collected / homogeneous flags in the
+ * file-level variables that the dimension formatter reads afterwards.
+ *
+ * NOTE(review): this state is shared by every instance using this connector, and units is
+ * written only for the average data source - confirm this is safe with more than one instance.
+ *
+ * @param instance an instance data structure.
+ * @param st a chart.
+ * @return Always returns 0.
+ */
+int format_chart_prometheus_remote_write(struct instance *instance, RRDSET *st)
+{
+    // the chart name is used only when names are requested and the chart has one
+    if ((instance->config.options & EXPORTING_OPTION_SEND_NAMES) && st->name)
+        prometheus_label_copy(chart, rrdset_name(st), PROMETHEUS_ELEMENT_MAX);
+    else
+        prometheus_label_copy(chart, rrdset_id(st), PROMETHEUS_ELEMENT_MAX);
+
+    prometheus_label_copy(family, rrdset_family(st), PROMETHEUS_ELEMENT_MAX);
+    prometheus_name_copy(context, rrdset_context(st), PROMETHEUS_ELEMENT_MAX);
+
+    as_collected = (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AS_COLLECTED);
+    homogeneous = 1;
+
+    if (!as_collected) {
+        // units are part of the metric name only for averaged data
+        if (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AVERAGE)
+            prometheus_units_copy(units, rrdset_units(st), PROMETHEUS_ELEMENT_MAX, 0);
+
+        return 0;
+    }
+
+    // refresh the heterogeneous flag when a check is pending, then read it
+    if (rrdset_flag_check(st, RRDSET_FLAG_HOMOGENEOUS_CHECK))
+        rrdset_update_heterogeneous_flag(st);
+
+    if (rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS))
+        homogeneous = 0;
+
+    return 0;
+}
+
+/**
+ * Add one dimension to the pending Prometheus remote write request
+ *
+ * Dimensions that were never collected or are flagged obsolete are ignored.
+ * In "as collected" mode the last collected value is sent (one labelled metric
+ * for homogeneous charts, one metric per dimension otherwise); in the other
+ * modes the value calculated from stored data is sent, unless it is NaN or
+ * infinite.
+ *
+ * @param instance an instance data structure.
+ * @param rd a dimension.
+ * @return Always returns 0.
+ */
+int format_dimension_prometheus_remote_write(struct instance *instance, RRDDIM *rd)
+{
+    struct simple_connector_data *connector = (struct simple_connector_data *)instance->connector_specific_data;
+    struct prometheus_remote_write_specific_data *remote_write =
+        (struct prometheus_remote_write_specific_data *)connector->connector_specific_data;
+
+    // nothing to export for dimensions without data or already obsolete
+    if (!rd->collections_counter || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))
+        return 0;
+
+    char metric_name[PROMETHEUS_LABELS_MAX + 1];
+    char dimension[PROMETHEUS_ELEMENT_MAX + 1];
+    char *suffix = "";
+    RRDHOST *host = rd->rrdset->rrdhost;
+
+    // the local host is reported under the configured hostname
+    const char *hostname = (host == localhost) ? instance->config.hostname : rrdhost_hostname(host);
+
+    // dimension names are used only when requested and available, ids otherwise
+    const char *dimension_source =
+        (instance->config.options & EXPORTING_OPTION_SEND_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd);
+
+    if (as_collected) {
+        // we need as-collected / raw data
+
+        if (unlikely(rd->last_collected_time.tv_sec < instance->after)) {
+            debug(
+                D_EXPORTING,
+                "EXPORTING: not sending dimension '%s' of chart '%s' from host '%s', "
+                "its last data collection (%lu) is not within our timeframe (%lu to %lu)",
+                rrddim_id(rd), rrdset_id(rd->rrdset),
+                hostname,
+                (unsigned long)rd->last_collected_time.tv_sec,
+                (unsigned long)instance->after,
+                (unsigned long)instance->before);
+            return 0;
+        }
+
+        int is_counter =
+            (rd->algorithm == RRD_ALGORITHM_INCREMENTAL || rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
+
+        // charts of the "prometheus" module do not get the "_total" suffix
+        if (is_counter && strcmp(rrdset_module_name(rd->rrdset), "prometheus") != 0)
+            suffix = "_total";
+
+        const char *dimension_label = NULL;
+
+        if (homogeneous) {
+            // every dimension of the chart shares algorithm, multiplier and divisor:
+            // the dimension goes into a label
+            prometheus_label_copy(dimension, dimension_source, PROMETHEUS_ELEMENT_MAX);
+            snprintf(metric_name, PROMETHEUS_LABELS_MAX, "%s_%s%s", instance->config.prefix, context, suffix);
+            dimension_label = dimension;
+        } else {
+            // dimensions differ in algorithm, multiplier or divisor:
+            // the dimension becomes a part of the metric name and no label is sent
+            prometheus_name_copy(dimension, dimension_source, PROMETHEUS_ELEMENT_MAX);
+            snprintf(
+                metric_name, PROMETHEUS_LABELS_MAX, "%s_%s_%s%s", instance->config.prefix, context, dimension,
+                suffix);
+        }
+
+        add_metric(
+            remote_write->write_request,
+            metric_name, chart, family, dimension_label,
+            hostname,
+            rd->last_collected_value, timeval_msec(&rd->last_collected_time));
+
+        return 0;
+    }
+
+    // we need average or sum of the data
+
+    time_t last_t = instance->before;
+    NETDATA_DOUBLE value = exporting_calculate_value_from_stored_data(instance, rd, &last_t);
+
+    if (isnan(value) || isinf(value))
+        return 0;
+
+    switch (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options)) {
+        case EXPORTING_SOURCE_DATA_AVERAGE:
+            suffix = "_average";
+            break;
+
+        case EXPORTING_SOURCE_DATA_SUM:
+            suffix = "_sum";
+            break;
+
+        default:
+            break;
+    }
+
+    prometheus_label_copy(dimension, dimension_source, PROMETHEUS_ELEMENT_MAX);
+    snprintf(metric_name, PROMETHEUS_LABELS_MAX, "%s_%s%s%s", instance->config.prefix, context, units, suffix);
+
+    add_metric(
+        remote_write->write_request,
+        metric_name, chart, family, dimension,
+        hostname,
+        value, last_t * MSEC_PER_SEC);
+
+    return 0;
+}
+
+/**
+ * Dictionary walkthrough callback: export one custom host or chart variable
+ *
+ * Variables that are neither custom host nor custom chart variables are skipped.
+ *
+ * @param item the dictionary item, used as the acquired variable.
+ * @param rv_ptr unused.
+ * @param data a struct prometheus_remote_write_variables_callback_options.
+ * @return Always returns 0.
+ */
+static int format_variable_prometheus_remote_write_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rv_ptr __maybe_unused, void *data) {
+    const RRDVAR_ACQUIRED *variable = (const RRDVAR_ACQUIRED *)item;
+    struct prometheus_remote_write_variables_callback_options *options = data;
+
+    if (!(rrdvar_flags(variable) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_CUSTOM_CHART_VAR)))
+        return 0;
+
+    RRDHOST *host = options->host;
+    struct instance *instance = options->instance;
+    struct simple_connector_data *connector = (struct simple_connector_data *)instance->connector_specific_data;
+    struct prometheus_remote_write_specific_data *remote_write =
+        (struct prometheus_remote_write_specific_data *)connector->connector_specific_data;
+
+    char metric_name[PROMETHEUS_LABELS_MAX + 1];
+    char *suffix = "";
+
+    // the sanitized variable name is written into the file-level context buffer
+    prometheus_name_copy(context, rrdvar_name(variable), PROMETHEUS_ELEMENT_MAX);
+    snprintf(metric_name, PROMETHEUS_LABELS_MAX, "%s_%s%s", instance->config.prefix, context, suffix);
+
+    const char *hostname = (host == localhost) ? instance->config.hostname : rrdhost_hostname(host);
+    NETDATA_DOUBLE value = rrdvar2number(variable);
+
+    // options->now is divided by USEC_PER_MS to get the millisecond timestamp
+    add_variable(remote_write->write_request, metric_name, hostname, value, options->now / USEC_PER_MS);
+
+    return 0;
+}
+
+/**
+ * Format the variables of a host for Prometheus Remote Write connector
+ *
+ * Walks the variables of the host and hands each of them to
+ * format_variable_prometheus_remote_write_callback(), together with a single
+ * timestamp taken once before the walk starts.
+ *
+ * @param instance an instance data structure.
+ * @param host a data collecting host.
+ * @return Returns the result of rrdvar_walkthrough_read().
+ */
+int format_variables_prometheus_remote_write(struct instance *instance, RRDHOST *host)
+{
+    struct prometheus_remote_write_variables_callback_options callback_options = {
+        .host = host,
+        .instance = instance,
+        .now = now_realtime_usec(),
+    };
+
+    int walk_result =
+        rrdvar_walkthrough_read(host->rrdvars, format_variable_prometheus_remote_write_callback, &callback_options);
+
+    return walk_result;
+}
+
+/**
+ * Pack the pending write request into the instance buffer
+ *
+ * Asks for the size needed by the write request, reserves that much room in
+ * the instance buffer, packs the request into it and ends the batch.
+ *
+ * @param instance an instance data structure.
+ * @return Returns 0 on success, 1 on failure.
+ */
+int format_batch_prometheus_remote_write(struct instance *instance)
+{
+    struct simple_connector_data *connector = (struct simple_connector_data *)instance->connector_specific_data;
+    struct prometheus_remote_write_specific_data *remote_write =
+        (struct prometheus_remote_write_specific_data *)connector->connector_specific_data;
+    void *write_request = remote_write->write_request;
+
+    // a size of zero is how get_write_request_size() reports an out-of-range request
+    size_t packed_size = get_write_request_size(write_request);
+    if (unlikely(packed_size == 0)) {
+        error("EXPORTING: write request size is out of range");
+        return 1;
+    }
+
+    BUFFER *output = instance->buffer;
+    buffer_need_bytes(output, packed_size);
+
+    // packed_size is updated by the call to the size of the packed data
+    int pack_failed = pack_and_clear_write_request(write_request, output->buffer, &packed_size);
+    if (unlikely(pack_failed)) {
+        error("EXPORTING: cannot pack write request");
+        return 1;
+    }
+
+    output->len = packed_size;
+    simple_connector_end_batch(instance);
+
+    return 0;
+}
diff --git a/exporting/prometheus/remote_write/remote_write.h b/exporting/prometheus/remote_write/remote_write.h
new file mode 100644
index 0000000..d4e8649
--- /dev/null
+++ b/exporting/prometheus/remote_write/remote_write.h
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_H
+#define NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_H
+
+#include "exporting/exporting_engine.h"
+#include "exporting/prometheus/prometheus.h"
+#include "remote_write_request.h"
+
+// Connector-specific data of a Prometheus remote write instance.
+// write_request is an opaque handle that is handed to the add_*(),
+// get_write_request_size() and pack_and_clear_write_request() helpers;
+// presumably it is the pointer returned by init_write_request() - TODO confirm.
+struct prometheus_remote_write_specific_data {
+ void *write_request;
+};
+
+// Options handed to the callback that exports the variables of a host.
+struct prometheus_remote_write_variables_callback_options {
+    RRDHOST *host;             // the host whose variables are walked
+    // Realtime clock in microseconds: it is filled from now_realtime_usec()
+    // and divided by USEC_PER_MS by the callback. It must be usec_t, because
+    // a 32-bit time_t truncates a microsecond value.
+    usec_t now;
+    struct instance *instance; // the exporting instance that receives the data
+};
+
+int init_prometheus_remote_write_instance(struct instance *instance);
+void clean_prometheus_remote_write(struct instance *instance);
+
+int format_host_prometheus_remote_write(struct instance *instance, RRDHOST *host);
+int format_chart_prometheus_remote_write(struct instance *instance, RRDSET *st);
+int format_dimension_prometheus_remote_write(struct instance *instance, RRDDIM *rd);
+int format_variables_prometheus_remote_write(struct instance *instance, RRDHOST *host);
+int format_batch_prometheus_remote_write(struct instance *instance);
+
+void prometheus_remote_write_prepare_header(struct instance *instance);
+int process_prometheus_remote_write_response(BUFFER *buffer, struct instance *instance);
+
+#endif //NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_H
diff --git a/exporting/prometheus/remote_write/remote_write.proto b/exporting/prometheus/remote_write/remote_write.proto
new file mode 100644
index 0000000..dfde254
--- /dev/null
+++ b/exporting/prometheus/remote_write/remote_write.proto
@@ -0,0 +1,29 @@
+syntax = "proto3";
+package prometheus;
+
+option cc_enable_arenas = true;
+
+import "google/protobuf/descriptor.proto";
+
+// Top-level message: a batch of time series sent in one request.
+message WriteRequest {
+ repeated TimeSeries timeseries = 1 [(nullable) = false];
+}
+
+// One time series: a set of labels plus the samples that belong to it.
+message TimeSeries {
+ repeated Label labels = 1 [(nullable) = false];
+ repeated Sample samples = 2 [(nullable) = false];
+}
+
+// A name/value pair attached to a time series.
+message Label {
+ string name = 1;
+ string value = 2;
+}
+
+// A single data point of a time series.
+// NOTE(review): the C++ helpers that fill this message document the
+// timestamp as milliseconds - confirm against the remote write specification.
+message Sample {
+ double value = 1;
+ int64 timestamp = 2;
+}
+
+// Declares the custom "nullable" field option that is applied to the
+// repeated fields of the messages in this file.
+extend google.protobuf.FieldOptions {
+ bool nullable = 65001;
+}
diff --git a/exporting/prometheus/remote_write/remote_write_request.cc b/exporting/prometheus/remote_write/remote_write_request.cc
new file mode 100644
index 0000000..ecfa11f
--- /dev/null
+++ b/exporting/prometheus/remote_write/remote_write_request.cc
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <snappy.h>
+#include "remote_write.pb.h"
+#include "remote_write_request.h"
+
+using namespace prometheus;
+
+google::protobuf::Arena arena;
+
+/**
+ * Create an empty write request
+ *
+ * The request is created on the file-level protobuf arena.
+ *
+ * @return Returns the new write request as an opaque pointer
+ */
+void *init_write_request()
+{
+    GOOGLE_PROTOBUF_VERIFY_VERSION;
+
+    return static_cast<void *>(google::protobuf::Arena::CreateMessage<WriteRequest>(&arena));
+}
+
+/**
+ * Append a time series describing a host to a write request
+ *
+ * The series carries the labels "__name__" and "instance", the optional labels
+ * "application" and "version", and a single sample with the value 1.
+ *
+ * @param write_request_p the write request
+ * @param name the name of a metric which is used for providing the host information
+ * @param instance the name of the host itself
+ * @param application the name of a program which sends the information, may be NULL
+ * @param version the version of the program, may be NULL
+ * @param timestamp the timestamp for the metric in milliseconds
+ */
+void add_host_info(
+    void *write_request_p,
+    const char *name, const char *instance, const char *application, const char *version, const int64_t timestamp)
+{
+    TimeSeries *series = static_cast<WriteRequest *>(write_request_p)->add_timeseries();
+
+    // appends one label to the new series
+    auto put_label = [series](const char *key, const char *text) {
+        Label *entry = series->add_labels();
+        entry->set_name(key);
+        entry->set_value(text);
+    };
+
+    put_label("__name__", name);
+    put_label("instance", instance);
+
+    if (application)
+        put_label("application", application);
+
+    if (version)
+        put_label("version", version);
+
+    Sample *point = series->add_samples();
+    point->set_value(1);
+    point->set_timestamp(timestamp);
+}
+
+/**
+ * Adds a label to the last created timeseries
+ *
+ * When the write request does not contain any timeseries yet, there is nothing
+ * to attach the label to and the call does nothing.
+ *
+ * @param write_request_p the write request with the timeseries
+ * @param key the key of the label
+ * @param value the value of the label
+ */
+void add_label(void *write_request_p, char *key, char *value)
+{
+    WriteRequest *write_request = (WriteRequest *)write_request_p;
+
+    // mutable_timeseries() must not be called with an out-of-range index,
+    // which is what "size - 1" would be for an empty request
+    const int timeseries_count = write_request->timeseries_size();
+    if (timeseries_count <= 0)
+        return;
+
+    TimeSeries *timeseries = write_request->mutable_timeseries(timeseries_count - 1);
+
+    Label *label = timeseries->add_labels();
+    label->set_name(key);
+    label->set_value(value);
+}
+
+/**
+ * Append a metric to a write request
+ *
+ * A new time series is created with the labels "__name__", "chart", "family",
+ * the optional label "dimension" and "instance" (in this order), and a single
+ * sample.
+ *
+ * @param write_request_p the write request
+ * @param name the name of the metric
+ * @param chart the chart, the metric belongs to
+ * @param family the family, the metric belongs to
+ * @param dimension the dimension, the metric belongs to, or NULL for no dimension label
+ * @param instance the name of the host, the metric belongs to
+ * @param value the value of the metric
+ * @param timestamp the timestamp for the metric in milliseconds
+ */
+void add_metric(
+    void *write_request_p,
+    const char *name, const char *chart, const char *family, const char *dimension, const char *instance,
+    const double value, const int64_t timestamp)
+{
+    TimeSeries *series = static_cast<WriteRequest *>(write_request_p)->add_timeseries();
+
+    // appends one label to the new series
+    auto put_label = [series](const char *key, const char *text) {
+        Label *entry = series->add_labels();
+        entry->set_name(key);
+        entry->set_value(text);
+    };
+
+    put_label("__name__", name);
+    put_label("chart", chart);
+    put_label("family", family);
+
+    if (dimension)
+        put_label("dimension", dimension);
+
+    put_label("instance", instance);
+
+    Sample *point = series->add_samples();
+    point->set_value(value);
+    point->set_timestamp(timestamp);
+}
+
+/**
+ * Append a variable to a write request
+ *
+ * A new time series is created with the labels "__name__" and "instance" and
+ * a single sample.
+ *
+ * @param write_request_p the write request
+ * @param name the name of the variable
+ * @param instance the name of the host, the variable belongs to
+ * @param value the value of the variable
+ * @param timestamp the timestamp for the variable in milliseconds
+ */
+void add_variable(
+    void *write_request_p, const char *name, const char *instance, const double value, const int64_t timestamp)
+{
+    TimeSeries *series = static_cast<WriteRequest *>(write_request_p)->add_timeseries();
+
+    Label *name_label = series->add_labels();
+    name_label->set_name("__name__");
+    name_label->set_value(name);
+
+    Label *instance_label = series->add_labels();
+    instance_label->set_name("instance");
+    instance_label->set_value(instance);
+
+    Sample *point = series->add_samples();
+    point->set_value(value);
+    point->set_timestamp(timestamp);
+}
+
+/**
+ * Get the buffer size needed for packing a write request
+ *
+ * The result is the maximum length snappy may need for compressing the
+ * serialized request.
+ *
+ * @param write_request_p the write request
+ * @return Returns the needed size, or 0 when it is not smaller than INT_MAX
+ */
+size_t get_write_request_size(void *write_request_p)
+{
+    WriteRequest *request = static_cast<WriteRequest *>(write_request_p);
+
+#if GOOGLE_PROTOBUF_VERSION < 3001000
+    const size_t worst_case = (size_t)snappy::MaxCompressedLength(request->ByteSize());
+#else
+    const size_t worst_case = (size_t)snappy::MaxCompressedLength(request->ByteSizeLong());
+#endif
+
+    if (worst_case < INT_MAX)
+        return worst_case;
+
+    return 0;
+}
+
+/**
+ * Serialize and compress a write request into a buffer, then empty the request
+ *
+ * The time series are removed from the request only after it was serialized
+ * successfully; on a serialization failure the request is left as it is.
+ *
+ * @param write_request_p the write request
+ * @param buffer a buffer, where compressed data is written
+ * @param size receives the size of the compressed data
+ * @return Returns 0 on success, 1 on failure
+ */
+int pack_and_clear_write_request(void *write_request_p, char *buffer, size_t *size)
+{
+    WriteRequest *request = static_cast<WriteRequest *>(write_request_p);
+    std::string serialized;
+
+    const bool serialized_ok = request->SerializeToString(&serialized);
+    if (!serialized_ok)
+        return 1;
+
+    request->clear_timeseries();
+    snappy::RawCompress(serialized.data(), serialized.size(), buffer, size);
+
+    return 0;
+}
+
+/**
+ * Converts a packed write request into its text representation
+ *
+ * The compressed data is uncompressed with snappy, parsed as a write request
+ * and its DebugString() text is copied into the buffer. At most size bytes are
+ * written; the text is NUL-terminated only when it is shorter than size.
+ *
+ * @param compressed_write_request the snappy-compressed, serialized write request
+ * @param compressed_size the size of the compressed data
+ * @param buffer a buffer, where text is written
+ * @param size the size of the buffer
+ * @return Returns 0 on success, 1 on failure
+ */
+int convert_write_request_to_string(
+    const char *compressed_write_request,
+    size_t compressed_size,
+    char *buffer,
+    size_t size)
+{
+    size_t uncompressed_size = 0;
+
+    // a header which cannot be parsed means the data is not valid snappy data
+    if (snappy::GetUncompressedLength(compressed_write_request, compressed_size, &uncompressed_size) == false)
+        return 1;
+
+    // kept from the original contract: the request is rejected when the
+    // uncompressed data is larger than the text buffer
+    if (size < uncompressed_size)
+        return 1;
+
+    // the string owns the temporary storage, so no return path can leak it
+    std::string uncompressed_write_request(uncompressed_size, '\0');
+
+    if (snappy::RawUncompress(compressed_write_request, compressed_size, &uncompressed_write_request[0]) == false)
+        return 1;
+
+    // a local message is released on return; a message created on the
+    // file-level arena would stay allocated for as long as the arena exists
+    WriteRequest write_request;
+    if (write_request.ParseFromString(uncompressed_write_request) == false)
+        return 1;
+
+    const std::string text_write_request(write_request.DebugString());
+
+    // std::string::copy() does not append a terminator
+    const size_t copied = text_write_request.copy(buffer, size);
+    if (copied < size)
+        buffer[copied] = '\0';
+
+    return 0;
+}
+
+/**
+ * Shuts down the Protobuf library
+ *
+ * Wrapper around google::protobuf::ShutdownProtobufLibrary(), declared with
+ * C linkage in remote_write_request.h.
+ * NOTE(review): presumably no Protobuf API may be used after this call -
+ * confirm against the Protobuf documentation.
+ */
+void protocol_buffers_shutdown()
+{
+ google::protobuf::ShutdownProtobufLibrary();
+}
diff --git a/exporting/prometheus/remote_write/remote_write_request.h b/exporting/prometheus/remote_write/remote_write_request.h
new file mode 100644
index 0000000..b253701
--- /dev/null
+++ b/exporting/prometheus/remote_write/remote_write_request.h
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_REQUEST_H
+#define NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_REQUEST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *init_write_request();
+
+void add_host_info(
+ void *write_request_p,
+ const char *name, const char *instance, const char *application, const char *version, const int64_t timestamp);
+
+void add_label(void *write_request_p, char *key, char *value);
+
+void add_metric(
+ void *write_request_p,
+ const char *name, const char *chart, const char *family, const char *dimension,
+ const char *instance, const double value, const int64_t timestamp);
+
+void add_variable(
+ void *write_request_p, const char *name, const char *instance, const double value, const int64_t timestamp);
+
+size_t get_write_request_size(void *write_request_p);
+
+int pack_and_clear_write_request(void *write_request_p, char *buffer, size_t *size);
+
+int convert_write_request_to_string(
+ const char *compressed_write_request,
+ size_t compressed_size,
+ char *buffer,
+ size_t size);
+
+void protocol_buffers_shutdown();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_REQUEST_H