diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 14:31:17 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 14:31:17 +0000 |
commit | 8020f71afd34d7696d7933659df2d763ab05542f (patch) | |
tree | 2fdf1b5447ffd8bdd61e702ca183e814afdcb4fc /exporting/prometheus | |
parent | Initial commit. (diff) | |
download | netdata-8020f71afd34d7696d7933659df2d763ab05542f.tar.xz netdata-8020f71afd34d7696d7933659df2d763ab05542f.zip |
Adding upstream version 1.37.1.upstream/1.37.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'exporting/prometheus')
-rw-r--r-- | exporting/prometheus/Makefile.am | 12 | ||||
-rw-r--r-- | exporting/prometheus/README.md | 461 | ||||
-rw-r--r-- | exporting/prometheus/prometheus.c | 902 | ||||
-rw-r--r-- | exporting/prometheus/prometheus.h | 41 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/Makefile.am | 14 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/README.md | 58 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/remote_write.c | 406 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/remote_write.h | 32 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/remote_write.proto | 29 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/remote_write_request.cc | 258 | ||||
-rw-r--r-- | exporting/prometheus/remote_write/remote_write_request.h | 42 |
11 files changed, 2255 insertions, 0 deletions
diff --git a/exporting/prometheus/Makefile.am b/exporting/prometheus/Makefile.am new file mode 100644 index 0000000..334fca8 --- /dev/null +++ b/exporting/prometheus/Makefile.am @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + remote_write \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/exporting/prometheus/README.md b/exporting/prometheus/README.md new file mode 100644 index 0000000..ae94867 --- /dev/null +++ b/exporting/prometheus/README.md @@ -0,0 +1,461 @@ +<!-- +title: "Export metrics to Prometheus" +description: "Export Netdata metrics to Prometheus for archiving and further analysis." +custom_edit_url: https://github.com/netdata/netdata/edit/master/exporting/prometheus/README.md +sidebar_label: "Using Netdata with Prometheus" +--> +import { OneLineInstallWget, OneLineInstallCurl } from '@site/src/components/OneLineInstall/' + +# Using Netdata with Prometheus + +Prometheus is a distributed monitoring system which offers a very simple setup along with a robust data model. Recently +Netdata added support for Prometheus. I'm going to quickly show you how to install both Netdata and Prometheus on the +same server. We can then use Grafana pointed at Prometheus to obtain long term metrics Netdata offers. I'm assuming we +are starting at a fresh ubuntu shell (whether you'd like to follow along in a VM or a cloud instance is up to you). + +## Installing Netdata and Prometheus + +### Installing Netdata + +There are number of ways to install Netdata according to [Installation](/packaging/installer/README.md). The suggested way +of installing the latest Netdata and keep it upgrade automatically. + +<!-- candidate for reuse --> + +To install Netdata, run the following as your normal user: + +<OneLineInstallWget/> + +Or, if you have cURL but not wget (such as on macOS): + +<OneLineInstallCurl/> + +At this point we should have Netdata listening on port 19999. Attempt to take your browser here: + +```sh +http://your.netdata.ip:19999 +``` + +_(replace `your.netdata.ip` with the IP or hostname of the server running Netdata)_ + +### Installing Prometheus + +In order to install Prometheus we are going to introduce our own systemd startup script along with an example of +prometheus.yaml configuration. Prometheus needs to be pointed to your server at a specific target url for it to scrape +Netdata's api. Prometheus is always a pull model meaning Netdata is the passive client within this architecture. +Prometheus always initiates the connection with Netdata. + +#### Download Prometheus + +```sh +cd /tmp && curl -s https://api.github.com/repos/prometheus/prometheus/releases/latest \ +| grep "browser_download_url.*linux-amd64.tar.gz" \ +| cut -d '"' -f 4 \ +| wget -qi - +``` + +#### Create prometheus system user + +```sh +sudo useradd -r prometheus +``` + +#### Create prometheus directory + +```sh +sudo mkdir /opt/prometheus +sudo chown prometheus:prometheus /opt/prometheus +``` + +#### Untar prometheus directory + +```sh +sudo tar -xvf /tmp/prometheus-*linux-amd64.tar.gz -C /opt/prometheus --strip=1 +``` + +#### Install prometheus.yml + +We will use the following `prometheus.yml` file. Save it at `/opt/prometheus/prometheus.yml`. + +Make sure to replace `your.netdata.ip` with the IP or hostname of the host running Netdata. + +```yaml +# my global config +global: + scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute. + evaluation_interval: 5s # Evaluate rules every 5 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first.rules" + # - "second.rules" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. + - job_name: 'prometheus' + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + static_configs: + - targets: ['0.0.0.0:9090'] + + - job_name: 'netdata-scrape' + + metrics_path: '/api/v1/allmetrics' + params: + # format: prometheus | prometheus_all_hosts + # You can use `prometheus_all_hosts` if you want Prometheus to set the `instance` to your hostname instead of IP + format: [prometheus] + # + # sources: as-collected | raw | average | sum | volume + # default is: average + #source: [as-collected] + # + # server name for this prometheus - the default is the client IP + # for Netdata to uniquely identify it + #server: ['prometheus1'] + honor_labels: true + + static_configs: + - targets: ['{your.netdata.ip}:19999'] +``` + +#### Install nodes.yml + +The following is completely optional, it will enable Prometheus to generate alerts from some Netdata sources. Tweak the +values to your own needs. We will use the following `nodes.yml` file below. Save it at `/opt/prometheus/nodes.yml`, and +add a _- "nodes.yml"_ entry under the _rule_files:_ section in the example prometheus.yml file above. + +```yaml +groups: + - name: nodes + + rules: + - alert: node_high_cpu_usage_70 + expr: sum(sum_over_time(netdata_system_cpu_percentage_average{dimension=~"(user|system|softirq|irq|guest)"}[10m])) by (job) / sum(count_over_time(netdata_system_cpu_percentage_average{dimension="idle"}[10m])) by (job) > 70 + for: 1m + annotations: + description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.' + summary: CPU alert for container node '{{ $labels.job }}' + + - alert: node_high_memory_usage_70 + expr: 100 / sum(netdata_system_ram_MB_average) by (job) + * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30 + for: 1m + annotations: + description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.' + summary: Memory alert for container node '{{ $labels.job }}' + + - alert: node_low_root_filesystem_space_20 + expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job) + * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20 + for: 1m + annotations: + description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.' + summary: Root filesystem alert for container node '{{ $labels.job }}' + + - alert: node_root_filesystem_fill_rate_6h + expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0 + for: 1h + labels: + severity: critical + annotations: + description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h. + summary: Disk fill alert for Swarm node '{{ $labels.job }}' +``` + +#### Install prometheus.service + +Save this service file as `/etc/systemd/system/prometheus.service`: + +```sh +[Unit] +Description=Prometheus Server +AssertPathExists=/opt/prometheus + +[Service] +Type=simple +WorkingDirectory=/opt/prometheus +User=prometheus +Group=prometheus +ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --log.level=info +ExecReload=/bin/kill -SIGHUP $MAINPID +ExecStop=/bin/kill -SIGINT $MAINPID + +[Install] +WantedBy=multi-user.target +``` + +##### Start Prometheus + +```sh +sudo systemctl start prometheus +sudo systemctl enable prometheus +``` + +Prometheus should now start and listen on port 9090. Attempt to head there with your browser. + +If everything is working correctly when you fetch `http://your.prometheus.ip:9090` you will see a 'Status' tab. Click +this and click on 'targets' We should see the Netdata host as a scraped target. + +--- + +## Netdata support for Prometheus + +Before explaining the changes, we have to understand the key differences between Netdata and Prometheus. + +### understanding Netdata metrics + +#### charts + +Each chart in Netdata has several properties (common to all its metrics): + +- `chart_id` - uniquely identifies a chart. + +- `chart_name` - a more human friendly name for `chart_id`, also unique. + +- `context` - this is the template of the chart. All disk I/O charts have the same context, all mysql requests charts + have the same context, etc. This is used for alarm templates to match all the charts they should be attached to. + +- `family` groups a set of charts together. It is used as the submenu of the dashboard. + +- `units` is the units for all the metrics attached to the chart. + +#### dimensions + +Then each Netdata chart contains metrics called `dimensions`. All the dimensions of a chart have the same units of +measurement, and are contextually in the same category (ie. the metrics for disk bandwidth are `read` and `write` and +they are both in the same chart). + +### Netdata data source + +Netdata can send metrics to Prometheus from 3 data sources: + +- `as collected` or `raw` - this data source sends the metrics to Prometheus as they are collected. No conversion is + done by Netdata. The latest value for each metric is just given to Prometheus. This is the most preferred method by + Prometheus, but it is also the harder to work with. To work with this data source, you will need to understand how + to get meaningful values out of them. + + The format of the metrics is: `CONTEXT{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. + + If the metric is a counter (`incremental` in Netdata lingo), `_total` is appended the context. + + Unlike Prometheus, Netdata allows each dimension of a chart to have a different algorithm and conversion constants + (`multiplier` and `divisor`). In this case, that the dimensions of a charts are heterogeneous, Netdata will use this + format: `CONTEXT_DIMENSION{chart="CHART",family="FAMILY"}` + +- `average` - this data source uses the Netdata database to send the metrics to Prometheus as they are presented on + the Netdata dashboard. So, all the metrics are sent as gauges, at the units they are presented in the Netdata + dashboard charts. This is the easiest to work with. + + The format of the metrics is: `CONTEXT_UNITS_average{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. + + When this source is used, Netdata keeps track of the last access time for each Prometheus server fetching the + metrics. This last access time is used at the subsequent queries of the same Prometheus server to identify the + time-frame the `average` will be calculated. + + So, no matter how frequently Prometheus scrapes Netdata, it will get all the database data. + To identify each Prometheus server, Netdata uses by default the IP of the client fetching the metrics. + + If there are multiple Prometheus servers fetching data from the same Netdata, using the same IP, each Prometheus + server can append `server=NAME` to the URL. Netdata will use this `NAME` to uniquely identify the Prometheus server. + +- `sum` or `volume`, is like `average` but instead of averaging the values, it sums them. + + The format of the metrics is: `CONTEXT_UNITS_sum{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. All the + other operations are the same with `average`. + + To change the data source to `sum` or `as-collected` you need to provide the `source` parameter in the request URL. + e.g.: `http://your.netdata.ip:19999/api/v1/allmetrics?format=prometheus&help=yes&source=as-collected` + + Keep in mind that early versions of Netdata were sending the metrics as: `CHART_DIMENSION{}`. + +### Querying Metrics + +Fetch with your web browser this URL: + +`http://your.netdata.ip:19999/api/v1/allmetrics?format=prometheus&help=yes` + +_(replace `your.netdata.ip` with the ip or hostname of your Netdata server)_ + +Netdata will respond with all the metrics it sends to Prometheus. + +If you search that page for `"system.cpu"` you will find all the metrics Netdata is exporting to Prometheus for this +chart. `system.cpu` is the chart name on the Netdata dashboard (on the Netdata dashboard all charts have a text heading +such as : `Total CPU utilization (system.cpu)`. What we are interested here in the chart name: `system.cpu`). + +Searching for `"system.cpu"` reveals: + +```sh +# COMMENT homogeneous chart "system.cpu", context "system.cpu", family "cpu", units "percentage" +# COMMENT netdata_system_cpu_percentage_average: dimension "guest_nice", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="guest_nice"} 0.0000000 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "guest", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="guest"} 1.7837326 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "steal", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="steal"} 0.0000000 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "softirq", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="softirq"} 0.5275442 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "irq", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="irq"} 0.2260836 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "user", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="user"} 2.3362762 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "system", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="system"} 1.7961062 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "nice", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="nice"} 0.0000000 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "iowait", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="iowait"} 0.9671802 1500066662000 +# COMMENT netdata_system_cpu_percentage_average: dimension "idle", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive +netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="idle"} 92.3630770 1500066662000 +``` + +_(Netdata response for `system.cpu` with source=`average`)_ + +In `average` or `sum` data sources, all values are normalized and are reported to Prometheus as gauges. Now, use the +'expression' text form in Prometheus. Begin to type the metrics we are looking for: `netdata_system_cpu`. You should see +that the text form begins to auto-fill as Prometheus knows about this metric. + +If the data source was `as collected`, the response would be: + +```sh +# COMMENT homogeneous chart "system.cpu", context "system.cpu", family "cpu", units "percentage" +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "guest_nice", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="guest_nice"} 0 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "guest", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="guest"} 63945 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "steal", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="steal"} 0 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "softirq", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="softirq"} 8295 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "irq", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="irq"} 4079 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "user", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="user"} 116488 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "system", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="system"} 35084 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "nice", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="nice"} 505 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "iowait", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="iowait"} 23314 1500066716438 +# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "idle", value * 1 / 1 delta gives percentage (counter) +netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="idle"} 918470 1500066716438 +``` + +_(Netdata response for `system.cpu` with source=`as-collected`)_ + +For more information check Prometheus documentation. + +### Streaming data from upstream hosts + +The `format=prometheus` parameter only exports the host's Netdata metrics. If you are using the parent-child +functionality of Netdata this ignores any upstream hosts - so you should consider using the below in your +**prometheus.yml**: + +```yaml + metrics_path: '/api/v1/allmetrics' + params: + format: [prometheus_all_hosts] + honor_labels: true +``` + +This will report all upstream host data, and `honor_labels` will make Prometheus take note of the instance names +provided. + +### Timestamps + +To pass the metrics through Prometheus pushgateway, Netdata supports the option `×tamps=no` to send the metrics +without timestamps. + +## Netdata host variables + +Netdata collects various system configuration metrics, like the max number of TCP sockets supported, the max number of +files allowed system-wide, various IPC sizes, etc. These metrics are not exposed to Prometheus by default. + +To expose them, append `variables=yes` to the Netdata URL. + +### TYPE and HELP + +To save bandwidth, and because Prometheus does not use them anyway, `# TYPE` and `# HELP` lines are suppressed. If +wanted they can be re-enabled via `types=yes` and `help=yes`, e.g. +`/api/v1/allmetrics?format=prometheus&types=yes&help=yes` + +Note that if enabled, the `# TYPE` and `# HELP` lines are repeated for every occurrence of a metric, which goes against the Prometheus documentation's [specification for these lines](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#comments-help-text-and-type-information). + +### Names and IDs + +Netdata supports names and IDs for charts and dimensions. Usually IDs are unique identifiers as read by the system and +names are human friendly labels (also unique). + +Most charts and metrics have the same ID and name, but in several cases they are different: disks with device-mapper, +interrupts, QoS classes, statsd synthetic charts, etc. + +The default is controlled in `exporting.conf`: + +```conf +[prometheus:exporter] + send names instead of ids = yes | no +``` + +You can overwrite it from Prometheus, by appending to the URL: + +- `&names=no` to get IDs (the old behaviour) +- `&names=yes` to get names + +### Filtering metrics sent to Prometheus + +Netdata can filter the metrics it sends to Prometheus with this setting: + +```conf +[prometheus:exporter] + send charts matching = * +``` + +This settings accepts a space separated list of [simple patterns](/libnetdata/simple_pattern/README.md) to match the +**charts** to be sent to Prometheus. Each pattern can use `*` as wildcard, any number of times (e.g `*a*b*c*` is valid). +Patterns starting with `!` give a negative match (e.g `!*.bad users.* groups.*` will send all the users and groups +except `bad` user and `bad` group). The order is important: the first match (positive or negative) left to right, is +used. + +### Changing the prefix of Netdata metrics + +Netdata sends all metrics prefixed with `netdata_`. You can change this in `netdata.conf`, like this: + +```conf +[prometheus:exporter] + prefix = netdata +``` + +It can also be changed from the URL, by appending `&prefix=netdata`. + +### Metric Units + +The default source `average` adds the unit of measurement to the name of each metric (e.g. `_KiB_persec`). To hide the +units and get the same metric names as with the other sources, append to the URL `&hideunits=yes`. + +The units were standardized in v1.12, with the effect of changing the metric names. To get the metric names as they were +before v1.12, append to the URL `&oldunits=yes` + +### Accuracy of `average` and `sum` data sources + +When the data source is set to `average` or `sum`, Netdata remembers the last access of each client accessing Prometheus +metrics and uses this last access time to respond with the `average` or `sum` of all the entries in the database since +that. This means that Prometheus servers are not losing data when they access Netdata with data source = `average` or +`sum`. + +To uniquely identify each Prometheus server, Netdata uses the IP of the client accessing the metrics. If however the IP +is not good enough for identifying a single Prometheus server (e.g. when Prometheus servers are accessing Netdata +through a web proxy, or when multiple Prometheus servers are NATed to a single IP), each Prometheus may append +`&server=NAME` to the URL. This `NAME` is used by Netdata to uniquely identify each Prometheus server and keep track of +its last access time. + + diff --git a/exporting/prometheus/prometheus.c b/exporting/prometheus/prometheus.c new file mode 100644 index 0000000..294d8ec --- /dev/null +++ b/exporting/prometheus/prometheus.c @@ -0,0 +1,902 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define EXPORTINGS_INTERNALS +#include "prometheus.h" + +// ---------------------------------------------------------------------------- +// PROMETHEUS +// /api/v1/allmetrics?format=prometheus and /api/v1/allmetrics?format=prometheus_all_hosts + +static int is_matches_rrdset(struct instance *instance, RRDSET *st, SIMPLE_PATTERN *filter) { + if (instance->config.options & EXPORTING_OPTION_SEND_NAMES) { + return simple_pattern_matches(filter, rrdset_name(st)); + } + return simple_pattern_matches(filter, rrdset_id(st)); +} + +/** + * Check if a chart can be sent to Prometheus + * + * @param instance an instance data structure. + * @param st a chart. + * @param filter a simple pattern to match against. + * @return Returns 1 if the chart can be sent, 0 otherwise. + */ +inline int can_send_rrdset(struct instance *instance, RRDSET *st, SIMPLE_PATTERN *filter) +{ +#ifdef NETDATA_INTERNAL_CHECKS + RRDHOST *host = st->rrdhost; +#endif + + if (unlikely(rrdset_flag_check(st, RRDSET_FLAG_EXPORTING_IGNORE))) + return 0; + + if (filter) { + if (!is_matches_rrdset(instance, st, filter)) { + return 0; + } + } else if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_EXPORTING_SEND))) { + // we have not checked this chart + if (is_matches_rrdset(instance, st, instance->config.charts_pattern)) { + rrdset_flag_set(st, RRDSET_FLAG_EXPORTING_SEND); + } else { + rrdset_flag_set(st, RRDSET_FLAG_EXPORTING_IGNORE); + debug( + D_EXPORTING, + "EXPORTING: not sending chart '%s' of host '%s', because it is disabled for exporting.", + rrdset_id(st), + rrdhost_hostname(host)); + return 0; + } + } + + if (unlikely(!rrdset_is_available_for_exporting_and_alarms(st))) { + debug( + D_EXPORTING, + "EXPORTING: not sending chart '%s' of host '%s', because it is not available for exporting.", + rrdset_id(st), + rrdhost_hostname(host)); + return 0; + } + + if (unlikely( + st->rrd_memory_mode == RRD_MEMORY_MODE_NONE && + !(EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AS_COLLECTED))) { + debug( + D_EXPORTING, + "EXPORTING: not sending chart '%s' of host '%s' because its memory mode is '%s' and the exporting connector requires database access.", + rrdset_id(st), + rrdhost_hostname(host), + rrd_memory_mode_name(host->rrd_memory_mode)); + return 0; + } + + return 1; +} + +static struct prometheus_server { + const char *server; + uint32_t hash; + RRDHOST *host; + time_t last_access; + struct prometheus_server *next; +} *prometheus_server_root = NULL; + +static netdata_mutex_t prometheus_server_root_mutex = NETDATA_MUTEX_INITIALIZER; + +/** + * Clean server root local structure + */ +void prometheus_clean_server_root() +{ + if (prometheus_server_root) { + netdata_mutex_lock(&prometheus_server_root_mutex); + + struct prometheus_server *ps; + for (ps = prometheus_server_root; ps; ) { + struct prometheus_server *current = ps; + ps = ps->next; + if(current->server) + freez((void *)current->server); + + freez(current); + } + prometheus_server_root = NULL; + netdata_mutex_unlock(&prometheus_server_root_mutex); + } +} + +/** + * Get the last time when a Prometheus server scraped the Netdata Prometheus exporter. + * + * @param server the name of the Prometheus server. + * @param host a data collecting host. + * @param now actual time. + * @return Returns the last time when the server accessed Netdata, or 0 if it is the first occurrence. + */ +static inline time_t prometheus_server_last_access(const char *server, RRDHOST *host, time_t now) +{ +#ifdef UNIT_TESTING + return 0; +#endif + uint32_t hash = simple_hash(server); + + netdata_mutex_lock(&prometheus_server_root_mutex); + + struct prometheus_server *ps; + for (ps = prometheus_server_root; ps; ps = ps->next) { + if (host == ps->host && hash == ps->hash && !strcmp(server, ps->server)) { + time_t last = ps->last_access; + ps->last_access = now; + netdata_mutex_unlock(&prometheus_server_root_mutex); + return last; + } + } + + ps = callocz(1, sizeof(struct prometheus_server)); + ps->server = strdupz(server); + ps->hash = hash; + ps->host = host; + ps->last_access = now; + ps->next = prometheus_server_root; + prometheus_server_root = ps; + + netdata_mutex_unlock(&prometheus_server_root_mutex); + return 0; +} + +/** + * Copy and sanitize name. + * + * @param d a destination string. + * @param s a source string. + * @param usable the number of characters to copy. + * @return Returns the length of the copied string. + */ +inline size_t prometheus_name_copy(char *d, const char *s, size_t usable) +{ + size_t n; + + for (n = 0; *s && n < usable; d++, s++, n++) { + register char c = *s; + + if (!isalnum(c)) + *d = '_'; + else + *d = c; + } + *d = '\0'; + + return n; +} + +/** + * Copy and sanitize label. + * + * @param d a destination string. + * @param s a source string. + * @param usable the number of characters to copy. + * @return Returns the length of the copied string. + */ +inline size_t prometheus_label_copy(char *d, const char *s, size_t usable) +{ + size_t n; + + // make sure we can escape one character without overflowing the buffer + usable--; + + for (n = 0; *s && n < usable; d++, s++, n++) { + register char c = *s; + + if (unlikely(c == '"' || c == '\\' || c == '\n')) { + *d++ = '\\'; + n++; + } + *d = c; + } + *d = '\0'; + + return n; +} + +/** + * Copy and sanitize units. + * + * @param d a destination string. + * @param s a source string. + * @param usable the number of characters to copy. + * @param showoldunits set this flag to 1 to show old (before v1.12) units. + * @return Returns the destination string. + */ +inline char *prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits) +{ + const char *sorig = s; + char *ret = d; + size_t n; + + // Fix for issue 5227 + if (unlikely(showoldunits)) { + static struct { + const char *newunit; + uint32_t hash; + const char *oldunit; + } units[] = { { "KiB/s", 0, "kilobytes/s" }, + { "MiB/s", 0, "MB/s" }, + { "GiB/s", 0, "GB/s" }, + { "KiB", 0, "KB" }, + { "MiB", 0, "MB" }, + { "GiB", 0, "GB" }, + { "inodes", 0, "Inodes" }, + { "percentage", 0, "percent" }, + { "faults/s", 0, "page faults/s" }, + { "KiB/operation", 0, "kilobytes per operation" }, + { "milliseconds/operation", 0, "ms per operation" }, + { NULL, 0, NULL } }; + static int initialized = 0; + int i; + + if (unlikely(!initialized)) { + for (i = 0; units[i].newunit; i++) + units[i].hash = simple_hash(units[i].newunit); + initialized = 1; + } + + uint32_t hash = simple_hash(s); + for (i = 0; units[i].newunit; i++) { + if (unlikely(hash == units[i].hash && !strcmp(s, units[i].newunit))) { + // info("matched extension for filename '%s': '%s'", filename, last_dot); + s = units[i].oldunit; + sorig = s; + break; + } + } + } + *d++ = '_'; + for (n = 1; *s && n < usable; d++, s++, n++) { + register char c = *s; + + if (!isalnum(c)) + *d = '_'; + else + *d = c; + } + + if (n == 2 && sorig[0] == '%') { + n = 0; + d = ret; + s = "_percent"; + for (; *s && n < usable; n++) + *d++ = *s++; + } else if (n > 3 && sorig[n - 3] == '/' && sorig[n - 2] == 's') { + n = n - 2; + d -= 2; + s = "_persec"; + for (; *s && n < usable; n++) + *d++ = *s++; + } + + *d = '\0'; + + return ret; +} + +/** + * Format host labels for the Prometheus exporter + * + * @param instance an instance data structure. + * @param host a data collecting host. + */ + +struct format_prometheus_label_callback { + struct instance *instance; + size_t count; +}; + +static int format_prometheus_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct format_prometheus_label_callback *d = (struct format_prometheus_label_callback *)data; + + if (!should_send_label(d->instance, ls)) return 0; + + char k[PROMETHEUS_ELEMENT_MAX + 1]; + char v[PROMETHEUS_ELEMENT_MAX + 1]; + + prometheus_name_copy(k, name, PROMETHEUS_ELEMENT_MAX); + prometheus_label_copy(v, value, PROMETHEUS_ELEMENT_MAX); + + if (*k && *v) { + if (d->count > 0) buffer_strcat(d->instance->labels_buffer, ","); + buffer_sprintf(d->instance->labels_buffer, "%s=\"%s\"", k, v); + d->count++; + } + return 1; +} + +void format_host_labels_prometheus(struct instance *instance, RRDHOST *host) +{ + if (unlikely(!sending_labels_configured(instance))) + return; + + if (!instance->labels_buffer) + instance->labels_buffer = buffer_create(1024); + + struct format_prometheus_label_callback tmp = { + .instance = instance, + .count = 0 + }; + rrdlabels_walkthrough_read(host->rrdlabels, format_prometheus_label_callback, &tmp); +} + +struct host_variables_callback_options { + RRDHOST *host; + BUFFER *wb; + EXPORTING_OPTIONS exporting_options; + PROMETHEUS_OUTPUT_OPTIONS output_options; + const char *prefix; + const char *labels; + time_t now; + int host_header_printed; + char name[PROMETHEUS_VARIABLE_MAX + 1]; +}; + +/** + * Print host variables. + * + * @param rv a variable. + * @param data callback options. + * @return Returns 1 if the chart can be sent, 0 otherwise. + */ +static int print_host_variables_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rv_ptr __maybe_unused, void *data) { + const RRDVAR_ACQUIRED *rv = (const RRDVAR_ACQUIRED *)item; + + struct host_variables_callback_options *opts = data; + + if (rrdvar_flags(rv) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_CUSTOM_CHART_VAR)) { + if (!opts->host_header_printed) { + opts->host_header_printed = 1; + + if (opts->output_options & PROMETHEUS_OUTPUT_HELP) { + buffer_sprintf(opts->wb, "\n# COMMENT global host and chart variables\n"); + } + } + + NETDATA_DOUBLE value = rrdvar2number(rv); + if (isnan(value) || isinf(value)) { + if (opts->output_options & PROMETHEUS_OUTPUT_HELP) + buffer_sprintf( + opts->wb, "# COMMENT variable \"%s\" is %s. Skipped.\n", rrdvar_name(rv), (isnan(value)) ? "NAN" : "INF"); + + return 0; + } + + char *label_pre = ""; + char *label_post = ""; + if (opts->labels && *opts->labels) { + label_pre = "{"; + label_post = "}"; + } + + prometheus_name_copy(opts->name, rrdvar_name(rv), sizeof(opts->name)); + + if (opts->output_options & PROMETHEUS_OUTPUT_TIMESTAMPS) + buffer_sprintf( + opts->wb, + "%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT " %llu\n", + opts->prefix, + opts->name, + label_pre, + opts->labels, + label_post, + value, + opts->now * 1000ULL); + else + buffer_sprintf( + opts->wb, + "%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT "\n", + opts->prefix, + opts->name, + label_pre, + opts->labels, + label_post, + value); + + return 1; + } + + return 0; +} + +struct gen_parameters { + const char *prefix; + char *context; + char *suffix; + + char *chart; + char *dimension; + char *family; + char *labels; + + PROMETHEUS_OUTPUT_OPTIONS output_options; + RRDSET *st; + RRDDIM *rd; + + const char *relation; + const char *type; +}; + +/** + * Write an as-collected help comment to a buffer. + * + * @param wb the buffer to write the comment to. + * @param p parameters for generating the comment string. + * @param homogeneous a flag for homogeneous charts. + * @param prometheus_collector a flag for metrics from prometheus collector. + */ +static void generate_as_collected_prom_help(BUFFER *wb, struct gen_parameters *p, int homogeneous, int prometheus_collector) +{ + buffer_sprintf(wb, "# COMMENT %s_%s", p->prefix, p->context); + + if (!homogeneous) + buffer_sprintf(wb, "_%s", p->dimension); + + buffer_sprintf( + wb, + "%s: chart \"%s\", context \"%s\", family \"%s\", dimension \"%s\", value * ", + p->suffix, + (p->output_options & PROMETHEUS_OUTPUT_NAMES && p->st->name) ? rrdset_name(p->st) : rrdset_id(p->st), + rrdset_context(p->st), + rrdset_family(p->st), + (p->output_options & PROMETHEUS_OUTPUT_NAMES && p->rd->name) ? rrddim_name(p->rd) : rrddim_id(p->rd)); + + if (prometheus_collector) + buffer_sprintf(wb, "1 / 1"); + else + buffer_sprintf(wb, COLLECTED_NUMBER_FORMAT " / " COLLECTED_NUMBER_FORMAT, p->rd->multiplier, p->rd->divisor); + + buffer_sprintf(wb, " %s %s (%s)\n", p->relation, rrdset_units(p->st), p->type); +} + +/** + * Write an as-collected metric to a buffer. + * + * @param wb the buffer to write the metric to. + * @param p parameters for generating the metric string. + * @param homogeneous a flag for homogeneous charts. + * @param prometheus_collector a flag for metrics from prometheus collector. + */ +static void generate_as_collected_prom_metric(BUFFER *wb, struct gen_parameters *p, int homogeneous, int prometheus_collector) +{ + buffer_sprintf(wb, "%s_%s", p->prefix, p->context); + + if (!homogeneous) + buffer_sprintf(wb, "_%s", p->dimension); + + buffer_sprintf(wb, "%s{chart=\"%s\",family=\"%s\"", p->suffix, p->chart, p->family); + + if (homogeneous) + buffer_sprintf(wb, ",dimension=\"%s\"", p->dimension); + + buffer_sprintf(wb, "%s} ", p->labels); + + if (prometheus_collector) + buffer_sprintf( + wb, + NETDATA_DOUBLE_FORMAT, + (NETDATA_DOUBLE)p->rd->last_collected_value * (NETDATA_DOUBLE)p->rd->multiplier / + (NETDATA_DOUBLE)p->rd->divisor); + else + buffer_sprintf(wb, COLLECTED_NUMBER_FORMAT, p->rd->last_collected_value); + + if (p->output_options & PROMETHEUS_OUTPUT_TIMESTAMPS) + buffer_sprintf(wb, " %llu\n", timeval_msec(&p->rd->last_collected_time)); + else + buffer_sprintf(wb, "\n"); +} + +/** + * Write metrics in Prometheus format to a buffer. + * + * @param instance an instance data structure. + * @param host a data collecting host. + * @param filter_string a simple pattern filter. + * @param wb the buffer to fill with metrics. + * @param prefix a prefix for every metric. + * @param exporting_options options to configure what data is exported. + * @param allhosts set to 1 if host instance should be in the output for tags. + * @param output_options options to configure the format of the output. + */ +static void rrd_stats_api_v1_charts_allmetrics_prometheus( + struct instance *instance, + RRDHOST *host, + const char *filter_string, + BUFFER *wb, + const char *prefix, + EXPORTING_OPTIONS exporting_options, + int allhosts, + PROMETHEUS_OUTPUT_OPTIONS output_options) +{ + SIMPLE_PATTERN *filter = simple_pattern_create(filter_string, NULL, SIMPLE_PATTERN_EXACT); + + char hostname[PROMETHEUS_ELEMENT_MAX + 1]; + prometheus_label_copy(hostname, rrdhost_hostname(host), PROMETHEUS_ELEMENT_MAX); + + format_host_labels_prometheus(instance, host); + + buffer_sprintf( + wb, + "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"", + hostname, + rrdhost_program_name(host), + rrdhost_program_version(host)); + + if (instance->labels_buffer && *buffer_tostring(instance->labels_buffer)) { + buffer_sprintf(wb, ",%s", buffer_tostring(instance->labels_buffer)); + } + + if (output_options & PROMETHEUS_OUTPUT_TIMESTAMPS) + buffer_sprintf(wb, "} 1 %llu\n", now_realtime_usec() / USEC_PER_MS); + else + buffer_sprintf(wb, "} 1\n"); + + char labels[PROMETHEUS_LABELS_MAX + 1] = ""; + if (allhosts) { + snprintfz(labels, PROMETHEUS_LABELS_MAX, ",instance=\"%s\"", hostname); + } + + if (instance->labels_buffer) + buffer_flush(instance->labels_buffer); + + // send custom variables set for the host + if (output_options & PROMETHEUS_OUTPUT_VARIABLES) { + + struct host_variables_callback_options opts = { + .host = host, + .wb = wb, + .labels = (labels[0] == ',') ? &labels[1] : labels, + .exporting_options = exporting_options, + .output_options = output_options, + .prefix = prefix, + .now = now_realtime_sec(), + .host_header_printed = 0 + }; + + rrdvar_walkthrough_read(host->rrdvars, print_host_variables_callback, &opts); + } + + // for each chart + RRDSET *st; + rrdset_foreach_read(st, host) { + + if (likely(can_send_rrdset(instance, st, filter))) { + char chart[PROMETHEUS_ELEMENT_MAX + 1]; + char context[PROMETHEUS_ELEMENT_MAX + 1]; + char family[PROMETHEUS_ELEMENT_MAX + 1]; + char units[PROMETHEUS_ELEMENT_MAX + 1] = ""; + + prometheus_label_copy(chart, (output_options & PROMETHEUS_OUTPUT_NAMES && st->name) ? rrdset_name(st) : rrdset_id(st), PROMETHEUS_ELEMENT_MAX); + prometheus_label_copy(family, rrdset_family(st), PROMETHEUS_ELEMENT_MAX); + prometheus_name_copy(context, rrdset_context(st), PROMETHEUS_ELEMENT_MAX); + + int as_collected = (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AS_COLLECTED); + int homogeneous = 1; + int prometheus_collector = 0; + if (as_collected) { + if (rrdset_flag_check(st, RRDSET_FLAG_HOMOGENEOUS_CHECK)) + rrdset_update_heterogeneous_flag(st); + + if (rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) + homogeneous = 0; + + if (!strcmp(rrdset_module_name(st), "prometheus")) + prometheus_collector = 1; + } else { + if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AVERAGE && + !(output_options & PROMETHEUS_OUTPUT_HIDEUNITS)) + prometheus_units_copy( + units, rrdset_units(st), PROMETHEUS_ELEMENT_MAX, output_options & PROMETHEUS_OUTPUT_OLDUNITS); + } + + if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP)) + buffer_sprintf( + wb, + "\n# COMMENT %s chart \"%s\", context \"%s\", family \"%s\", units \"%s\"\n", + (homogeneous) ? "homogeneous" : "heterogeneous", + (output_options & PROMETHEUS_OUTPUT_NAMES && st->name) ? rrdset_name(st) : rrdset_id(st), + rrdset_context(st), + rrdset_family(st), + rrdset_units(st)); + + // for each dimension + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if (rd->collections_counter && !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { + char dimension[PROMETHEUS_ELEMENT_MAX + 1]; + char *suffix = ""; + + if (as_collected) { + // we need as-collected / raw data + + struct gen_parameters p; + p.prefix = prefix; + p.context = context; + p.suffix = suffix; + p.chart = chart; + p.dimension = dimension; + p.family = family; + p.labels = labels; + p.output_options = output_options; + p.st = st; + p.rd = rd; + + if (unlikely(rd->last_collected_time.tv_sec < instance->after)) + continue; + + p.type = "gauge"; + p.relation = "gives"; + if (rd->algorithm == RRD_ALGORITHM_INCREMENTAL || + rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL) { + p.type = "counter"; + p.relation = "delta gives"; + if (!prometheus_collector) + p.suffix = "_total"; + } + + if (homogeneous) { + // all the dimensions of the chart, has the same algorithm, multiplier and divisor + // we add all dimensions as labels + + prometheus_label_copy( + dimension, + (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + PROMETHEUS_ELEMENT_MAX); + + if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP)) + generate_as_collected_prom_help(wb, &p, homogeneous, prometheus_collector); + + if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES)) + buffer_sprintf(wb, "# TYPE %s_%s%s %s\n", prefix, context, suffix, p.type); + + generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector); + } + else { + // the dimensions of the chart, do not have the same algorithm, multiplier or divisor + // we create a metric per dimension + + prometheus_name_copy( + dimension, + (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + PROMETHEUS_ELEMENT_MAX); + + if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP)) + generate_as_collected_prom_help(wb, &p, homogeneous, prometheus_collector); + + if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES)) + buffer_sprintf( + wb, "# TYPE %s_%s_%s%s %s\n", prefix, context, dimension, suffix, p.type); + + generate_as_collected_prom_metric(wb, &p, homogeneous, prometheus_collector); + } + } + else { + // we need average or sum of the data + + time_t first_time = instance->after; + time_t last_time = instance->before; + NETDATA_DOUBLE value = exporting_calculate_value_from_stored_data(instance, rd, &last_time); + + if (!isnan(value) && !isinf(value)) { + if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AVERAGE) + suffix = "_average"; + else if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_SUM) + suffix = "_sum"; + + prometheus_label_copy( + dimension, + (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + PROMETHEUS_ELEMENT_MAX); + + if (unlikely(output_options & PROMETHEUS_OUTPUT_HELP)) + buffer_sprintf( + wb, + "# COMMENT %s_%s%s%s: dimension \"%s\", value is %s, gauge, dt %llu to %llu inclusive\n", + prefix, + context, + units, + suffix, + (output_options & PROMETHEUS_OUTPUT_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + rrdset_units(st), + (unsigned long long)first_time, + (unsigned long long)last_time); + + if (unlikely(output_options & PROMETHEUS_OUTPUT_TYPES)) + buffer_sprintf(wb, "# TYPE %s_%s%s%s gauge\n", prefix, context, units, suffix); + + if (output_options & PROMETHEUS_OUTPUT_TIMESTAMPS) + buffer_sprintf( + wb, + "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " NETDATA_DOUBLE_FORMAT + " %llu\n", + prefix, + context, + units, + suffix, + chart, + family, + dimension, + labels, + value, + last_time * MSEC_PER_SEC); + else + buffer_sprintf( + wb, + "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " NETDATA_DOUBLE_FORMAT + "\n", + prefix, + context, + units, + suffix, + chart, + family, + dimension, + labels, + value); + } + } + } + } + rrddim_foreach_done(rd); + } + } + rrdset_foreach_done(st); + + simple_pattern_free(filter); +} + +/** + * Get the last time time when a server accessed Netdata. Write information about an API request to a buffer. + * + * @param instance an instance data structure. + * @param host a data collecting host. + * @param wb the buffer to write to. + * @param exporting_options options to configure what data is exported. + * @param server the name of a Prometheus server.. + * @param now actual time. + * @param output_options options to configure the format of the output. + * @return Returns the last time when the server accessed Netdata. + */ +static inline time_t prometheus_preparation( + struct instance *instance, + RRDHOST *host, + BUFFER *wb, + EXPORTING_OPTIONS exporting_options, + const char *server, + time_t now, + PROMETHEUS_OUTPUT_OPTIONS output_options) +{ +#ifndef UNIT_TESTING + analytics_log_prometheus(); +#endif + if (!server || !*server) + server = "default"; + + time_t after = prometheus_server_last_access(server, host, now); + + int first_seen = 0; + if (!after) { + after = now - instance->config.update_every; + first_seen = 1; + } + + if (after > now) { + // oops! this should never happen + after = now - instance->config.update_every; + } + + if (output_options & PROMETHEUS_OUTPUT_HELP) { + char *mode; + if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AS_COLLECTED) + mode = "as collected"; + else if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_AVERAGE) + mode = "average"; + else if (EXPORTING_OPTIONS_DATA_SOURCE(exporting_options) == EXPORTING_SOURCE_DATA_SUM) + mode = "sum"; + else + mode = "unknown"; + + buffer_sprintf( + wb, + "# COMMENT netdata \"%s\" to %sprometheus \"%s\", source \"%s\", last seen %lu %s, time range %lu to %lu\n\n", + rrdhost_hostname(host), + (first_seen) ? "FIRST SEEN " : "", + server, + mode, + (unsigned long)((first_seen) ? 0 : (now - after)), + (first_seen) ? "never" : "seconds ago", + (unsigned long)after, + (unsigned long)now); + } + + return after; +} + +/** + * Write metrics and auxiliary information for one host to a buffer. + * + * @param host a data collecting host. + * @param filter_string a simple pattern filter. + * @param wb the buffer to write to. + * @param server the name of a Prometheus server. + * @param prefix a prefix for every metric. + * @param exporting_options options to configure what data is exported. + * @param output_options options to configure the format of the output. + */ +void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host( + RRDHOST *host, + const char *filter_string, + BUFFER *wb, + const char *server, + const char *prefix, + EXPORTING_OPTIONS exporting_options, + PROMETHEUS_OUTPUT_OPTIONS output_options) +{ + if (unlikely(!prometheus_exporter_instance || !prometheus_exporter_instance->config.initialized)) + return; + + prometheus_exporter_instance->before = now_realtime_sec(); + + // we start at the point we had stopped before + prometheus_exporter_instance->after = prometheus_preparation( + prometheus_exporter_instance, + host, + wb, + exporting_options, + server, + prometheus_exporter_instance->before, + output_options); + + rrd_stats_api_v1_charts_allmetrics_prometheus( + prometheus_exporter_instance, host, filter_string, wb, prefix, exporting_options, 0, output_options); +} + +/** + * Write metrics and auxiliary information for all hosts to a buffer. + * + * @param host a data collecting host. + * @param filter_string a simple pattern filter. + * @param wb the buffer to write to. + * @param server the name of a Prometheus server. + * @param prefix a prefix for every metric. + * @param exporting_options options to configure what data is exported. + * @param output_options options to configure the format of the output. + */ +void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts( + RRDHOST *host, + const char *filter_string, + BUFFER *wb, + const char *server, + const char *prefix, + EXPORTING_OPTIONS exporting_options, + PROMETHEUS_OUTPUT_OPTIONS output_options) +{ + if (unlikely(!prometheus_exporter_instance || !prometheus_exporter_instance->config.initialized)) + return; + + prometheus_exporter_instance->before = now_realtime_sec(); + + // we start at the point we had stopped before + prometheus_exporter_instance->after = prometheus_preparation( + prometheus_exporter_instance, + host, + wb, + exporting_options, + server, + prometheus_exporter_instance->before, + output_options); + + rrd_rdlock(); + rrdhost_foreach_read(host) + { + rrd_stats_api_v1_charts_allmetrics_prometheus( + prometheus_exporter_instance, host, filter_string, wb, prefix, exporting_options, 1, output_options); + } + rrd_unlock(); +} diff --git a/exporting/prometheus/prometheus.h b/exporting/prometheus/prometheus.h new file mode 100644 index 0000000..e80b682 --- /dev/null +++ b/exporting/prometheus/prometheus.h @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EXPORTING_PROMETHEUS_H +#define NETDATA_EXPORTING_PROMETHEUS_H 1 + +#include "exporting/exporting_engine.h" + +#define PROMETHEUS_ELEMENT_MAX 256 +#define PROMETHEUS_LABELS_MAX 1024 +#define PROMETHEUS_VARIABLE_MAX 256 + +#define PROMETHEUS_LABELS_MAX_NUMBER 128 + +typedef enum prometheus_output_flags { + PROMETHEUS_OUTPUT_NONE = 0, + PROMETHEUS_OUTPUT_HELP = (1 << 0), + PROMETHEUS_OUTPUT_TYPES = (1 << 1), + PROMETHEUS_OUTPUT_NAMES = (1 << 2), + PROMETHEUS_OUTPUT_TIMESTAMPS = (1 << 3), + PROMETHEUS_OUTPUT_VARIABLES = (1 << 4), + PROMETHEUS_OUTPUT_OLDUNITS = (1 << 5), + PROMETHEUS_OUTPUT_HIDEUNITS = (1 << 6) +} PROMETHEUS_OUTPUT_OPTIONS; + +void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host( + RRDHOST *host, const char *filter_string, BUFFER *wb, const char *server, const char *prefix, + EXPORTING_OPTIONS exporting_options, PROMETHEUS_OUTPUT_OPTIONS output_options); +void rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts( + RRDHOST *host, const char *filter_string, BUFFER *wb, const char *server, const char *prefix, + EXPORTING_OPTIONS exporting_options, PROMETHEUS_OUTPUT_OPTIONS output_options); + +int can_send_rrdset(struct instance *instance, RRDSET *st, SIMPLE_PATTERN *filter); +size_t prometheus_name_copy(char *d, const char *s, size_t usable); +size_t prometheus_label_copy(char *d, const char *s, size_t usable); +char *prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits); + +void format_host_labels_prometheus(struct instance *instance, RRDHOST *host); + +void prometheus_clean_server_root(); + +#endif //NETDATA_EXPORTING_PROMETHEUS_H diff --git a/exporting/prometheus/remote_write/Makefile.am b/exporting/prometheus/remote_write/Makefile.am new file mode 100644 index 0000000..d049ef4 --- /dev/null +++ b/exporting/prometheus/remote_write/Makefile.am @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +CLEANFILES = \ + remote_write.pb.cc \ + remote_write.pb.h \ + $(NULL) + +dist_noinst_DATA = \ + remote_write.proto \ + README.md \ + $(NULL) diff --git a/exporting/prometheus/remote_write/README.md b/exporting/prometheus/remote_write/README.md new file mode 100644 index 0000000..54c5d65 --- /dev/null +++ b/exporting/prometheus/remote_write/README.md @@ -0,0 +1,58 @@ +<!-- +title: "Export metrics to Prometheus remote write providers" +description: "Send Netdata metrics to your choice of more than 20 external storage providers for long-term archiving and further analysis." +custom_edit_url: https://github.com/netdata/netdata/edit/master/exporting/prometheus/remote_write/README.md +sidebar_label: Prometheus remote write +--> + +# Prometheus remote write exporting connector + +The Prometheus remote write exporting connector uses the exporting engine to send Netdata metrics to your choice of more +than 20 external storage providers for long-term archiving and further analysis. + +## Prerequisites + +To use the Prometheus remote write API with [storage +providers](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage), install +[protobuf](https://developers.google.com/protocol-buffers/) and [snappy](https://github.com/google/snappy) libraries. +Next, [reinstall Netdata](/packaging/installer/REINSTALL.md), which detects that the required libraries and utilities +are now available. + +## Configuration + +To enable data exporting to a storage provider using the Prometheus remote write API, run `./edit-config exporting.conf` +in the Netdata configuration directory and set the following options: + +```conf +[prometheus_remote_write:my_instance] + enabled = yes + destination = example.domain:example_port + remote write URL path = /receive +``` + +You can also add `:https` modifier to the connector type if you need to use the TLS/SSL protocol. For example: +`remote_write:https:my_instance`. + +`remote write URL path` is used to set an endpoint path for the remote write protocol. The default value is `/receive`. +For example, if your endpoint is `http://example.domain:example_port/storage/read`: + +```conf + destination = example.domain:example_port + remote write URL path = /storage/read +``` + +You can set basic HTTP authentication credentials using + +```conf + username = my_username + password = my_password +``` + +`buffered` and `lost` dimensions in the Netdata Exporting Connector Data Size operation monitoring chart estimate uncompressed +buffer size on failures. + +## Notes + +The remote write exporting connector does not support `buffer on failures` + + diff --git a/exporting/prometheus/remote_write/remote_write.c b/exporting/prometheus/remote_write/remote_write.c new file mode 100644 index 0000000..2e2fa3c --- /dev/null +++ b/exporting/prometheus/remote_write/remote_write.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "remote_write.h" + +static int as_collected; +static int homogeneous; +char context[PROMETHEUS_ELEMENT_MAX + 1]; +char chart[PROMETHEUS_ELEMENT_MAX + 1]; +char family[PROMETHEUS_ELEMENT_MAX + 1]; +char units[PROMETHEUS_ELEMENT_MAX + 1] = ""; + +/** + * Prepare HTTP header + * + * @param instance an instance data structure. + */ +void prometheus_remote_write_prepare_header(struct instance *instance) +{ + struct prometheus_remote_write_specific_config *connector_specific_config = + instance->config.connector_specific_config; + struct simple_connector_data *simple_connector_data = instance->connector_specific_data; + + buffer_sprintf( + simple_connector_data->last_buffer->header, + "POST %s HTTP/1.1\r\n" + "Host: %s\r\n" + "Accept: */*\r\n" + "%s" + "Content-Encoding: snappy\r\n" + "Content-Type: application/x-protobuf\r\n" + "X-Prometheus-Remote-Write-Version: 0.1.0\r\n" + "Content-Length: %zu\r\n" + "\r\n", + connector_specific_config->remote_write_path, + simple_connector_data->connected_to, + simple_connector_data->auth_string ? simple_connector_data->auth_string : "", + buffer_strlen(simple_connector_data->last_buffer->buffer)); +} + +/** + * Process a response received after Prometheus remote write connector had sent data + * + * @param buffer a response from a remote service. + * @param instance an instance data structure. + * @return Returns 0 on success, 1 on failure. + */ +int process_prometheus_remote_write_response(BUFFER *buffer, struct instance *instance) +{ + if (unlikely(!buffer)) + return 1; + + const char *s = buffer_tostring(buffer); + int len = buffer_strlen(buffer); + + // do nothing with HTTP responses 200 or 204 + + while (!isspace(*s) && len) { + s++; + len--; + } + s++; + len--; + + if (likely(len > 4 && (!strncmp(s, "200 ", 4) || !strncmp(s, "204 ", 4)))) + return 0; + else + return exporting_discard_response(buffer, instance); +} + +/** + * Release specific data allocated. + * + * @param instance an instance data structure. + */ +void clean_prometheus_remote_write(struct instance *instance) +{ + struct simple_connector_data *simple_connector_data = instance->connector_specific_data; + freez(simple_connector_data->connector_specific_data); + + struct prometheus_remote_write_specific_config *connector_specific_config = + instance->config.connector_specific_config; + freez(connector_specific_config->remote_write_path); +} + +/** + * Initialize Prometheus Remote Write connector instance + * + * @param instance an instance data structure. + * @return Returns 0 on success, 1 on failure. + */ +int init_prometheus_remote_write_instance(struct instance *instance) +{ + instance->worker = simple_connector_worker; + + instance->start_batch_formatting = NULL; + instance->start_host_formatting = format_host_prometheus_remote_write; + instance->start_chart_formatting = format_chart_prometheus_remote_write; + instance->metric_formatting = format_dimension_prometheus_remote_write; + instance->end_chart_formatting = NULL; + instance->variables_formatting = format_variables_prometheus_remote_write; + instance->end_host_formatting = NULL; + instance->end_batch_formatting = format_batch_prometheus_remote_write; + + instance->prepare_header = prometheus_remote_write_prepare_header; + instance->check_response = process_prometheus_remote_write_response; + + instance->buffer = (void *)buffer_create(0); + + if (uv_mutex_init(&instance->mutex)) + return 1; + if (uv_cond_init(&instance->cond_var)) + return 1; + + struct simple_connector_data *simple_connector_data = callocz(1, sizeof(struct simple_connector_data)); + instance->connector_specific_data = simple_connector_data; + +#ifdef ENABLE_HTTPS + simple_connector_data->flags = NETDATA_SSL_START; + simple_connector_data->conn = NULL; + if (instance->config.options & EXPORTING_OPTION_USE_TLS) { + security_start_ssl(NETDATA_SSL_CONTEXT_EXPORTING); + } +#endif + + struct prometheus_remote_write_specific_data *connector_specific_data = + callocz(1, sizeof(struct prometheus_remote_write_specific_data)); + simple_connector_data->connector_specific_data = (void *)connector_specific_data; + + simple_connector_init(instance); + + connector_specific_data->write_request = init_write_request(); + + instance->engine->protocol_buffers_initialized = 1; + + return 0; +} + +struct format_remote_write_label_callback { + struct instance *instance; + void *write_request; +}; + +static int format_remote_write_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct format_remote_write_label_callback *d = (struct format_remote_write_label_callback *)data; + + if (!should_send_label(d->instance, ls)) return 0; + + char k[PROMETHEUS_ELEMENT_MAX + 1]; + char v[PROMETHEUS_ELEMENT_MAX + 1]; + + prometheus_name_copy(k, name, PROMETHEUS_ELEMENT_MAX); + prometheus_label_copy(v, value, PROMETHEUS_ELEMENT_MAX); + add_label(d->write_request, k, v); + return 1; +} + +/** + * Format host data for Prometheus Remote Write connector + * + * @param instance an instance data structure. + * @param host a data collecting host. + * @return Always returns 0. + */ +int format_host_prometheus_remote_write(struct instance *instance, RRDHOST *host) +{ + struct simple_connector_data *simple_connector_data = + (struct simple_connector_data *)instance->connector_specific_data; + struct prometheus_remote_write_specific_data *connector_specific_data = + (struct prometheus_remote_write_specific_data *)simple_connector_data->connector_specific_data; + + char hostname[PROMETHEUS_ELEMENT_MAX + 1]; + prometheus_label_copy( + hostname, + (host == localhost) ? instance->config.hostname : rrdhost_hostname(host), + PROMETHEUS_ELEMENT_MAX); + + add_host_info( + connector_specific_data->write_request, + "netdata_info", hostname, rrdhost_program_name(host), rrdhost_program_version(host), now_realtime_usec() / USEC_PER_MS); + + if (unlikely(sending_labels_configured(instance))) { + struct format_remote_write_label_callback tmp = { + .write_request = connector_specific_data->write_request, + .instance = instance + }; + rrdlabels_walkthrough_read(host->rrdlabels, format_remote_write_label_callback, &tmp); + } + + return 0; +} + +/** + * Format chart data for Prometheus Remote Write connector + * + * @param instance an instance data structure. + * @param st a chart. + * @return Always returns 0. + */ +int format_chart_prometheus_remote_write(struct instance *instance, RRDSET *st) +{ + prometheus_label_copy( + chart, + (instance->config.options & EXPORTING_OPTION_SEND_NAMES && st->name) ? rrdset_name(st) : rrdset_id(st), + PROMETHEUS_ELEMENT_MAX); + prometheus_label_copy(family, rrdset_family(st), PROMETHEUS_ELEMENT_MAX); + prometheus_name_copy(context, rrdset_context(st), PROMETHEUS_ELEMENT_MAX); + + as_collected = (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AS_COLLECTED); + homogeneous = 1; + if (as_collected) { + if (rrdset_flag_check(st, RRDSET_FLAG_HOMOGENEOUS_CHECK)) + rrdset_update_heterogeneous_flag(st); + + if (rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) + homogeneous = 0; + } else { + if (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AVERAGE) + prometheus_units_copy(units, rrdset_units(st), PROMETHEUS_ELEMENT_MAX, 0); + } + + return 0; +} + +/** + * Format dimension data for Prometheus Remote Write connector + * + * @param instance an instance data structure. + * @param rd a dimension. + * @return Always returns 0. + */ +int format_dimension_prometheus_remote_write(struct instance *instance, RRDDIM *rd) +{ + struct simple_connector_data *simple_connector_data = + (struct simple_connector_data *)instance->connector_specific_data; + struct prometheus_remote_write_specific_data *connector_specific_data = + (struct prometheus_remote_write_specific_data *)simple_connector_data->connector_specific_data; + + if (rd->collections_counter && !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { + char name[PROMETHEUS_LABELS_MAX + 1]; + char dimension[PROMETHEUS_ELEMENT_MAX + 1]; + char *suffix = ""; + RRDHOST *host = rd->rrdset->rrdhost; + + if (as_collected) { + // we need as-collected / raw data + + if (unlikely(rd->last_collected_time.tv_sec < instance->after)) { + debug( + D_EXPORTING, + "EXPORTING: not sending dimension '%s' of chart '%s' from host '%s', " + "its last data collection (%lu) is not within our timeframe (%lu to %lu)", + rrddim_id(rd), rrdset_id(rd->rrdset), + (host == localhost) ? instance->config.hostname : rrdhost_hostname(host), + (unsigned long)rd->last_collected_time.tv_sec, + (unsigned long)instance->after, + (unsigned long)instance->before); + return 0; + } + + if (rd->algorithm == RRD_ALGORITHM_INCREMENTAL || rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL) { + if (strcmp(rrdset_module_name(rd->rrdset), "prometheus")) + suffix = "_total"; + } + + if (homogeneous) { + // all the dimensions of the chart, has the same algorithm, multiplier and divisor + // we add all dimensions as labels + + prometheus_label_copy( + dimension, + (instance->config.options & EXPORTING_OPTION_SEND_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + PROMETHEUS_ELEMENT_MAX); + snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s", instance->config.prefix, context, suffix); + + add_metric( + connector_specific_data->write_request, + name, chart, family, dimension, + (host == localhost) ? instance->config.hostname : rrdhost_hostname(host), + rd->last_collected_value, timeval_msec(&rd->last_collected_time)); + } else { + // the dimensions of the chart, do not have the same algorithm, multiplier or divisor + // we create a metric per dimension + + prometheus_name_copy( + dimension, + (instance->config.options & EXPORTING_OPTION_SEND_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + PROMETHEUS_ELEMENT_MAX); + snprintf( + name, PROMETHEUS_LABELS_MAX, "%s_%s_%s%s", instance->config.prefix, context, dimension, + suffix); + + add_metric( + connector_specific_data->write_request, + name, chart, family, NULL, + (host == localhost) ? instance->config.hostname : rrdhost_hostname(host), + rd->last_collected_value, timeval_msec(&rd->last_collected_time)); + } + } else { + // we need average or sum of the data + + time_t last_t = instance->before; + NETDATA_DOUBLE value = exporting_calculate_value_from_stored_data(instance, rd, &last_t); + + if (!isnan(value) && !isinf(value)) { + if (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_AVERAGE) + suffix = "_average"; + else if (EXPORTING_OPTIONS_DATA_SOURCE(instance->config.options) == EXPORTING_SOURCE_DATA_SUM) + suffix = "_sum"; + + prometheus_label_copy( + dimension, + (instance->config.options & EXPORTING_OPTION_SEND_NAMES && rd->name) ? rrddim_name(rd) : rrddim_id(rd), + PROMETHEUS_ELEMENT_MAX); + snprintf( + name, PROMETHEUS_LABELS_MAX, "%s_%s%s%s", instance->config.prefix, context, units, suffix); + + add_metric( + connector_specific_data->write_request, + name, chart, family, dimension, + (host == localhost) ? instance->config.hostname : rrdhost_hostname(host), + value, last_t * MSEC_PER_SEC); + } + } + } + + return 0; +} + +static int format_variable_prometheus_remote_write_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rv_ptr __maybe_unused, void *data) { + const RRDVAR_ACQUIRED *rv = (const RRDVAR_ACQUIRED *)item; + + struct prometheus_remote_write_variables_callback_options *opts = data; + + if (rrdvar_flags(rv) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_CUSTOM_CHART_VAR)) { + RRDHOST *host = opts->host; + struct instance *instance = opts->instance; + struct simple_connector_data *simple_connector_data = + (struct simple_connector_data *)instance->connector_specific_data; + struct prometheus_remote_write_specific_data *connector_specific_data = + (struct prometheus_remote_write_specific_data *)simple_connector_data->connector_specific_data; + + char name[PROMETHEUS_LABELS_MAX + 1]; + char *suffix = ""; + + prometheus_name_copy(context, rrdvar_name(rv), PROMETHEUS_ELEMENT_MAX); + snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s", instance->config.prefix, context, suffix); + + NETDATA_DOUBLE value = rrdvar2number(rv); + add_variable(connector_specific_data->write_request, name, + (host == localhost) ? instance->config.hostname : rrdhost_hostname(host), value, opts->now / USEC_PER_MS); + } + + return 0; +} + +/** + * Format a variable for Prometheus Remote Write connector + * + * @param rv a variable. + * @param instance an instance data structure. + * @return Always returns 0. + */ +int format_variables_prometheus_remote_write(struct instance *instance, RRDHOST *host) +{ + struct prometheus_remote_write_variables_callback_options opt = { + .host = host, + .instance = instance, + .now = now_realtime_usec(), + }; + + return rrdvar_walkthrough_read(host->rrdvars, format_variable_prometheus_remote_write_callback, &opt); +} + +/** + * Format a batch for Prometheus Remote Write connector + * + * @param instance an instance data structure. + * @return Returns 0 on success, 1 on failure. + */ +int format_batch_prometheus_remote_write(struct instance *instance) +{ + struct simple_connector_data *simple_connector_data = + (struct simple_connector_data *)instance->connector_specific_data; + struct prometheus_remote_write_specific_data *connector_specific_data = + (struct prometheus_remote_write_specific_data *)simple_connector_data->connector_specific_data; + + size_t data_size = get_write_request_size(connector_specific_data->write_request); + + if (unlikely(!data_size)) { + error("EXPORTING: write request size is out of range"); + return 1; + } + + BUFFER *buffer = instance->buffer; + + buffer_need_bytes(buffer, data_size); + if (unlikely(pack_and_clear_write_request(connector_specific_data->write_request, buffer->buffer, &data_size))) { + error("EXPORTING: cannot pack write request"); + return 1; + } + buffer->len = data_size; + + simple_connector_end_batch(instance); + + return 0; +} diff --git a/exporting/prometheus/remote_write/remote_write.h b/exporting/prometheus/remote_write/remote_write.h new file mode 100644 index 0000000..d4e8649 --- /dev/null +++ b/exporting/prometheus/remote_write/remote_write.h @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_H +#define NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_H + +#include "exporting/exporting_engine.h" +#include "exporting/prometheus/prometheus.h" +#include "remote_write_request.h" + +struct prometheus_remote_write_specific_data { + void *write_request; +}; + +struct prometheus_remote_write_variables_callback_options { + RRDHOST *host; + time_t now; + struct instance *instance; +}; + +int init_prometheus_remote_write_instance(struct instance *instance); +void clean_prometheus_remote_write(struct instance *instance); + +int format_host_prometheus_remote_write(struct instance *instance, RRDHOST *host); +int format_chart_prometheus_remote_write(struct instance *instance, RRDSET *st); +int format_dimension_prometheus_remote_write(struct instance *instance, RRDDIM *rd); +int format_variables_prometheus_remote_write(struct instance *instance, RRDHOST *host); +int format_batch_prometheus_remote_write(struct instance *instance); + +void prometheus_remote_write_prepare_header(struct instance *instance); +int process_prometheus_remote_write_response(BUFFER *buffer, struct instance *instance); + +#endif //NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_H diff --git a/exporting/prometheus/remote_write/remote_write.proto b/exporting/prometheus/remote_write/remote_write.proto new file mode 100644 index 0000000..dfde254 --- /dev/null +++ b/exporting/prometheus/remote_write/remote_write.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; +package prometheus; + +option cc_enable_arenas = true; + +import "google/protobuf/descriptor.proto"; + +message WriteRequest { + repeated TimeSeries timeseries = 1 [(nullable) = false]; +} + +message TimeSeries { + repeated Label labels = 1 [(nullable) = false]; + repeated Sample samples = 2 [(nullable) = false]; +} + +message Label { + string name = 1; + string value = 2; +} + +message Sample { + double value = 1; + int64 timestamp = 2; +} + +extend google.protobuf.FieldOptions { + bool nullable = 65001; +} diff --git a/exporting/prometheus/remote_write/remote_write_request.cc b/exporting/prometheus/remote_write/remote_write_request.cc new file mode 100644 index 0000000..ecfa11f --- /dev/null +++ b/exporting/prometheus/remote_write/remote_write_request.cc @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <snappy.h> +#include "remote_write.pb.h" +#include "remote_write_request.h" + +using namespace prometheus; + +google::protobuf::Arena arena; + +/** + * Initialize a write request + * + * @return Returns a new write request + */ +void *init_write_request() +{ + GOOGLE_PROTOBUF_VERIFY_VERSION; + WriteRequest *write_request = google::protobuf::Arena::CreateMessage<WriteRequest>(&arena); + return (void *)write_request; +} + +/** + * Adds information about a host to a write request + * + * @param write_request_p the write request + * @param name the name of a metric which is used for providing the host information + * @param instance the name of the host itself + * @param application the name of a program which sends the information + * @param version the version of the program + * @param timestamp the timestamp for the metric in milliseconds + */ +void add_host_info( + void *write_request_p, + const char *name, const char *instance, const char *application, const char *version, const int64_t timestamp) +{ + WriteRequest *write_request = (WriteRequest *)write_request_p; + TimeSeries *timeseries; + Sample *sample; + Label *label; + + timeseries = write_request->add_timeseries(); + + label = timeseries->add_labels(); + label->set_name("__name__"); + label->set_value(name); + + label = timeseries->add_labels(); + label->set_name("instance"); + label->set_value(instance); + + if (application) { + label = timeseries->add_labels(); + label->set_name("application"); + label->set_value(application); + } + + if (version) { + label = timeseries->add_labels(); + label->set_name("version"); + label->set_value(version); + } + + sample = timeseries->add_samples(); + sample->set_value(1); + sample->set_timestamp(timestamp); +} + +/** + * Adds a label to the last created timeseries + * + * @param write_request_p the write request with the timeseries + * @param key the key of the label + * @param value the value of the label + */ +void add_label(void *write_request_p, char *key, char *value) +{ + WriteRequest *write_request = (WriteRequest *)write_request_p; + TimeSeries *timeseries; + Label *label; + + timeseries = write_request->mutable_timeseries(write_request->timeseries_size() - 1); + + label = timeseries->add_labels(); + label->set_name(key); + label->set_value(value); +} + +/** + * Adds a metric to a write request + * + * @param write_request_p the write request + * @param name the name of the metric + * @param chart the chart, the metric belongs to + * @param family the family, the metric belongs to + * @param dimension the dimension, the metric belongs to + * @param instance the name of the host, the metric belongs to + * @param value the value of the metric + * @param timestamp the timestamp for the metric in milliseconds + */ +void add_metric( + void *write_request_p, + const char *name, const char *chart, const char *family, const char *dimension, const char *instance, + const double value, const int64_t timestamp) +{ + WriteRequest *write_request = (WriteRequest *)write_request_p; + TimeSeries *timeseries; + Sample *sample; + Label *label; + + timeseries = write_request->add_timeseries(); + + label = timeseries->add_labels(); + label->set_name("__name__"); + label->set_value(name); + + label = timeseries->add_labels(); + label->set_name("chart"); + label->set_value(chart); + + label = timeseries->add_labels(); + label->set_name("family"); + label->set_value(family); + + if (dimension) { + label = timeseries->add_labels(); + label->set_name("dimension"); + label->set_value(dimension); + } + + label = timeseries->add_labels(); + label->set_name("instance"); + label->set_value(instance); + + sample = timeseries->add_samples(); + sample->set_value(value); + sample->set_timestamp(timestamp); +} + +/** + * Adds a metric to a write request + * + * @param write_request_p the write request + * @param name the name of the metric + * @param instance the name of the host, the metric belongs to + * @param value the value of the metric + * @param timestamp the timestamp for the metric in milliseconds + */ +void add_variable( + void *write_request_p, const char *name, const char *instance, const double value, const int64_t timestamp) +{ + WriteRequest *write_request = (WriteRequest *)write_request_p; + TimeSeries *timeseries; + Sample *sample; + Label *label; + + timeseries = write_request->add_timeseries(); + + label = timeseries->add_labels(); + label->set_name("__name__"); + label->set_value(name); + + label = timeseries->add_labels(); + label->set_name("instance"); + label->set_value(instance); + + sample = timeseries->add_samples(); + sample->set_value(value); + sample->set_timestamp(timestamp); +} + +/** + * Gets the size of a write request + * + * @param write_request_p the write request + * @return Returns the size of the write request + */ +size_t get_write_request_size(void *write_request_p) +{ + WriteRequest *write_request = (WriteRequest *)write_request_p; + +#if GOOGLE_PROTOBUF_VERSION < 3001000 + size_t size = (size_t)snappy::MaxCompressedLength(write_request->ByteSize()); +#else + size_t size = (size_t)snappy::MaxCompressedLength(write_request->ByteSizeLong()); +#endif + + return (size < INT_MAX) ? size : 0; +} + +/** + * Packs a write request into a buffer and clears the request + * + * @param write_request_p the write request + * @param buffer a buffer, where compressed data is written + * @param size gets the size of the write request, returns the size of the compressed data + * @return Returns 0 on success, 1 on failure + */ +int pack_and_clear_write_request(void *write_request_p, char *buffer, size_t *size) +{ + WriteRequest *write_request = (WriteRequest *)write_request_p; + std::string uncompressed_write_request; + + if (write_request->SerializeToString(&uncompressed_write_request) == false) + return 1; + write_request->clear_timeseries(); + snappy::RawCompress(uncompressed_write_request.data(), uncompressed_write_request.size(), buffer, size); + + return 0; +} + +/** + * Writes an unpacked write request into a text buffer + * + * @param write_request_p the write request + * @param buffer a buffer, where text is written + * @param size the size of the buffer + * @return Returns 0 on success, 1 on failure + */ +int convert_write_request_to_string( + const char *compressed_write_request, + size_t compressed_size, + char *buffer, + size_t size) +{ + size_t uncompressed_size = 0; + + snappy::GetUncompressedLength(compressed_write_request, compressed_size, &uncompressed_size); + if (size < uncompressed_size) + return 1; + char *uncompressed_write_request = (char *)malloc(size); + + if (snappy::RawUncompress(compressed_write_request, compressed_size, uncompressed_write_request) == false) { + free(uncompressed_write_request); + return 1; + } + + WriteRequest *write_request = google::protobuf::Arena::CreateMessage<WriteRequest>(&arena); + if (write_request->ParseFromString(std::string(uncompressed_write_request, uncompressed_size)) == false) { + free(uncompressed_write_request); + return 1; + } + + std::string text_write_request(write_request->DebugString()); + text_write_request.copy(buffer, size); + + free(uncompressed_write_request); + + return 0; +} + +/** + * Shuts down the Protobuf library + */ +void protocol_buffers_shutdown() +{ + google::protobuf::ShutdownProtobufLibrary(); +} diff --git a/exporting/prometheus/remote_write/remote_write_request.h b/exporting/prometheus/remote_write/remote_write_request.h new file mode 100644 index 0000000..b253701 --- /dev/null +++ b/exporting/prometheus/remote_write/remote_write_request.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_REQUEST_H +#define NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_REQUEST_H + +#ifdef __cplusplus +extern "C" { +#endif + +void *init_write_request(); + +void add_host_info( + void *write_request_p, + const char *name, const char *instance, const char *application, const char *version, const int64_t timestamp); + +void add_label(void *write_request_p, char *key, char *value); + +void add_metric( + void *write_request_p, + const char *name, const char *chart, const char *family, const char *dimension, + const char *instance, const double value, const int64_t timestamp); + +void add_variable( + void *write_request_p, const char *name, const char *instance, const double value, const int64_t timestamp); + +size_t get_write_request_size(void *write_request_p); + +int pack_and_clear_write_request(void *write_request_p, char *buffer, size_t *size); + +int convert_write_request_to_string( + const char *compressed_write_request, + size_t compressed_size, + char *buffer, + size_t size); + +void protocol_buffers_shutdown(); + +#ifdef __cplusplus +} +#endif + +#endif //NETDATA_EXPORTING_PROMETHEUS_REMOTE_WRITE_REQUEST_H |