diff options
Diffstat (limited to 'backends/prometheus')
-rw-r--r-- | backends/prometheus/Makefile.am | 12 | ||||
-rw-r--r-- | backends/prometheus/README.md | 457 | ||||
-rw-r--r-- | backends/prometheus/backend_prometheus.c | 797 | ||||
-rw-r--r-- | backends/prometheus/backend_prometheus.h | 37 | ||||
-rw-r--r-- | backends/prometheus/remote_write/Makefile.am | 14 | ||||
-rw-r--r-- | backends/prometheus/remote_write/README.md | 41 | ||||
-rw-r--r-- | backends/prometheus/remote_write/remote_write.cc | 120 | ||||
-rw-r--r-- | backends/prometheus/remote_write/remote_write.h | 30 | ||||
-rw-r--r-- | backends/prometheus/remote_write/remote_write.proto | 29 |
9 files changed, 0 insertions, 1537 deletions
diff --git a/backends/prometheus/Makefile.am b/backends/prometheus/Makefile.am deleted file mode 100644 index 334fca81c..000000000 --- a/backends/prometheus/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in - -SUBDIRS = \ - remote_write \ - $(NULL) - -dist_noinst_DATA = \ - README.md \ - $(NULL) diff --git a/backends/prometheus/README.md b/backends/prometheus/README.md deleted file mode 100644 index a0460d1d8..000000000 --- a/backends/prometheus/README.md +++ /dev/null @@ -1,457 +0,0 @@ -<!-- -title: "Using Netdata with Prometheus" -custom_edit_url: https://github.com/netdata/netdata/edit/master/backends/prometheus/README.md ---> - -# Using Netdata with Prometheus - -> IMPORTANT: the format Netdata sends metrics to prometheus has changed since Netdata v1.7. The new prometheus backend -> for Netdata supports a lot more features and is aligned to the development of the rest of the Netdata backends. - -Prometheus is a distributed monitoring system which offers a very simple setup along with a robust data model. Recently -Netdata added support for Prometheus. I'm going to quickly show you how to install both Netdata and prometheus on the -same server. We can then use grafana pointed at Prometheus to obtain long term metrics Netdata offers. I'm assuming we -are starting at a fresh ubuntu shell (whether you'd like to follow along in a VM or a cloud instance is up to you). - -## Installing Netdata and prometheus - -### Installing Netdata - -There are number of ways to install Netdata according to [Installation](/packaging/installer/README.md). The suggested way -of installing the latest Netdata and keep it upgrade automatically. Using one line installation: - -```sh -bash <(curl -Ss https://my-netdata.io/kickstart.sh) -``` - -At this point we should have Netdata listening on port 19999. 
Attempt to take your browser here: - -```sh -http://your.netdata.ip:19999 -``` - -_(replace `your.netdata.ip` with the IP or hostname of the server running Netdata)_ - -### Installing Prometheus - -In order to install prometheus we are going to introduce our own systemd startup script along with an example of -prometheus.yaml configuration. Prometheus needs to be pointed to your server at a specific target url for it to scrape -Netdata's api. Prometheus is always a pull model meaning Netdata is the passive client within this architecture. -Prometheus always initiates the connection with Netdata. - -#### Download Prometheus - -```sh -cd /tmp && curl -s https://api.github.com/repos/prometheus/prometheus/releases/latest \ -| grep "browser_download_url.*linux-amd64.tar.gz" \ -| cut -d '"' -f 4 \ -| wget -qi - -``` - -#### Create prometheus system user - -```sh -sudo useradd -r prometheus -``` - -#### Create prometheus directory - -```sh -sudo mkdir /opt/prometheus -sudo chown prometheus:prometheus /opt/prometheus -``` - -#### Untar prometheus directory - -```sh -sudo tar -xvf /tmp/prometheus-*linux-amd64.tar.gz -C /opt/prometheus --strip=1 -``` - -#### Install prometheus.yml - -We will use the following `prometheus.yml` file. Save it at `/opt/prometheus/prometheus.yml`. - -Make sure to replace `your.netdata.ip` with the IP or hostname of the host running Netdata. - -```yaml -# my global config -global: - scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute. - evaluation_interval: 5s # Evaluate rules every 5 seconds. The default is every 1 minute. - # scrape_timeout is set to the global default (10s). - - # Attach these labels to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). - external_labels: - monitor: 'codelab-monitor' - -# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 
-rule_files: - # - "first.rules" - # - "second.rules" - -# A scrape configuration containing exactly one endpoint to scrape: -# Here it's Prometheus itself. -scrape_configs: - # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. - - job_name: 'prometheus' - - # metrics_path defaults to '/metrics' - # scheme defaults to 'http'. - - static_configs: - - targets: ['0.0.0.0:9090'] - - - job_name: 'netdata-scrape' - - metrics_path: '/api/v1/allmetrics' - params: - # format: prometheus | prometheus_all_hosts - # You can use `prometheus_all_hosts` if you want Prometheus to set the `instance` to your hostname instead of IP - format: [prometheus] - # - # source: as-collected | raw | average | sum | volume - # default is: average - #source: [as-collected] - # - # server name for this prometheus - the default is the client IP - # for Netdata to uniquely identify it - #server: ['prometheus1'] - honor_labels: true - - static_configs: - - targets: ['{your.netdata.ip}:19999'] -``` - -#### Install nodes.yml - -The following is completely optional, it will enable Prometheus to generate alerts from some Netdata sources. Tweak the -values to your own needs. We will use the following `nodes.yml` file below. Save it at `/opt/prometheus/nodes.yml`, and -add a _- "nodes.yml"_ entry under the _rule_files:_ section in the example prometheus.yml file above. - -```yaml -groups: - - name: nodes - - rules: - - alert: node_high_cpu_usage_70 - expr: sum(sum_over_time(netdata_system_cpu_percentage_average{dimension=~"(user|system|softirq|irq|guest)"}[10m])) by (job) / sum(count_over_time(netdata_system_cpu_percentage_average{dimension="idle"}[10m])) by (job) > 70 - for: 1m - annotations: - description: '{{ $labels.job }} on ''{{ $labels.job }}'' CPU usage is at {{ humanize $value }}%.' 
- summary: CPU alert for container node '{{ $labels.job }}' - - - alert: node_high_memory_usage_70 - expr: 100 / sum(netdata_system_ram_MB_average) by (job) - * sum(netdata_system_ram_MB_average{dimension=~"free|cached"}) by (job) < 30 - for: 1m - annotations: - description: '{{ $labels.job }} memory usage is {{ humanize $value}}%.' - summary: Memory alert for container node '{{ $labels.job }}' - - - alert: node_low_root_filesystem_space_20 - expr: 100 / sum(netdata_disk_space_GB_average{family="/"}) by (job) - * sum(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}) by (job) < 20 - for: 1m - annotations: - description: '{{ $labels.job }} root filesystem space is {{ humanize $value}}%.' - summary: Root filesystem alert for container node '{{ $labels.job }}' - - - alert: node_root_filesystem_fill_rate_6h - expr: predict_linear(netdata_disk_space_GB_average{family="/",dimension=~"avail|cached"}[1h], 6 * 3600) < 0 - for: 1h - labels: - severity: critical - annotations: - description: Container node {{ $labels.job }} root filesystem is going to fill up in 6h. - summary: Disk fill alert for Swarm node '{{ $labels.job }}' -``` - -#### Install prometheus.service - -Save this service file as `/etc/systemd/system/prometheus.service`: - -```sh -[Unit] -Description=Prometheus Server -AssertPathExists=/opt/prometheus - -[Service] -Type=simple -WorkingDirectory=/opt/prometheus -User=prometheus -Group=prometheus -ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --log.level=info -ExecReload=/bin/kill -SIGHUP $MAINPID -ExecStop=/bin/kill -SIGINT $MAINPID - -[Install] -WantedBy=multi-user.target -``` - -##### Start Prometheus - -```sh -sudo systemctl start prometheus -sudo systemctl enable prometheus -``` - -Prometheus should now start and listen on port 9090. Attempt to head there with your browser. - -If everything is working correctly when you fetch `http://your.prometheus.ip:9090` you will see a 'Status' tab. 
Click -this and click on 'targets' We should see the Netdata host as a scraped target. - ---- - -## Netdata support for prometheus - -> IMPORTANT: the format Netdata sends metrics to prometheus has changed since Netdata v1.6. The new format allows easier -> queries for metrics and supports both `as collected` and normalized metrics. - -Before explaining the changes, we have to understand the key differences between Netdata and prometheus. - -### understanding Netdata metrics - -#### charts - -Each chart in Netdata has several properties (common to all its metrics): - -- `chart_id` - uniquely identifies a chart. - -- `chart_name` - a more human friendly name for `chart_id`, also unique. - -- `context` - this is the template of the chart. All disk I/O charts have the same context, all mysql requests charts - have the same context, etc. This is used for alarm templates to match all the charts they should be attached to. - -- `family` groups a set of charts together. It is used as the submenu of the dashboard. - -- `units` is the units for all the metrics attached to the chart. - -#### dimensions - -Then each Netdata chart contains metrics called `dimensions`. All the dimensions of a chart have the same units of -measurement, and are contextually in the same category (ie. the metrics for disk bandwidth are `read` and `write` and -they are both in the same chart). - -### Netdata data source - -Netdata can send metrics to prometheus from 3 data sources: - -- `as collected` or `raw` - this data source sends the metrics to prometheus as they are collected. No conversion is - done by Netdata. The latest value for each metric is just given to prometheus. This is the most preferred method by - prometheus, but it is also the harder to work with. To work with this data source, you will need to understand how - to get meaningful values out of them. - - The format of the metrics is: `CONTEXT{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. 
- - If the metric is a counter (`incremental` in Netdata lingo), `_total` is appended to the context. - - Unlike prometheus, Netdata allows each dimension of a chart to have a different algorithm and conversion constants - (`multiplier` and `divisor`). In the case where the dimensions of a chart are heterogeneous, Netdata will use this - format: `CONTEXT_DIMENSION{chart="CHART",family="FAMILY"}` - -- `average` - this data source uses the Netdata database to send the metrics to prometheus as they are presented on - the Netdata dashboard. So, all the metrics are sent as gauges, at the units they are presented in the Netdata - dashboard charts. This is the easiest to work with. - - The format of the metrics is: `CONTEXT_UNITS_average{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. - - When this source is used, Netdata keeps track of the last access time for each prometheus server fetching the - metrics. This last access time is used at the subsequent queries of the same prometheus server to identify the - time-frame over which the `average` will be calculated. - - So, no matter how frequently prometheus scrapes Netdata, it will get all the database data. - To identify each prometheus server, Netdata uses by default the IP of the client fetching the metrics. - - If there are multiple prometheus servers fetching data from the same Netdata, using the same IP, each prometheus - server can append `server=NAME` to the URL. Netdata will use this `NAME` to uniquely identify the prometheus server. - -- `sum` or `volume`, is like `average` but instead of averaging the values, it sums them. - - The format of the metrics is: `CONTEXT_UNITS_sum{chart="CHART",family="FAMILY",dimension="DIMENSION"}`. All the - other operations are the same as with `average`. - - To change the data source to `sum` or `as-collected` you need to provide the `source` parameter in the request URL. 
- e.g.: `http://your.netdata.ip:19999/api/v1/allmetrics?format=prometheus&help=yes&source=as-collected` - - Keep in mind that early versions of Netdata were sending the metrics as: `CHART_DIMENSION{}`. - -### Querying Metrics - -Fetch with your web browser this URL: - -`http://your.netdata.ip:19999/api/v1/allmetrics?format=prometheus&help=yes` - -_(replace `your.netdata.ip` with the ip or hostname of your Netdata server)_ - -Netdata will respond with all the metrics it sends to prometheus. - -If you search that page for `"system.cpu"` you will find all the metrics Netdata is exporting to prometheus for this -chart. `system.cpu` is the chart name on the Netdata dashboard (on the Netdata dashboard all charts have a text heading -such as : `Total CPU utilization (system.cpu)`. What we are interested here in the chart name: `system.cpu`). - -Searching for `"system.cpu"` reveals: - -```sh -# COMMENT homogeneous chart "system.cpu", context "system.cpu", family "cpu", units "percentage" -# COMMENT netdata_system_cpu_percentage_average: dimension "guest_nice", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="guest_nice"} 0.0000000 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "guest", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="guest"} 1.7837326 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "steal", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="steal"} 0.0000000 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "softirq", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="softirq"} 0.5275442 
1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "irq", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="irq"} 0.2260836 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "user", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="user"} 2.3362762 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "system", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="system"} 1.7961062 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "nice", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="nice"} 0.0000000 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "iowait", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="iowait"} 0.9671802 1500066662000 -# COMMENT netdata_system_cpu_percentage_average: dimension "idle", value is percentage, gauge, dt 1500066653 to 1500066662 inclusive -netdata_system_cpu_percentage_average{chart="system.cpu",family="cpu",dimension="idle"} 92.3630770 1500066662000 -``` - -_(Netdata response for `system.cpu` with source=`average`)_ - -In `average` or `sum` data sources, all values are normalized and are reported to prometheus as gauges. Now, use the -'expression' text form in prometheus. Begin to type the metrics we are looking for: `netdata_system_cpu`. You should see -that the text form begins to auto-fill as prometheus knows about this metric. 
- -If the data source was `as collected`, the response would be: - -```sh -# COMMENT homogeneous chart "system.cpu", context "system.cpu", family "cpu", units "percentage" -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "guest_nice", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="guest_nice"} 0 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "guest", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="guest"} 63945 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "steal", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="steal"} 0 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "softirq", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="softirq"} 8295 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "irq", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="irq"} 4079 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "user", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="user"} 116488 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "system", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="system"} 35084 1500066716438 -# COMMENT 
netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "nice", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="nice"} 505 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "iowait", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="iowait"} 23314 1500066716438 -# COMMENT netdata_system_cpu_total: chart "system.cpu", context "system.cpu", family "cpu", dimension "idle", value * 1 / 1 delta gives percentage (counter) -netdata_system_cpu_total{chart="system.cpu",family="cpu",dimension="idle"} 918470 1500066716438 -``` - -_(Netdata response for `system.cpu` with source=`as-collected`)_ - -For more information check prometheus documentation. - -### Streaming data from upstream hosts - -The `format=prometheus` parameter only exports the host's Netdata metrics. If you are using the parent-child -functionality of Netdata this ignores any upstream hosts - so you should consider using the below in your -**prometheus.yml**: - -```yaml - metrics_path: '/api/v1/allmetrics' - params: - format: [prometheus_all_hosts] - honor_labels: true -``` - -This will report all upstream host data, and `honor_labels` will make Prometheus take note of the instance names -provided. - -### Timestamps - -To pass the metrics through prometheus pushgateway, Netdata supports the option `&timestamps=no` to send the metrics -without timestamps. - -## Netdata host variables - -Netdata collects various system configuration metrics, like the max number of TCP sockets supported, the max number of -files allowed system-wide, various IPC sizes, etc. These metrics are not exposed to prometheus by default. - -To expose them, append `variables=yes` to the Netdata URL. 
- -### TYPE and HELP - -To save bandwidth, and because prometheus does not use them anyway, `# TYPE` and `# HELP` lines are suppressed. If -wanted they can be re-enabled via `types=yes` and `help=yes`, e.g. -`/api/v1/allmetrics?format=prometheus&types=yes&help=yes` - -Note that if enabled, the `# TYPE` and `# HELP` lines are repeated for every occurrence of a metric, which goes against the Prometheus documentation's [specification for these lines](https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#comments-help-text-and-type-information). - -### Names and IDs - -Netdata supports names and IDs for charts and dimensions. Usually IDs are unique identifiers as read by the system and -names are human friendly labels (also unique). - -Most charts and metrics have the same ID and name, but in several cases they are different: disks with device-mapper, -interrupts, QoS classes, statsd synthetic charts, etc. - -The default is controlled in `netdata.conf`: - -```conf -[backend] - send names instead of ids = yes | no -``` - -You can override it from prometheus, by appending to the URL: - -- `&names=no` to get IDs (the old behaviour) -- `&names=yes` to get names - -### Filtering metrics sent to prometheus - -Netdata can filter the metrics it sends to prometheus with this setting: - -```conf -[backend] - send charts matching = * -``` - -This setting accepts a space separated list of patterns to match the **charts** to be sent to prometheus. Each pattern -can use `*` as wildcard, any number of times (e.g. `*a*b*c*` is valid). Patterns starting with `!` give a negative match -(e.g. `!*.bad users.* groups.*` will send all the users and groups except `bad` user and `bad` group). The order is -important: the first match (positive or negative) left to right, is used. - -### Changing the prefix of Netdata metrics - -Netdata sends all metrics prefixed with `netdata_`. 
You can change this in `netdata.conf`, like this: - -```conf -[backend] - prefix = netdata -``` - -It can also be changed from the URL, by appending `&prefix=netdata`. - -### Metric Units - -The default source `average` adds the unit of measurement to the name of each metric (e.g. `_KiB_persec`). To hide the -units and get the same metric names as with the other sources, append to the URL `&hideunits=yes`. - -The units were standardized in v1.12, with the effect of changing the metric names. To get the metric names as they were -before v1.12, append to the URL `&oldunits=yes` - -### Accuracy of `average` and `sum` data sources - -When the data source is set to `average` or `sum`, Netdata remembers the last access of each client accessing prometheus -metrics and uses this last access time to respond with the `average` or `sum` of all the entries in the database since -that. This means that prometheus servers are not losing data when they access Netdata with data source = `average` or -`sum`. - -To uniquely identify each prometheus server, Netdata uses the IP of the client accessing the metrics. If however the IP -is not good enough for identifying a single prometheus server (e.g. when prometheus servers are accessing Netdata -through a web proxy, or when multiple prometheus servers are NATed to a single IP), each prometheus may append -`&server=NAME` to the URL. This `NAME` is used by Netdata to uniquely identify each prometheus server and keep track of -its last access time. 
- -[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fbackends%2Fprometheus%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/backends/prometheus/backend_prometheus.c b/backends/prometheus/backend_prometheus.c deleted file mode 100644 index 1fb3fd42c..000000000 --- a/backends/prometheus/backend_prometheus.c +++ /dev/null @@ -1,797 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#define BACKENDS_INTERNALS -#include "backend_prometheus.h" - -// ---------------------------------------------------------------------------- -// PROMETHEUS -// /api/v1/allmetrics?format=prometheus and /api/v1/allmetrics?format=prometheus_all_hosts - -static struct prometheus_server { - const char *server; - uint32_t hash; - RRDHOST *host; - time_t last_access; - struct prometheus_server *next; -} *prometheus_server_root = NULL; - -static inline time_t prometheus_server_last_access(const char *server, RRDHOST *host, time_t now) { - static netdata_mutex_t prometheus_server_root_mutex = NETDATA_MUTEX_INITIALIZER; - - uint32_t hash = simple_hash(server); - - netdata_mutex_lock(&prometheus_server_root_mutex); - - struct prometheus_server *ps; - for(ps = prometheus_server_root; ps ;ps = ps->next) { - if (host == ps->host && hash == ps->hash && !strcmp(server, ps->server)) { - time_t last = ps->last_access; - ps->last_access = now; - netdata_mutex_unlock(&prometheus_server_root_mutex); - return last; - } - } - - ps = callocz(1, sizeof(struct prometheus_server)); - ps->server = strdupz(server); - ps->hash = hash; - ps->host = host; - ps->last_access = now; - ps->next = prometheus_server_root; - prometheus_server_root = ps; - - netdata_mutex_unlock(&prometheus_server_root_mutex); - return 0; -} - -static inline size_t backends_prometheus_name_copy(char *d, const char *s, size_t usable) { - size_t n; - - for(n = 0; *s 
&& n < usable ; d++, s++, n++) { - register char c = *s; - - if(!isalnum(c)) *d = '_'; - else *d = c; - } - *d = '\0'; - - return n; -} - -static inline size_t backends_prometheus_label_copy(char *d, const char *s, size_t usable) { - size_t n; - - // make sure we can escape one character without overflowing the buffer - usable--; - - for(n = 0; *s && n < usable ; d++, s++, n++) { - register char c = *s; - - if(unlikely(c == '"' || c == '\\' || c == '\n')) { - *d++ = '\\'; - n++; - } - *d = c; - } - *d = '\0'; - - return n; -} - -static inline char *backends_prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits) { - const char *sorig = s; - char *ret = d; - size_t n; - - // Fix for issue 5227 - if (unlikely(showoldunits)) { - static struct { - const char *newunit; - uint32_t hash; - const char *oldunit; - } units[] = { - {"KiB/s", 0, "kilobytes/s"} - , {"MiB/s", 0, "MB/s"} - , {"GiB/s", 0, "GB/s"} - , {"KiB" , 0, "KB"} - , {"MiB" , 0, "MB"} - , {"GiB" , 0, "GB"} - , {"inodes" , 0, "Inodes"} - , {"percentage" , 0, "percent"} - , {"faults/s" , 0, "page faults/s"} - , {"KiB/operation", 0, "kilobytes per operation"} - , {"milliseconds/operation", 0, "ms per operation"} - , {NULL, 0, NULL} - }; - static int initialized = 0; - int i; - - if(unlikely(!initialized)) { - for (i = 0; units[i].newunit; i++) - units[i].hash = simple_hash(units[i].newunit); - initialized = 1; - } - - uint32_t hash = simple_hash(s); - for(i = 0; units[i].newunit ; i++) { - if(unlikely(hash == units[i].hash && !strcmp(s, units[i].newunit))) { - // info("matched extension for filename '%s': '%s'", filename, last_dot); - s=units[i].oldunit; - sorig = s; - break; - } - } - } - *d++ = '_'; - for(n = 1; *s && n < usable ; d++, s++, n++) { - register char c = *s; - - if(!isalnum(c)) *d = '_'; - else *d = c; - } - - if(n == 2 && sorig[0] == '%') { - n = 0; - d = ret; - s = "_percent"; - for( ; *s && n < usable ; n++) *d++ = *s++; - } - else if(n > 3 && sorig[n-3] == '/' && 
sorig[n-2] == 's') { - n = n - 2; - d -= 2; - s = "_persec"; - for( ; *s && n < usable ; n++) *d++ = *s++; - } - - *d = '\0'; - - return ret; -} - - -#define PROMETHEUS_ELEMENT_MAX 256 -#define PROMETHEUS_LABELS_MAX 1024 -#define PROMETHEUS_VARIABLE_MAX 256 - -#define PROMETHEUS_LABELS_MAX_NUMBER 128 - -struct host_variables_callback_options { - RRDHOST *host; - BUFFER *wb; - BACKEND_OPTIONS backend_options; - BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options; - const char *prefix; - const char *labels; - time_t now; - int host_header_printed; - char name[PROMETHEUS_VARIABLE_MAX+1]; -}; - -static int print_host_variables(RRDVAR *rv, void *data) { - struct host_variables_callback_options *opts = data; - - if(rv->options & (RRDVAR_OPTION_CUSTOM_HOST_VAR|RRDVAR_OPTION_CUSTOM_CHART_VAR)) { - if(!opts->host_header_printed) { - opts->host_header_printed = 1; - - if(opts->output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP) { - buffer_sprintf(opts->wb, "\n# COMMENT global host and chart variables\n"); - } - } - - calculated_number value = rrdvar2number(rv); - if(isnan(value) || isinf(value)) { - if(opts->output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP) - buffer_sprintf(opts->wb, "# COMMENT variable \"%s\" is %s. 
Skipped.\n", rv->name, (isnan(value))?"NAN":"INF"); - - return 0; - } - - char *label_pre = ""; - char *label_post = ""; - if(opts->labels && *opts->labels) { - label_pre = "{"; - label_post = "}"; - } - - backends_prometheus_name_copy(opts->name, rv->name, sizeof(opts->name)); - - if(opts->output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) - buffer_sprintf(opts->wb - , "%s_%s%s%s%s " CALCULATED_NUMBER_FORMAT " %llu\n" - , opts->prefix - , opts->name - , label_pre - , opts->labels - , label_post - , value - , opts->now * 1000ULL - ); - else - buffer_sprintf(opts->wb, "%s_%s%s%s%s " CALCULATED_NUMBER_FORMAT "\n" - , opts->prefix - , opts->name - , label_pre - , opts->labels - , label_post - , value - ); - - return 1; - } - - return 0; -} - -static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER *wb, const char *prefix, BACKEND_OPTIONS backend_options, time_t after, time_t before, int allhosts, BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options) { - rrdhost_rdlock(host); - - char hostname[PROMETHEUS_ELEMENT_MAX + 1]; - backends_prometheus_label_copy(hostname, host->hostname, PROMETHEUS_ELEMENT_MAX); - - char labels[PROMETHEUS_LABELS_MAX + 1] = ""; - if(allhosts) { - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) - buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1 %llu\n", hostname, host->program_name, host->program_version, now_realtime_usec() / USEC_PER_MS); - else - buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1\n", hostname, host->program_name, host->program_version); - - if(host->tags && *(host->tags)) { - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) { - buffer_sprintf(wb, "netdata_host_tags_info{instance=\"%s\",%s} 1 %llu\n", hostname, host->tags, now_realtime_usec() / USEC_PER_MS); - - // deprecated, exists only for compatibility with older queries - buffer_sprintf(wb, "netdata_host_tags{instance=\"%s\",%s} 1 %llu\n", hostname, 
host->tags, now_realtime_usec() / USEC_PER_MS); - } - else { - buffer_sprintf(wb, "netdata_host_tags_info{instance=\"%s\",%s} 1\n", hostname, host->tags); - - // deprecated, exists only for compatibility with older queries - buffer_sprintf(wb, "netdata_host_tags{instance=\"%s\",%s} 1\n", hostname, host->tags); - } - - } - - snprintfz(labels, PROMETHEUS_LABELS_MAX, ",instance=\"%s\"", hostname); - } - else { - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) - buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1 %llu\n", hostname, host->program_name, host->program_version, now_realtime_usec() / USEC_PER_MS); - else - buffer_sprintf(wb, "netdata_info{instance=\"%s\",application=\"%s\",version=\"%s\"} 1\n", hostname, host->program_name, host->program_version); - - if(host->tags && *(host->tags)) { - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) { - buffer_sprintf(wb, "netdata_host_tags_info{%s} 1 %llu\n", host->tags, now_realtime_usec() / USEC_PER_MS); - - // deprecated, exists only for compatibility with older queries - buffer_sprintf(wb, "netdata_host_tags{%s} 1 %llu\n", host->tags, now_realtime_usec() / USEC_PER_MS); - } - else { - buffer_sprintf(wb, "netdata_host_tags_info{%s} 1\n", host->tags); - - // deprecated, exists only for compatibility with older queries - buffer_sprintf(wb, "netdata_host_tags{%s} 1\n", host->tags); - } - } - } - - // send custom variables set for the host - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_VARIABLES){ - struct host_variables_callback_options opts = { - .host = host, - .wb = wb, - .labels = (labels[0] == ',')?&labels[1]:labels, - .backend_options = backend_options, - .output_options = output_options, - .prefix = prefix, - .now = now_realtime_sec(), - .host_header_printed = 0 - }; - foreach_host_variable_callback(host, print_host_variables, &opts); - } - - // for each chart - RRDSET *st; - rrdset_foreach_read(st, host) { - char chart[PROMETHEUS_ELEMENT_MAX + 1]; - char 
context[PROMETHEUS_ELEMENT_MAX + 1]; - char family[PROMETHEUS_ELEMENT_MAX + 1]; - char units[PROMETHEUS_ELEMENT_MAX + 1] = ""; - - backends_prometheus_label_copy(chart, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && st->name)?st->name:st->id, PROMETHEUS_ELEMENT_MAX); - backends_prometheus_label_copy(family, st->family, PROMETHEUS_ELEMENT_MAX); - backends_prometheus_name_copy(context, st->context, PROMETHEUS_ELEMENT_MAX); - - if(likely(backends_can_send_rrdset(backend_options, st))) { - rrdset_rdlock(st); - - int as_collected = (BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AS_COLLECTED); - int homogeneous = 1; - if(as_collected) { - if(rrdset_flag_check(st, RRDSET_FLAG_HOMOGENEOUS_CHECK)) - rrdset_update_heterogeneous_flag(st); - - if(rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) - homogeneous = 0; - } - else { - if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE && !(output_options & BACKENDS_PROMETHEUS_OUTPUT_HIDEUNITS)) - backends_prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, output_options & BACKENDS_PROMETHEUS_OUTPUT_OLDUNITS); - } - - if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP)) - buffer_sprintf(wb, "\n# COMMENT %s chart \"%s\", context \"%s\", family \"%s\", units \"%s\"\n" - , (homogeneous)?"homogeneous":"heterogeneous" - , (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && st->name) ? 
st->name : st->id - , st->context - , st->family - , st->units - ); - - // for each dimension - RRDDIM *rd; - rrddim_foreach_read(rd, st) { - if(rd->collections_counter && !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { - char dimension[PROMETHEUS_ELEMENT_MAX + 1]; - char *suffix = ""; - - if (as_collected) { - // we need as-collected / raw data - - if(unlikely(rd->last_collected_time.tv_sec < after)) - continue; - - const char *t = "gauge", *h = "gives"; - if(rd->algorithm == RRD_ALGORITHM_INCREMENTAL || - rd->algorithm == RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL) { - t = "counter"; - h = "delta gives"; - suffix = "_total"; - } - - if(homogeneous) { - // all the dimensions of the chart, has the same algorithm, multiplier and divisor - // we add all dimensions as labels - - backends_prometheus_label_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX); - - if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP)) - buffer_sprintf(wb - , "# COMMENT %s_%s%s: chart \"%s\", context \"%s\", family \"%s\", dimension \"%s\", value * " COLLECTED_NUMBER_FORMAT " / " COLLECTED_NUMBER_FORMAT " %s %s (%s)\n" - , prefix - , context - , suffix - , (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && st->name) ? st->name : st->id - , st->context - , st->family - , (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? 
rd->name : rd->id - , rd->multiplier - , rd->divisor - , h - , st->units - , t - ); - - if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_TYPES)) - buffer_sprintf(wb, "# TYPE %s_%s%s %s\n" - , prefix - , context - , suffix - , t - ); - - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) - buffer_sprintf(wb - , "%s_%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " COLLECTED_NUMBER_FORMAT " %llu\n" - , prefix - , context - , suffix - , chart - , family - , dimension - , labels - , rd->last_collected_value - , timeval_msec(&rd->last_collected_time) - ); - else - buffer_sprintf(wb - , "%s_%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " COLLECTED_NUMBER_FORMAT "\n" - , prefix - , context - , suffix - , chart - , family - , dimension - , labels - , rd->last_collected_value - ); - } - else { - // the dimensions of the chart, do not have the same algorithm, multiplier or divisor - // we create a metric per dimension - - backends_prometheus_name_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX); - - if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP)) - buffer_sprintf(wb - , "# COMMENT %s_%s_%s%s: chart \"%s\", context \"%s\", family \"%s\", dimension \"%s\", value * " COLLECTED_NUMBER_FORMAT " / " COLLECTED_NUMBER_FORMAT " %s %s (%s)\n" - , prefix - , context - , dimension - , suffix - , (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && st->name) ? st->name : st->id - , st->context - , st->family - , (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? 
rd->name : rd->id - , rd->multiplier - , rd->divisor - , h - , st->units - , t - ); - - if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_TYPES)) - buffer_sprintf(wb, "# TYPE %s_%s_%s%s %s\n" - , prefix - , context - , dimension - , suffix - , t - ); - - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) - buffer_sprintf(wb - , "%s_%s_%s%s{chart=\"%s\",family=\"%s\"%s} " COLLECTED_NUMBER_FORMAT " %llu\n" - , prefix - , context - , dimension - , suffix - , chart - , family - , labels - , rd->last_collected_value - , timeval_msec(&rd->last_collected_time) - ); - else - buffer_sprintf(wb - , "%s_%s_%s%s{chart=\"%s\",family=\"%s\"%s} " COLLECTED_NUMBER_FORMAT "\n" - , prefix - , context - , dimension - , suffix - , chart - , family - , labels - , rd->last_collected_value - ); - } - } - else { - // we need average or sum of the data - - time_t first_t = after, last_t = before; - calculated_number value = backend_calculate_value_from_stored_data(st, rd, after, before, backend_options, &first_t, &last_t); - - if(!isnan(value) && !isinf(value)) { - - if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE) - suffix = "_average"; - else if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_SUM) - suffix = "_sum"; - - backends_prometheus_label_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX); - - if (unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP)) - buffer_sprintf(wb, "# COMMENT %s_%s%s%s: dimension \"%s\", value is %s, gauge, dt %llu to %llu inclusive\n" - , prefix - , context - , units - , suffix - , (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? 
rd->name : rd->id - , st->units - , (unsigned long long)first_t - , (unsigned long long)last_t - ); - - if (unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_TYPES)) - buffer_sprintf(wb, "# TYPE %s_%s%s%s gauge\n" - , prefix - , context - , units - , suffix - ); - - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS) - buffer_sprintf(wb, "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " CALCULATED_NUMBER_FORMAT " %llu\n" - , prefix - , context - , units - , suffix - , chart - , family - , dimension - , labels - , value - , last_t * MSEC_PER_SEC - ); - else - buffer_sprintf(wb, "%s_%s%s%s{chart=\"%s\",family=\"%s\",dimension=\"%s\"%s} " CALCULATED_NUMBER_FORMAT "\n" - , prefix - , context - , units - , suffix - , chart - , family - , dimension - , labels - , value - ); - } - } - } - } - - rrdset_unlock(st); - } - } - - rrdhost_unlock(host); -} - -#if ENABLE_PROMETHEUS_REMOTE_WRITE -inline static void remote_write_split_words(char *str, char **words, int max_words) { - char *s = str; - int i = 0; - - while(*s && i < max_words - 1) { - while(*s && isspace(*s)) s++; // skip spaces to the beginning of a tag name - - if(*s) - words[i] = s; - - while(*s && !isspace(*s) && *s != '=') s++; // find the end of the tag name - - if(*s != '=') { - words[i] = NULL; - break; - } - *s = '\0'; - s++; - i++; - - while(*s && isspace(*s)) s++; // skip spaces to the beginning of a tag value - - if(*s && *s == '"') s++; // strip an opening quote - if(*s) - words[i] = s; - - while(*s && !isspace(*s) && *s != ',') s++; // find the end of the tag value - - if(*s && *s != ',') { - words[i] = NULL; - break; - } - if(s != words[i] && *(s - 1) == '"') *(s - 1) = '\0'; // strip a closing quote - if(*s != '\0') { - *s = '\0'; - s++; - i++; - } - } -} - -void backends_rrd_stats_remote_write_allmetrics_prometheus( - RRDHOST *host - , const char *__hostname - , const char *prefix - , BACKEND_OPTIONS backend_options - , time_t after - , time_t before - , size_t *count_charts - , 
size_t *count_dims - , size_t *count_dims_skipped -) { - char hostname[PROMETHEUS_ELEMENT_MAX + 1]; - backends_prometheus_label_copy(hostname, __hostname, PROMETHEUS_ELEMENT_MAX); - - backends_add_host_info("netdata_info", hostname, host->program_name, host->program_version, now_realtime_usec() / USEC_PER_MS); - - if(host->tags && *(host->tags)) { - char tags[PROMETHEUS_LABELS_MAX + 1]; - strncpy(tags, host->tags, PROMETHEUS_LABELS_MAX); - char *words[PROMETHEUS_LABELS_MAX_NUMBER] = {NULL}; - int i; - - remote_write_split_words(tags, words, PROMETHEUS_LABELS_MAX_NUMBER); - - backends_add_host_info("netdata_host_tags_info", hostname, NULL, NULL, now_realtime_usec() / USEC_PER_MS); - - for(i = 0; words[i] != NULL && words[i + 1] != NULL && (i + 1) < PROMETHEUS_LABELS_MAX_NUMBER; i += 2) { - backends_add_tag(words[i], words[i + 1]); - } - } - - // for each chart - RRDSET *st; - rrdset_foreach_read(st, host) { - char chart[PROMETHEUS_ELEMENT_MAX + 1]; - char context[PROMETHEUS_ELEMENT_MAX + 1]; - char family[PROMETHEUS_ELEMENT_MAX + 1]; - char units[PROMETHEUS_ELEMENT_MAX + 1] = ""; - - backends_prometheus_label_copy(chart, (backend_options & BACKEND_OPTION_SEND_NAMES && st->name)?st->name:st->id, PROMETHEUS_ELEMENT_MAX); - backends_prometheus_label_copy(family, st->family, PROMETHEUS_ELEMENT_MAX); - backends_prometheus_name_copy(context, st->context, PROMETHEUS_ELEMENT_MAX); - - if(likely(backends_can_send_rrdset(backend_options, st))) { - rrdset_rdlock(st); - - (*count_charts)++; - - int as_collected = (BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AS_COLLECTED); - int homogeneous = 1; - if(as_collected) { - if(rrdset_flag_check(st, RRDSET_FLAG_HOMOGENEOUS_CHECK)) - rrdset_update_heterogeneous_flag(st); - - if(rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) - homogeneous = 0; - } - else { - if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE) - backends_prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, 
0); - } - - // for each dimension - RRDDIM *rd; - rrddim_foreach_read(rd, st) { - if(rd->collections_counter && !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { - char name[PROMETHEUS_LABELS_MAX + 1]; - char dimension[PROMETHEUS_ELEMENT_MAX + 1]; - char *suffix = ""; - - if (as_collected) { - // we need as-collected / raw data - - if(unlikely(rd->last_collected_time.tv_sec < after)) { - debug(D_BACKEND, "BACKEND: not sending dimension '%s' of chart '%s' from host '%s', its last data collection (%lu) is not within our timeframe (%lu to %lu)", rd->id, st->id, __hostname, (unsigned long)rd->last_collected_time.tv_sec, (unsigned long)after, (unsigned long)before); - (*count_dims_skipped)++; - continue; - } - - if(homogeneous) { - // all the dimensions of the chart, has the same algorithm, multiplier and divisor - // we add all dimensions as labels - - backends_prometheus_label_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX); - snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s", prefix, context, suffix); - - backends_add_metric(name, chart, family, dimension, hostname, rd->last_collected_value, timeval_msec(&rd->last_collected_time)); - (*count_dims)++; - } - else { - // the dimensions of the chart, do not have the same algorithm, multiplier or divisor - // we create a metric per dimension - - backends_prometheus_name_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? 
rd->name : rd->id, PROMETHEUS_ELEMENT_MAX); - snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s_%s%s", prefix, context, dimension, suffix); - - backends_add_metric(name, chart, family, NULL, hostname, rd->last_collected_value, timeval_msec(&rd->last_collected_time)); - (*count_dims)++; - } - } - else { - // we need average or sum of the data - - time_t first_t = after, last_t = before; - calculated_number value = backend_calculate_value_from_stored_data(st, rd, after, before, backend_options, &first_t, &last_t); - - if(!isnan(value) && !isinf(value)) { - - if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE) - suffix = "_average"; - else if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_SUM) - suffix = "_sum"; - - backends_prometheus_label_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX); - snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s%s", prefix, context, units, suffix); - - backends_add_metric(name, chart, family, dimension, hostname, value, last_t * MSEC_PER_SEC); - (*count_dims)++; - } - } - } - } - - rrdset_unlock(st); - } - } -} -#endif /* ENABLE_PROMETHEUS_REMOTE_WRITE */ - -static inline time_t prometheus_preparation(RRDHOST *host, BUFFER *wb, BACKEND_OPTIONS backend_options, const char *server, time_t now, BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options) { - if(!server || !*server) server = "default"; - - time_t after = prometheus_server_last_access(server, host, now); - - int first_seen = 0; - if(!after) { - after = now - global_backend_update_every; - first_seen = 1; - } - - if(after > now) { - // oops! 
this should never happen - after = now - global_backend_update_every; - } - - if(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP) { - char *mode; - if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AS_COLLECTED) - mode = "as collected"; - else if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE) - mode = "average"; - else if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_SUM) - mode = "sum"; - else - mode = "unknown"; - - buffer_sprintf(wb, "# COMMENT netdata \"%s\" to %sprometheus \"%s\", source \"%s\", last seen %lu %s, time range %lu to %lu\n\n" - , host->hostname - , (first_seen)?"FIRST SEEN ":"" - , server - , mode - , (unsigned long)((first_seen)?0:(now - after)) - , (first_seen)?"never":"seconds ago" - , (unsigned long)after, (unsigned long)now - ); - } - - return after; -} - -void backends_rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, BACKEND_OPTIONS backend_options, BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options) { - time_t before = now_realtime_sec(); - - // we start at the point we had stopped before - time_t after = prometheus_preparation(host, wb, backend_options, server, before, output_options); - - rrd_stats_api_v1_charts_allmetrics_prometheus(host, wb, prefix, backend_options, after, before, 0, output_options); -} - -void backends_rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, BACKEND_OPTIONS backend_options, BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options) { - time_t before = now_realtime_sec(); - - // we start at the point we had stopped before - time_t after = prometheus_preparation(host, wb, backend_options, server, before, output_options); - - rrd_rdlock(); - rrdhost_foreach_read(host) { - rrd_stats_api_v1_charts_allmetrics_prometheus(host, wb, prefix, backend_options, after, before, 1, output_options); - } - 
rrd_unlock(); -} - -#if ENABLE_PROMETHEUS_REMOTE_WRITE -int backends_process_prometheus_remote_write_response(BUFFER *b) { - if(unlikely(!b)) return 1; - - const char *s = buffer_tostring(b); - int len = buffer_strlen(b); - - // do nothing with HTTP responses 200 or 204 - - while(!isspace(*s) && len) { - s++; - len--; - } - s++; - len--; - - if(likely(len > 4 && (!strncmp(s, "200 ", 4) || !strncmp(s, "204 ", 4)))) - return 0; - else - return discard_response(b, "prometheus remote write"); -} -#endif diff --git a/backends/prometheus/backend_prometheus.h b/backends/prometheus/backend_prometheus.h deleted file mode 100644 index 8c14ddc26..000000000 --- a/backends/prometheus/backend_prometheus.h +++ /dev/null @@ -1,37 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_BACKEND_PROMETHEUS_H -#define NETDATA_BACKEND_PROMETHEUS_H 1 - -#include "backends/backends.h" - -typedef enum backends_prometheus_output_flags { - BACKENDS_PROMETHEUS_OUTPUT_NONE = 0, - BACKENDS_PROMETHEUS_OUTPUT_HELP = (1 << 0), - BACKENDS_PROMETHEUS_OUTPUT_TYPES = (1 << 1), - BACKENDS_PROMETHEUS_OUTPUT_NAMES = (1 << 2), - BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS = (1 << 3), - BACKENDS_PROMETHEUS_OUTPUT_VARIABLES = (1 << 4), - BACKENDS_PROMETHEUS_OUTPUT_OLDUNITS = (1 << 5), - BACKENDS_PROMETHEUS_OUTPUT_HIDEUNITS = (1 << 6) -} BACKENDS_PROMETHEUS_OUTPUT_OPTIONS; - -extern void backends_rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, BACKEND_OPTIONS backend_options, BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options); -extern void backends_rrd_stats_api_v1_charts_allmetrics_prometheus_all_hosts(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, BACKEND_OPTIONS backend_options, BACKENDS_PROMETHEUS_OUTPUT_OPTIONS output_options); - -#if ENABLE_PROMETHEUS_REMOTE_WRITE -extern void backends_rrd_stats_remote_write_allmetrics_prometheus( - RRDHOST *host - , const char *__hostname - , const char *prefix 
- , BACKEND_OPTIONS backend_options - , time_t after - , time_t before - , size_t *count_charts - , size_t *count_dims - , size_t *count_dims_skipped -); -extern int backends_process_prometheus_remote_write_response(BUFFER *b); -#endif - -#endif //NETDATA_BACKEND_PROMETHEUS_H diff --git a/backends/prometheus/remote_write/Makefile.am b/backends/prometheus/remote_write/Makefile.am deleted file mode 100644 index d049ef48c..000000000 --- a/backends/prometheus/remote_write/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in - -CLEANFILES = \ - remote_write.pb.cc \ - remote_write.pb.h \ - $(NULL) - -dist_noinst_DATA = \ - remote_write.proto \ - README.md \ - $(NULL) diff --git a/backends/prometheus/remote_write/README.md b/backends/prometheus/remote_write/README.md deleted file mode 100644 index b83575e10..000000000 --- a/backends/prometheus/remote_write/README.md +++ /dev/null @@ -1,41 +0,0 @@ -<!-- -title: "Prometheus remote write backend" -custom_edit_url: https://github.com/netdata/netdata/edit/master/backends/prometheus/remote_write/README.md ---> - -# Prometheus remote write backend - -## Prerequisites - -To use the prometheus remote write API with [storage -providers](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) -[protobuf](https://developers.google.com/protocol-buffers/) and [snappy](https://github.com/google/snappy) libraries -should be installed first. Next, Netdata should be re-installed from the source. The installer will detect that the -required libraries and utilities are now available. - -## Configuration - -An additional option in the backend configuration section is available for the remote write backend: - -```conf -[backend] - remote write URL path = /receive -``` - -The default value is `/receive`. `remote write URL path` is used to set an endpoint path for the remote write protocol. 
-For example, if your endpoint is `http://example.domain:example_port/storage/read` you should set - -```conf -[backend] - destination = example.domain:example_port - remote write URL path = /storage/read -``` - -`buffered` and `lost` dimensions in the Netdata Backend Data Size operation monitoring chart estimate uncompressed -buffer size on failures. - -## Notes - -The remote write backend does not support `buffer on failures` - -[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fbackends%2Fprometheus%2Fremote_write%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/backends/prometheus/remote_write/remote_write.cc b/backends/prometheus/remote_write/remote_write.cc deleted file mode 100644 index b919cffad..000000000 --- a/backends/prometheus/remote_write/remote_write.cc +++ /dev/null @@ -1,120 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include <snappy.h> -#include "exporting/prometheus/remote_write/remote_write.pb.h" -#include "remote_write.h" - -using namespace prometheus; - -static google::protobuf::Arena arena; -static WriteRequest *write_request; - -void backends_init_write_request() { - GOOGLE_PROTOBUF_VERIFY_VERSION; - write_request = google::protobuf::Arena::CreateMessage<WriteRequest>(&arena); -} - -void backends_clear_write_request() { - write_request->clear_timeseries(); -} - -void backends_add_host_info(const char *name, const char *instance, const char *application, const char *version, const int64_t timestamp) { - TimeSeries *timeseries; - Sample *sample; - Label *label; - - timeseries = write_request->add_timeseries(); - - label = timeseries->add_labels(); - label->set_name("__name__"); - label->set_value(name); - - label = timeseries->add_labels(); - label->set_name("instance"); - label->set_value(instance); - - if(application) { - label = timeseries->add_labels(); 
- label->set_name("application"); - label->set_value(application); - } - - if(version) { - label = timeseries->add_labels(); - label->set_name("version"); - label->set_value(version); - } - - sample = timeseries->add_samples(); - sample->set_value(1); - sample->set_timestamp(timestamp); -} - -// adds tag to the last created timeseries -void backends_add_tag(char *tag, char *value) { - TimeSeries *timeseries; - Label *label; - - timeseries = write_request->mutable_timeseries(write_request->timeseries_size() - 1); - - label = timeseries->add_labels(); - label->set_name(tag); - label->set_value(value); -} - -void backends_add_metric(const char *name, const char *chart, const char *family, const char *dimension, const char *instance, const double value, const int64_t timestamp) { - TimeSeries *timeseries; - Sample *sample; - Label *label; - - timeseries = write_request->add_timeseries(); - - label = timeseries->add_labels(); - label->set_name("__name__"); - label->set_value(name); - - label = timeseries->add_labels(); - label->set_name("chart"); - label->set_value(chart); - - label = timeseries->add_labels(); - label->set_name("family"); - label->set_value(family); - - if(dimension) { - label = timeseries->add_labels(); - label->set_name("dimension"); - label->set_value(dimension); - } - - label = timeseries->add_labels(); - label->set_name("instance"); - label->set_value(instance); - - sample = timeseries->add_samples(); - sample->set_value(value); - sample->set_timestamp(timestamp); -} - -size_t backends_get_write_request_size(){ -#if GOOGLE_PROTOBUF_VERSION < 3001000 - size_t size = (size_t)snappy::MaxCompressedLength(write_request->ByteSize()); -#else - size_t size = (size_t)snappy::MaxCompressedLength(write_request->ByteSizeLong()); -#endif - - return (size < INT_MAX)?size:0; -} - -int backends_pack_write_request(char *buffer, size_t *size) { - std::string uncompressed_write_request; - if(write_request->SerializeToString(&uncompressed_write_request) == false) 
return 1; - - snappy::RawCompress(uncompressed_write_request.data(), uncompressed_write_request.size(), buffer, size); - - return 0; -} - -void backends_protocol_buffers_shutdown() { - google::protobuf::ShutdownProtobufLibrary(); -} diff --git a/backends/prometheus/remote_write/remote_write.h b/backends/prometheus/remote_write/remote_write.h deleted file mode 100644 index 1307d7281..000000000 --- a/backends/prometheus/remote_write/remote_write.h +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_BACKEND_PROMETHEUS_REMOTE_WRITE_H -#define NETDATA_BACKEND_PROMETHEUS_REMOTE_WRITE_H - -#ifdef __cplusplus -extern "C" { -#endif - -void backends_init_write_request(); - -void backends_clear_write_request(); - -void backends_add_host_info(const char *name, const char *instance, const char *application, const char *version, const int64_t timestamp); - -void backends_add_tag(char *tag, char *value); - -void backends_add_metric(const char *name, const char *chart, const char *family, const char *dimension, const char *instance, const double value, const int64_t timestamp); - -size_t backends_get_write_request_size(); - -int backends_pack_write_request(char *buffer, size_t *size); - -void backends_protocol_buffers_shutdown(); - -#ifdef __cplusplus -} -#endif - -#endif //NETDATA_BACKEND_PROMETHEUS_REMOTE_WRITE_H diff --git a/backends/prometheus/remote_write/remote_write.proto b/backends/prometheus/remote_write/remote_write.proto deleted file mode 100644 index dfde254e1..000000000 --- a/backends/prometheus/remote_write/remote_write.proto +++ /dev/null @@ -1,29 +0,0 @@ -syntax = "proto3"; -package prometheus; - -option cc_enable_arenas = true; - -import "google/protobuf/descriptor.proto"; - -message WriteRequest { - repeated TimeSeries timeseries = 1 [(nullable) = false]; -} - -message TimeSeries { - repeated Label labels = 1 [(nullable) = false]; - repeated Sample samples = 2 [(nullable) = false]; -} - -message Label { - string name = 
1; - string value = 2; -} - -message Sample { - double value = 1; - int64 timestamp = 2; -} - -extend google.protobuf.FieldOptions { - bool nullable = 65001; -} |