diff options
Diffstat (limited to '')
-rw-r--r-- | collectors/statsd.plugin/Makefile.am | 23 | ||||
-rw-r--r-- | collectors/statsd.plugin/README.md | 699 | ||||
-rw-r--r-- | collectors/statsd.plugin/asterisk.conf | 208 | ||||
-rw-r--r-- | collectors/statsd.plugin/asterisk.md | 61 | ||||
-rw-r--r-- | collectors/statsd.plugin/example.conf | 64 | ||||
-rw-r--r-- | collectors/statsd.plugin/k6.conf | 110 | ||||
-rw-r--r-- | collectors/statsd.plugin/k6.md | 76 | ||||
-rw-r--r-- | collectors/statsd.plugin/statsd.c | 2844 |
8 files changed, 4085 insertions, 0 deletions
diff --git a/collectors/statsd.plugin/Makefile.am b/collectors/statsd.plugin/Makefile.am new file mode 100644 index 0000000..c8144c1 --- /dev/null +++ b/collectors/statsd.plugin/Makefile.am @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) + +statsdconfigdir=$(libconfigdir)/statsd.d +dist_statsdconfig_DATA = \ + example.conf \ + k6.conf \ + asterisk.conf \ + $(NULL) + +userstatsdconfigdir=$(configdir)/statsd.d +dist_userstatsdconfig_DATA = \ + $(NULL) + +# Explicitly install directories to avoid permission issues due to umask +install-exec-local: + $(INSTALL) -d $(DESTDIR)$(userstatsdconfigdir) diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md new file mode 100644 index 0000000..b46ca28 --- /dev/null +++ b/collectors/statsd.plugin/README.md @@ -0,0 +1,699 @@ +<!-- +title: "statsd.plugin" +description: "The Netdata Agent is a fully-featured StatsD server that collects metrics from any custom application and visualizes them in real-time." +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/statsd.plugin/README.md +--> + +StatsD is a system to collect data from any application. Applications send metrics to it, usually via non-blocking UDP communication, and StatsD servers collect these metrics, perform a few simple calculations on them and push them to backend time-series databases. + +If you want to learn more about the StatsD protocol, we have written a [blog post](https://www.netdata.cloud/blog/introduction-to-statsd/) about it! + + +Netdata is a fully featured statsd server. It can collect statsd formatted metrics, visualize them on its dashboards and store them in it's database for long-term retention. + +Netdata statsd is inside Netdata (an internal plugin, running inside the Netdata daemon), it is configured via `netdata.conf` and by-default listens on standard statsd port 8125. Netdata supports both TCP and UDP packets at the same time. + +Since statsd is embedded in Netdata, it means you now have a statsd server embedded on all your servers. + +Netdata statsd is fast. It can collect several millions of metrics per second on modern hardware, using just 1 CPU core. The implementation uses two threads: one thread collects metrics, another thread updates the charts from the collected data. + +## Available StatsD synthetic application charts + +Netdata ships with a few synthetic chart definitions to automatically present application metrics into a more uniform way. These synthetic charts are configuration files (you can create your own) that re-arrange statsd metrics into a more meaningful way. + +On synthetic charts, we can have alarms as with any metric and chart. + +- [K6 load testing tool](https://k6.io) + - **Description:** k6 is a developer-centric, free and open-source load testing tool built for making performance testing a productive and enjoyable experience. + - [Documentation](/collectors/statsd.plugin/k6.md) + - [Configuration](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/k6.conf) +- [Asterisk](https://www.asterisk.org/) + - **Description:** Asterisk is an Open Source PBX and telephony toolkit. + - [Documentation](/collectors/statsd.plugin/asterisk.md) + - [Configuration](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/asterisk.conf) + +## Metrics supported by Netdata + +Netdata fully supports the StatsD protocol and also extends it to support more advanced Netdata specific use cases. All StatsD client libraries can be used with Netdata too. + +- **Gauges** + + The application sends `name:value|g`, where `value` is any **decimal/fractional** number, StatsD reports the latest value collected and the number of times it was updated (events). + + The application may increment or decrement a previous value, by setting the first character of the value to `+` or `-` (so, the only way to set a gauge to an absolute negative value, is to first set it to zero). + + [Sampling rate](#sampling-rates) is supported. + [Tags](#tags) are supported for changing chart units, family and dimension name. + + When a gauge is not collected and the setting is not to show gaps on the charts (the default), the last value will be shown, until a data collection event changes it. + +- **Counters** and **Meters** + + The application sends `name:value|c`, `name:value|C` or `name:value|m`, where `value` is a positive or negative **integer** number of events occurred, StatsD reports the **rate** and the number of times it was updated (events). + + `:value` can be omitted and StatsD will assume it is `1`. `|c`, `|C` and `|m` can be omitted and StatsD will assume it is `|m`. So, the application may send just `name` and StatsD will parse it as `name:1|m`. + + - Counters use `|c` (etsy/StatsD compatible) or `|C` (brubeck compatible) + - Meters use `|m` + + [Sampling rate](#sampling-rates) is supported. + [Tags](#tags) are supported for changing chart units, family and dimension name. + + When a counter or meter is not collected, StatsD **defaults** to showing a zero value, until a data collection event changes the value. + +- **Timers** and **Histograms** + + The application sends `name:value|ms` or `name:value|h`, where `value` is any **decimal/fractional** number, StatsD reports **min**, **max**, **average**, **95th percentile**, **median** and **standard deviation** and the total number of times it was updated (events). Internally it also calculates the **sum**, which is available for synthetic charts. + + - Timers use `|ms` + - Histograms use `|h` + + The only difference between the two, is the `units` of the charts, as timers report *milliseconds*. + + [Sampling rate](#sampling-rates) is supported. + [Tags](#tags) are supported for changing chart units and family. + + When a counter or meter is not collected, StatsD **defaults** to showing a zero value, until a data collection event changes the value. + +- **Sets** + + The application sends `name:value|s`, where `value` is anything (**number or text**, leading and trailing spaces are removed), StatsD reports the number of unique values sent and the number of times it was updated (events). + + Sampling rate is **not** supported for Sets. `value` is always considered text (so `01` and `1` are considered different). + + [Tags](#tags) are supported for changing chart units and family. + + When a set is not collected, Netdata **defaults** to showing a zero value, until a data collection event changes the value. + +- **Dictionaries** + + The application sends `name:value|d`, where `value` is anything (**number or text**, leading and trailing spaces are removed), StatsD reports the number of events sent for each `value` and the total times `name` was updated (events). + + Sampling rate is **not** supported for Dictionaries. `value` is always considered text (so `01` and `1` are considered different). + + [Tags](#tags) are supported for changing chart units and family. + + When a set is not collected, Netdata **defaults** to showing a zero value, until a data collection event changes the value. + +#### Sampling Rates + +The application may append `|@sampling_rate`, where `sampling_rate` is a number from `0.0` to `1.0` in order for StatD to extrapolate the value and predict the total for the entire period. If the application reports to StatsD a value for 1/10th of the time, it can append `|@0.1` to the metrics it sends to statsd. + +#### Tags + +The application may append `|#tag1:value1,tag2:value2,tag3:value3` etc, where `tagX` and `valueX` are strings. `:valueX` can be omitted. + +Currently, Netdata uses only 2 tags: + + * `units=string` which sets the units of the chart that is automatically generated + * `family=string` which sets the family of the chart that is automatically generated (the family is the submenu of the dashboard) + * `name=string` which sets the name of the dimension of the chart that is automatically generated (only for counters, meters, gauges) + +Other tags are parsed, but currently are ignored. + +Charts are not updated to change units or dimension names once they are created. So, either send the tags on every event, or use the special `zinit` value to initiaze the charts at the beginning. `zinit` is a special value that can be used on any chart, to have netdata initialize the charts, without actually setting any values to them. So, instead of sending `my.metric:VALUE|c|#units=bytes,name=size` every time, the application can send at the beginning `my.metric:zinit|c|#units=bytes,name=size` and then `my.metric:VALUE|c`. + +#### Overlapping metrics + +Netdata's StatsD server maintains different indexes for each of the metric types supported. This means the same metric `name` may exist under different types concurrently. + +#### How to name your metrics + +A good practice is to name your metrics like `application.operation.metric`, where: + +- `application` is the application name - Netdata will automatically create a dashboard section based on the first keyword of the metrics, so you can have all your applications in different sections. +- `operation` is the operation your application is executing, like `dbquery`, `request`, `response`, etc. +- `metric` is anything you want to name your metric as. Netdata will automatically append the metric type (meter, counter, gauge, set, dictionary, timer, histogram) to the generated chart. + +Using [Tags](#tags) you can also change the submenus of the dashboard, the units of the charts and for meters, counters and gauges, the name of dimension. So, you can have a usable default view without using [Synthetic StatsD charts](#synthetic-statsd-charts) + +#### Multiple metrics per packet + +Netdata accepts multiple metrics per packet if each is terminated with a newline (`\n`) at the end. + +#### TCP packets + +Netdata listens for both TCP and UDP packets. For TCP, is it important to always append `\n` on each metric, as Netdata will use the newline character to detect if a metric is split into multiple TCP packets. + + +#### UDP packets + +When sending multiple metrics over a single UDP message, it is important not to exceed the network MTU, which is usually 1500 bytes. + +Netdata will accept UDP packets up to 9000 bytes, but the underlying network will not exceed MTU. + +> You can read more about the network maximum transmission unit(MTU) in this cloudflare [article](https://www.cloudflare.com/en-gb/learning/network-layer/what-is-mtu/). + +## Configuration + +You can find the configuration at `/etc/netdata/netdata.conf`: + +``` +[statsd] + # enabled = yes + # decimal detail = 1000 + # update every (flushInterval) = 1 + # udp messages to process at once = 10 + # create private charts for metrics matching = * + # max private charts allowed = 200 + # max private charts hard limit = 1000 + # private charts memory mode = save + # private charts history = 3996 + # histograms and timers percentile (percentThreshold) = 95.00000 + # add dimension for number of events received = no + # gaps on gauges (deleteGauges) = no + # gaps on counters (deleteCounters) = no + # gaps on meters (deleteMeters) = no + # gaps on sets (deleteSets) = no + # gaps on histograms (deleteHistograms) = no + # gaps on timers (deleteTimers) = no + # listen backlog = 4096 + # default port = 8125 + # bind to = udp:localhost:8125 tcp:localhost:8125 +``` + +### StatsD main config options + +- `enabled = yes|no` + + controls if StatsD will be enabled for this Netdata. The default is enabled. + +- `default port = 8125` + + controls the default port StatsD will use if no port is defined in the following setting. + +- `bind to = udp:localhost tcp:localhost` + + is a space separated list of IPs and ports to listen to. The format is `PROTOCOL:IP:PORT` - if `PORT` is omitted, the `default port` will be used. If `IP` is IPv6, it needs to be enclosed in `[]`. `IP` can also be `*` (to listen on all IPs) or even a hostname. + +- `update every (flushInterval) = 1` seconds, controls the frequency StatsD will push the collected metrics to Netdata charts. + +- `decimal detail = 1000` controls the number of fractional digits in gauges and histograms. Netdata collects metrics using signed 64-bit integers and their fractional detail is controlled using multipliers and divisors. This setting is used to multiply all collected values to convert them to integers and is also set as the divisors, so that the final data will be a floating point number with this fractional detail (1000 = X.0 - X.999, 10000 = X.0 - X.9999, etc). + +The rest of the settings are discussed below. + +## StatsD charts + +Netdata can visualize StatsD collected metrics in 2 ways: + +1. Each metric gets its own **private chart**. This is the default and does not require any configuration. You can adjust the default parameters. + +2. **Synthetic charts** can be created, combining multiple metrics, independently of their metric types. For this type of charts, special configuration is required, to define the chart title, type, units, its dimensions, etc. + +### Private metric charts + +Private charts are controlled with `create private charts for metrics matching = *`. This setting accepts a space-separated list of [simple patterns](/libnetdata/simple_pattern/README.md). Netdata will create private charts for all metrics **by default**. + +For example, to render charts for all `myapp.*` metrics, except `myapp.*.badmetric`, use: + +``` +create private charts for metrics matching = !myapp.*.badmetric myapp.* +``` + +You can specify Netdata StatsD to have a different `memory mode` than the rest of the Netdata Agent. You can read more about `memory mode` in the [documentation](/database/README.md). + +The default behavior is to use the same settings as the rest of the Netdata Agent. If you wish to change them, edit the following settings: +- `private charts memory mode` +- `private charts history` + +### Optimize private metric charts visualization and storage + +If you have thousands of metrics, each with its own private chart, you may notice that your web browser becomes slow when you view the Netdata dashboard (this is a web browser issue we need to address at the Netdata UI). So, Netdata has a protection to stop creating charts when `max private charts allowed = 200` (soft limit) is reached. + +The metrics above this soft limit are still processed by Netdata, can be used in synthetic charts and will be available to be sent to backend time-series databases, up to `max private charts hard limit = 1000`. So, between 200 and 1000 charts, Netdata will still generate charts, but they will automatically be created with `memory mode = none` (Netdata will not maintain a database for them). These metrics will be sent to backend time series databases, if the backend configuration is set to `as collected`. + +Metrics above the hard limit are still collected, but they can only be used in synthetic charts (once a metric is added to chart, it will be sent to backend servers too). + +Example private charts (automatically generated without any configuration): + +#### Counters + +- Scope: **count the events of something** (e.g. number of file downloads) +- Format: `name:INTEGER|c` or `name:INTEGER|C` or `name|c` +- StatsD increments the counter by the `INTEGER` number supplied (positive, or negative). + +![image](https://cloud.githubusercontent.com/assets/2662304/26131553/4a26d19c-3aa3-11e7-94e8-c53b5ed6ebc3.png) + +#### Gauges + +- Scope: **report the value of something** (e.g. cache memory used by the application server) +- Format: `name:FLOAT|g` +- StatsD remembers the last value supplied, and can increment or decrement the latest value if `FLOAT` begins with `+` or `-`. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131575/5d54e6f0-3aa3-11e7-9099-bc4440cd4592.png) + +#### histograms + +- Scope: **statistics on a size of events** (e.g. statistics on the sizes of files downloaded) +- Format: `name:FLOAT|h` +- StatsD maintains a list of all the values supplied and provides statistics on them. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131587/704de72a-3aa3-11e7-9ea9-0d2bb778c150.png) + +The same chart with `sum` unselected, to show the detail of the dimensions supported: +![image](https://cloud.githubusercontent.com/assets/2662304/26131598/8076443a-3aa3-11e7-9ffa-ea535aee9c9f.png) + +#### Meters + +This is identical to `counter`. + +- Scope: **count the events of something** (e.g. number of file downloads) +- Format: `name:INTEGER|m` or `name|m` or just `name` +- StatsD increments the counter by the `INTEGER` number supplied (positive, or negative). + +![image](https://cloud.githubusercontent.com/assets/2662304/26131605/8fdf5a06-3aa3-11e7-963f-7ecf207d1dbc.png) + +#### Sets + +- Scope: **count the unique occurrences of something** (e.g. unique filenames downloaded, or unique users that downloaded files) +- Format: `name:TEXT|s` +- StatsD maintains a unique index of all values supplied, and reports the unique entries in it. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131612/9eaa7b1a-3aa3-11e7-903b-d881e9a35be2.png) + +#### Timers + +- Scope: **statistics on the duration of events** (e.g. statistics for the duration of file downloads) +- Format: `name:FLOAT|ms` +- StatsD maintains a list of all the values supplied and provides statistics on them. + +![image](https://cloud.githubusercontent.com/assets/2662304/26131629/bc34f2d2-3aa3-11e7-8a07-f2fc94ba4352.png) + +### Synthetic StatsD charts + +Use synthetic charts to create dedicated sections on the dashboard to render your StatsD charts. + +Synthetic charts are organized in + +- **application** aka section in Netdata Dashboard. +- **charts for each application** aka family in Netdata Dashboard. +- **StatsD metrics for each chart** /aka charts and context Netdata Dashboard. + +> You can read more about how the Netdata Agent organizes information in the relevant [documentation](/web/README.md) + +For each application you need to create a `.conf` file in `/etc/netdata/statsd.d`. + +For example, if you want to monitor the application `myapp` using StatsD and Netdata, create the file `/etc/netdata/statsd.d/myapp.conf`, with this content: +``` +[app] + name = myapp + metrics = myapp.* + private charts = no + gaps when not collected = no + history = 60 +# memory mode = ram + +[dictionary] + m1 = metric1 + m2 = metric2 + +# replace 'mychart' with the chart id +# the chart will be named: myapp.mychart +[mychart] + name = mychart + title = my chart title + family = my family + context = chart.context + units = tests/s + priority = 91000 + type = area + dimension = myapp.metric1 m1 + dimension = myapp.metric2 m2 +``` + +Using the above configuration `myapp` should get its own section on the dashboard, having one chart with 2 dimensions. + +`[app]` starts a new application definition. The supported settings in this section are: + +- `name` defines the name of the app. +- `metrics` is a Netdata [simple pattern](/libnetdata/simple_pattern/README.md). This pattern should match all the possible StatsD metrics that will be participating in the application `myapp`. +- `private charts = yes|no`, enables or disables private charts for the metrics matched. +- `gaps when not collected = yes|no`, enables or disables gaps on the charts of the application in case that no metrics are collected. +- `memory mode` sets the memory mode for all charts of the application. The default is the global default for Netdata (not the global default for StatsD private charts). We suggest not to use this (we have commented it out in the example) and let your app use the global default for Netdata, which is our dbengine. + +- `history` sets the size of the round robin database for this application. The default is the global default for Netdata (not the global default for StatsD private charts). This is only relevant if you use `memory mode = save`. Read more on our [metrics storage(]/docs/store/change-metrics-storage.md) doc. + +`[dictionary]` defines name-value associations. These are used to renaming metrics, when added to synthetic charts. Metric names are also defined at each `dimension` line. However, using the dictionary dimension names can be declared globally, for each app and is the only way to rename dimensions when using patterns. Of course the dictionary can be empty or missing. + +Then, add any number of charts. Each chart should start with `[id]`. The chart will be called `app_name.id`. `family` controls the submenu on the dashboard. `context` controls the alarm templates. `priority` controls the ordering of the charts on the dashboard. The rest of the settings are informational. + +Add any number of metrics to a chart, using `dimension` lines. These lines accept 5 space separated parameters: + +1. the metric name, as it is collected (it has to be matched by the `metrics =` pattern of the app) +2. the dimension name, as it should be shown on the chart +3. an optional selector (type) of the value to shown (see below) +4. an optional multiplier +5. an optional divider +6. optional flags, space separated and enclosed in quotes. All the external plugins `DIMENSION` flags can be used. Currently the only usable flag is `hidden`, to add the dimension, but not show it on the dashboard. This is usually needed to have the values available for percentage calculation, or use them in alarms. + +So, the format is this: + +``` +dimension = [pattern] METRIC NAME TYPE MULTIPLIER DIVIDER OPTIONS +``` + +`pattern` is a keyword. When set, `METRIC` is expected to be a Netdata [simple pattern](/libnetdata/simple_pattern/README.md) that will be used to match all the StatsD metrics to be added to the chart. So, `pattern` automatically matches any number of StatsD metrics, all of which will be added as separate chart dimensions. + +`TYPE`, `MULTIPLIER`, `DIVIDER` and `OPTIONS` are optional. + +`TYPE` can be: + +- `events` to show the number of events received by StatsD for this metric +- `last` to show the last value, as calculated at the flush interval of the metric (the default) + +Then for histograms and timers the following types are also supported: + +- `min`, show the minimum value +- `max`, show the maximum value +- `sum`, show the sum of all values +- `average` (same as `last`) +- `percentile`, show the 95th percentile (or any other percentile, as configured at StatsD global config) +- `median`, show the median of all values (i.e. sort all values and get the middle value) +- `stddev`, show the standard deviation of the values + +#### Example synthetic charts + +StatsD metrics: `foo` and `bar`. + +Contents of file `/etc/netdata/stats.d/foobar.conf`: + +``` +[app] + name = foobarapp + metrics = foo bar + private charts = yes + +[foobar_chart1] + title = Hey, foo and bar together + family = foobar_family + context = foobarapp.foobars + units = foobars + type = area + dimension = foo 'foo me' last 1 1 + dimension = bar 'bar me' last 1 1 +``` + +Metrics sent to statsd: `foo:10|g` and `bar:20|g`. + +Private charts: + +![screenshot from 2017-08-03 23-28-19](https://user-images.githubusercontent.com/2662304/28942295-7c3a73a8-78a3-11e7-88e5-a9a006bb7465.png) + +Synthetic chart: + +![screenshot from 2017-08-03 23-29-14](https://user-images.githubusercontent.com/2662304/28942317-958a2c68-78a3-11e7-853f-32850141dd36.png) + +#### Renaming StatsD synthetic charts' metrics + +You can define a dictionary to rename metrics sent by StatsD clients. This enables you to send response `"200"` and Netdata visualize it as `succesful connection` + +The `[dictionary]` section accepts any number of `name = value` pairs. + +Netdata uses this dictionary as follows: + +1. When a `dimension` has a non-empty `NAME`, that name is looked up at the dictionary. + +2. If the above lookup gives nothing, or the `dimension` has an empty `NAME`, the original StatsD metric name is looked up at the dictionary. + +3. If any of the above succeeds, Netdata uses the `value` of the dictionary, to set the name of the dimension. The dimensions will have as ID the original StatsD metric name, and as name, the dictionary value. + +Use the dictionary in 2 ways: + +1. set `dimension = myapp.metric1 ''` and have at the dictionary `myapp.metric1 = metric1 name` +2. set `dimension = myapp.metric1 'm1'` and have at the dictionary `m1 = metric1 name` + +In both cases, the dimension will be added with ID `myapp.metric1` and will be named `metric1 name`. So, in alarms use either of the 2 as `${myapp.metric1}` or `${metric1 name}`. + +> keep in mind that if you add multiple times the same StatsD metric to a chart, Netdata will append `TYPE` to the dimension ID, so `myapp.metric1` will be added as `myapp.metric1_last` or `myapp.metric1_events`, etc. If you add multiple times the same metric with the same `TYPE` to a chart, Netdata will also append an incremental counter to the dimension ID, i.e. `myapp.metric1_last1`, `myapp.metric1_last2`, etc. + +#### Dimension patterns + +Netdata allows adding multiple dimensions to a chart, by matching the StatsD metrics with a Netdata simple pattern. + +Assume we have an API that provides StatsD metrics for each response code per method it supports, like these: + +``` +myapp.api.get.200 +myapp.api.get.400 +myapp.api.get.500 +myapp.api.del.200 +myapp.api.del.400 +myapp.api.del.500 +myapp.api.post.200 +myapp.api.post.400 +myapp.api.post.500 +myapp.api.all.200 +myapp.api.all.400 +myapp.api.all.500 +``` + +In order to add all the response codes of `myapp.api.get` to a chart, we simply make the following configuration: + +``` +[api_get_responses] + ... + dimension = pattern 'myapp.api.get.* '' last 1 1 +``` + +The above will add dimension named `200`, `400` and `500`. Netdata extracts the wildcard part of the metric name - so the dimensions will be named with whatever the `*` matched. + +You can rename the dimensions with this: + +``` +[dictionary] + get.200 = 200 ok + get.400 = 400 bad request + get.500 = 500 cannot connect to db + +[api_get_responses] + ... + dimension = pattern 'myapp.api.get.* 'get.' last 1 1 +``` + +Note that we added a `NAME` to the dimension line with `get.`. This is prefixed to the wildcarded part of the metric name, to compose the key for looking up the dictionary. So `500` became `get.500` which was looked up to the dictionary to find value `500 cannot connect to db`. This way we can have different dimension names, for each of the API methods (i.e. `get.500 = 500 cannot connect to db` while `post.500 = 500 cannot write to disk`). + +To add all 200s across all API methods to a chart, you can do this: + +``` +[ok_by_method] + ... + dimension = pattern 'myapp.api.*.200 '' last 1 1 +``` + +The above will add `get`, `post`, `del` and `all` to the chart. + +If `all` is not wanted (a `stacked` chart does not need the `all` dimension, since the sum of the dimensions provides the total), the line should be: + +``` +[ok_by_method] + ... + dimension = pattern '!myapp.api.all.* myapp.api.*.200 '' last 1 1 +``` + +With the above, all methods except `all` will be added to the chart. + +To automatically rename the methods, you can use this: + +``` +[dictionary] + method.get = GET + method.post = ADD + method.del = DELETE + +[ok_by_method] + ... + dimension = pattern '!myapp.api.all.* myapp.api.*.200 'method.' last 1 1 +``` + +Using the above, the dimensions will be added as `GET`, `ADD` and `DELETE`. + +## StatsD examples + +### Python + +It's really easy to instrument your python application with StatsD, for example using [jsocol/pystatsd](https://github.com/jsocol/pystatsd). + +```python +import statsd +c = statsd.StatsClient('localhost', 8125) +c.incr('foo') # Increment the 'foo' counter. +for i in range(100000000): + c.incr('bar') + c.incr('foo') + if i % 3: + c.decr('bar') + c.timing('stats.timed', 320) # Record a 320ms 'stats.timed'. +``` + +You can find detailed documentation in their [documentation page](https://statsd.readthedocs.io/en/v3.3/). + +### Javascript and Node.js + +Using the client library by [sivy/node-statsd](https://github.com/sivy/node-statsd), you can easily embed StatsD into your Node.js project. + +```javascript + var StatsD = require('node-statsd'), + client = new StatsD(); + + // Timing: sends a timing command with the specified milliseconds + client.timing('response_time', 42); + + // Increment: Increments a stat by a value (default is 1) + client.increment('my_counter'); + + // Decrement: Decrements a stat by a value (default is -1) + client.decrement('my_counter'); + + // Using the callback + client.set(['foo', 'bar'], 42, function(error, bytes){ + //this only gets called once after all messages have been sent + if(error){ + console.error('Oh noes! There was an error:', error); + } else { + console.log('Successfully sent', bytes, 'bytes'); + } + }); + + // Sampling, tags and callback are optional and could be used in any combination + client.histogram('my_histogram', 42, 0.25); // 25% Sample Rate + client.histogram('my_histogram', 42, ['tag']); // User-defined tag + client.histogram('my_histogram', 42, next); // Callback + client.histogram('my_histogram', 42, 0.25, ['tag']); + client.histogram('my_histogram', 42, 0.25, next); + client.histogram('my_histogram', 42, ['tag'], next); + client.histogram('my_histogram', 42, 0.25, ['tag'], next); +``` +### Other languages + +You can also use StatsD with: +- Golang, thanks to [alexcesaro/statsd](https://github.com/alexcesaro/statsd) +- Ruby, thanks to [reinh/statsd](https://github.com/reinh/statsd) +- Java, thanks to [DataDog/java-dogstatsd-client](https://github.com/DataDog/java-dogstatsd-client) + + +### Shell + +Getting the proper support for a programming language is not always easy, but the Unix shell is available on most Unix systems. You can use shell and `nc` to instrument your systems and send metric data to Netdata's StatsD implementation. + +Using the method you can send metrics from any script. You can generate events like: backup.started, backup.ended, backup.time, or even tail logs and convert them to metrics. + +> **IMPORTANT**: +> +> To send StatsD messages you need from the `netcat` package, the `nc` command. +> There are multiple versions of this package. Please try to experiment with the `nc` command you have available on your right system, to find the right parameters. +> +> In the examples below, we assume the `openbsd-netcat` is installed. + +If you plan to send short StatsD events at sporadic occasions, use UDP. The messages should not be too long (remember, most networks support up to 1500 bytes MTU, which is also the limit for StatsD messages over UDP). The good thing is that using UDP will not block your script, even if the StatsD server is not there (UDP messages are "fire-and-forget"). + + +For UDP use this: + +```sh +echo "APPLICATION.METRIC:VALUE|TYPE" | nc -u -w 0 localhost 8125 +``` + +`-u` turns on UDP, `-w 0` tells `nc` not to wait for a response from StatsD (idle time to close the connection). + +where: + +- `APPLICATION` is any name for your application +- `METRIC` is the name for the specific metric +- `VALUE` is the value for that metric (**meters**, **counters**, **gauges**, **timers** and **histograms** accept integer/decimal/fractional numbers, **sets** and **dictionaries** accept strings) +- `TYPE` is one of `m`, `c`, `g`, `ms`, `h`, `s`, `d` to define the metric type. + +For tailing a log and converting it to metrics, do something like this: + +```sh +tail -f some.log | awk 'awk commands to parse the log and format statsd metrics' | nc -N -w 120 localhost 8125 +``` + +`-N` tells `nc` to close the socket once it receives EOF on its input. `-w 120` tells `nc` to stop if the connection is idle for 120 seconds. The timeout is needed to stop the `nc` command if you restart Netdata while `nc` is connected to it. Without it, `nc` will sit idle forever. + +When you embed the above commands to a script, you may notice that all the metrics are sent to StatsD with a delay. They are buffered in the pipes `|`. You can turn them to real-time by prepending each command with `stdbuf -i0 -oL -eL command to be run`, like this: + +```sh +stdbuf -i0 -oL -eL tail -f some.log |\ + stdbuf -i0 -oL -eL awk 'awk commands to parse the log and format statsd metrics' |\ + stdbuf -i0 -oL -eL nc -N -w 120 localhost 8125 +``` + +If you use `mawk` you also need to run awk with `-W interactive`. + +Examples: + +To set `myapp.used_memory` as gauge to value `123456`, use: + +```sh +echo "myapp.used_memory:123456|g|#units:bytes" | nc -u -w 0 localhost 8125 +``` + +To increment `myapp.files_sent` by `10`, as a counter, use: + +```sh +echo "myapp.files_sent:10|c|#units:files" | nc -u -w 0 localhost 8125 +``` + +You can send multiple metrics like this: + +```sh +# send multiple metrics via UDP +printf "myapp.used_memory:123456|g|#units:bytes\nmyapp.files_sent:10|c|#units:files\n" | nc -u -w 0 localhost 8125 +``` + +Remember, for UDP communication each packet should not exceed the MTU. So, if you plan to push too many metrics at once, prefer TCP communication: + +```sh +# send multiple metrics via TCP +cat /tmp/statsd.metrics.txt | nc -N -w 120 localhost 8125 +``` + +You can also use this little function to take care of all the details: + +```sh +#!/usr/bin/env bash + +# we assume nc is from the openbsd-netcat package + +STATSD_HOST="localhost" +STATSD_PORT="8125" +statsd() { + local options="-u -w 0" all="${*}" + + # replace all spaces with newlines + all="${all// /\\n}" + + # if the string length of all parameters given is above 1000, use TCP + [ "${#all}" -gt 1000 ] && options="-N -w 0" + + # send the metrics to statsd + printf "${all}\n" | nc ${options} ${STATSD_HOST} ${STATSD_PORT} || return 1 + + return 0 +} + +if [ ! -z "${*}" ] +then + statsd "${@}" +fi +``` + +You can use it like this: + +```sh +# first, source it in your script +source statsd.sh + +# then, at any point: +statsd "myapp.used_memory:123456|g|#units:bytes" "myapp.files_sent:10|c|#units:files" ... +``` + +or even at a terminal prompt, like this: + +```sh +./statsd.sh "myapp.used_memory:123456|g|#units:bytes" "myapp.files_sent:10|c|#units:files" ... +``` + +The function is smart enough to call `nc` just once and pass all the metrics to it. It will also automatically switch to TCP if the metrics to send are above 1000 bytes. + +If you have gotten thus far, make sure to check out our [community forums](https://community.netdata.cloud) to share your experience using Netdata with StatsD. diff --git a/collectors/statsd.plugin/asterisk.conf b/collectors/statsd.plugin/asterisk.conf new file mode 100644 index 0000000..160b80f --- /dev/null +++ b/collectors/statsd.plugin/asterisk.conf @@ -0,0 +1,208 @@ +[app] + name = asterisk + metrics = asterisk.* + private charts = yes + gaps when not collected = no + +[dictionary] + # https://www.voip-info.org/asterisk-variable-hangupcause/ + q931.1 = unallocated 1 + q931.2 = no route transit net 2 + q931.3 = no route destination 3 + q931.6 = channel unacceptable 6 + q931.7 = call awarded delivered 7 + q931.16 = normal 16 + q931.17 = busy 17 + q931.18 = no response 18 + q931.19 = no answer 19 + q931.21 = rejected call 21 + q931.22 = number changed 22 + q931.27 = dst out of order 27 + q931.28 = invalid number 28 + q931.29 = rejected facility 29 + q931.30 = response to status 30 + q931.31 = normal unspecified 31 + q931.34 = congestion circuit 34 + q931.38 = net out of order 38 + q931.41 = normal tmp fail 41 + q931.42 = congestion switch 42 + q931.43 = access info discarded 43 + q931.44 = requested chan unavail 44 + q931.45 = pre empted 45 + q931.47 = resource unavailable, unspecified 47 + q931.50 = facility not subscribed 50 + q931.52 = outgoing call barred 52 + q931.54 = incoming call barred 54 + q931.57 = bearer capability not auth 57 + q931.58 = bearer capability not avail 58 + q931.65 = bearer capability not implemented 65 + q931.66 = chan not implemented 66 + q931.69 = facility not implemented 67 + q931.81 = invalid call reference 81 + q931.88 = incompatible destination 88 + q931.95 = invalid msg specified 95 + q931.96 = mandatory ie missing 96 + q931.97 = message type non exist 97 + q931.98 = wrong message 98 + q931.99 = ie non exist 99 + q931.100 = invalid ie contents 100 + q931.101 = wrong call state 101 + q931.102 = recovery on timer expire 102 + q931.103 = mandatory ie length error 103 + q931.111 = protocol error 111 + q931.127 = interworking 127 + + +[channels] + name = channels + title = Active Channels + family = channels + context = asterisk.channels + units = channels + priority = 91000 + type = stacked + dimension = pattern asterisk.channels.count 'channels' last 1 1 + # FIXME: netdata needs to prevent this from going negative + +[endpoints] + name = endpoints + title = Active Endpoints + family = endpoints + context = asterisk.endpoints + units = endpoints + priority = 91005 + type = stacked + dimension = pattern asterisk.endpoints.count 'endpoints' last 1 1 + +[endpoints_by_status] + name = endpoints_by_status + title = Active Endpoints by Status + family = endpoints + context = asterisk.endpoints_by_status + units = endpoints + priority = 91006 + type = stacked + dimension = pattern asterisk.endpoints.state.* '' last 1 1 + +[sip_channels_by_endpoint] + name = sip_channels_by_endpoint + title = Active SIP channels by endpoint + family = channels + context = asterisk.sip_channels_by_endpoint + units = channels + priority = 91110 + type = stacked + dimension = pattern asterisk.endpoints.SIP.*.channels '' last 1 1 + +[pjsip_channels_by_endpoint] + name = pjsip_channels_by_endpoint + title = Active PJSIP channels by endpoint + family = channels + context = asterisk.pjsip_channels_by_endpoint + units = channels + priority = 91111 + type = stacked + dimension = pattern asterisk.endpoints.PJSIP.*.channels '' last 1 1 + +[dialstatuses] + name = dialstatuses + title = Distribution of Dial Statuses + family = dial_statuses + context = asterisk.dialstatus + units = calls + priority = 91150 + type = stacked + dimension = pattern 'asterisk.dialstatus.*' '' last 1 1 + +[calltime] + name = calltime + title = Asterisk Channels Call Duration + family = calltime + context = asterisk.calltime + units = milliseconds + priority = 91160 + type = stacked + dimension = asterisk.channels.calltime 'calltime' average 1 1 + dimension = asterisk.channels.calltime 'sum' sum 1 1 hidden + dimension = asterisk.channels.calltime 'count' events 1 1 hidden + +[hangupcause] + name = hangupcause + title = Distribution of Hangup Causes + family = hangup_causes + context = asterisk.hangupcause + units = calls + priority = 91200 + type = stacked + dimension = pattern 'asterisk.hangupcause.*' 'q931.' last 1 1 + +[hangupcause_answer] + name = hangupcause_answer + title = Distribution of Hangup Causes for ANSWERed calls + family = hangup_causes + context = asterisk.hangupcause_answer + units = calls + priority = 91210 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.ANSWER.*' 'q931.' last 1 1 + +[hangupcause_busy] + name = hangupcause_busy + title = Distribution of Hangup Causes for BUSY calls + family = hangup_causes + context = asterisk.hangupcause_busy + units = calls + priority = 91215 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.BUSY.*' 'q931.' last 1 1 + +[hangupcause_cancel] + name = hangupcause_cancel + title = Distribution of Hangup Causes for CANCELled calls + family = hangup_causes + context = asterisk.hangupcause_cancel + units = calls + priority = 91220 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.CANCEL.*' 'q931.' last 1 1 + +[hangupcause_chanunavail] + name = hangupcause_chanunavail + title = Distribution of Hangup Causes for CHANUNVAILed calls + family = hangup_causes + context = asterisk.hangupcause_chanunavail + units = calls + priority = 91230 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.CHANUNAVAIL.*' 'q931.' last 1 1 + +[hangupcause_congestion] + name = hangupcause_congestion + title = Distribution of Hangup Causes for CONGESTIONed calls + family = hangup_causes + context = asterisk.hangupcause_congestion + units = calls + priority = 91240 + type = stacked + dimension = pattern 'asterisk.dialhangupcause.CONGESTION.*' 'q931.' last 1 1 + +[events] + name = events + title = Asterisk Dialplan Events + family = events + context = asterisk.events + units = events/s + priority = 91400 + type = stacked + dimension = pattern 'asterisk.stasis.message.ast_channel_*_type' '' last 1 1 + +[qualify] + name = qualify + title = Asterisk PJSIP Peers Qualify + family = qualify + context = asterisk.qualify + units = milliseconds + priority = 91500 + type = stacked + dimension = pattern 'asterisk.PJSIP.contacts.*.rtt' '' max 1 1 + # FIXME: netdata needs to set update every = 15 on this diff --git a/collectors/statsd.plugin/asterisk.md b/collectors/statsd.plugin/asterisk.md new file mode 100644 index 0000000..94da94e --- /dev/null +++ b/collectors/statsd.plugin/asterisk.md @@ -0,0 +1,61 @@ +<!-- +title: "Asterisk monitoring with Netdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/statsd.plugin/asterisk.md + +sidebar_label: "Asterisk" +--> + +# Asterisk monitoring with Netdata + +Monitors [Asterisk](https://www.asterisk.org/) dialplan application's statistics. + +## Requirements + +- Asterisk [integrated with StatsD](https://www.asterisk.org/integrating-asterisk-with-statsd/). + +## Configuration + +Netdata ships +with [asterisk.conf](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/asterisk.conf) with +preconfigured charts. + +To receive Asterisk metrics in Netdata, uncomment the following lines in the `/etc/asterisk/statsd.conf` file: + +```ini +[general] +enabled = yes ; When set to yes, statsd support is enabled +server = 127.0.0.1 ; server[:port] of statsd server to use. + ; If not specified, the port is 8125 +prefix = asterisk ; Prefix to prepend to all metrics +``` + +> See [statsd.conf.sample](https://github.com/asterisk/asterisk/blob/master/configs/samples/statsd.conf.sample) for all available options. + +## Charts and metrics + +<details><summary>Click to see screenshots of the charts.</summary> + +![image](https://user-images.githubusercontent.com/2662304/158055351-fcc7a7fb-9b95-4656-bdc6-2e5f5a909215.png) +![image](https://user-images.githubusercontent.com/2662304/158055367-cfd25cd5-d71a-4bab-8cd1-bfcc47bc7312.png) + +</details> + +Mapping Asterisk StatsD metrics and Netdata charts. + +| Chart | Metrics | +|------------------------------------------------------|--------------------------------------------| +| Active Channels | asterisk.channels.count | +| Active Endpoints | asterisk.endpoints.count | +| Active Endpoints by Status | asterisk.endpoints.state.* | +| Active SIP channels by endpoint | asterisk.endpoints.SIP.*.channels | +| Active PJSIP channels by endpoint | asterisk.endpoints.PJSIP.*.channels | +| Distribution of Dial Statuses | asterisk.dialstatus.* | +| Asterisk Channels Call Duration | asterisk.channels.calltime | +| Distribution of Hangup Causes | asterisk.hangupcause.* | +| Distribution of Hangup Causes for ANSWERed calls | asterisk.dialhangupcause.ANSWER.* | +| Distribution of Hangup Causes for BUSY calls | asterisk.dialhangupcause.BUSY.* | +| Distribution of Hangup Causes for CANCELled calls | asterisk.dialhangupcause.CANCEL.* | +| Distribution of Hangup Causes for CHANUNVAILed calls | asterisk.dialhangupcause.CHANUNAVAIL.* | +| Distribution of Hangup Causes for CONGESTIONed calls | asterisk.dialhangupcause.CONGESTION.* | +| Asterisk Dialplan Events | asterisk.stasis.message.ast_channel_*_type | +| Asterisk PJSIP Peers Qualify | asterisk.PJSIP.contacts.*.rtt | diff --git a/collectors/statsd.plugin/example.conf b/collectors/statsd.plugin/example.conf new file mode 100644 index 0000000..2c7de6c --- /dev/null +++ b/collectors/statsd.plugin/example.conf @@ -0,0 +1,64 @@ +# statsd synthetic charts configuration + +# You can add many .conf files in /etc/netdata/statsd.d/, +# one for each of your apps. + +# start a new app - you can add many apps in the same file +[app] + # give a name for this app + # this controls the main menu on the dashboard + # and will be the prefix for all charts of the app + name = myexampleapp + + # match all the metrics of the app + metrics = myexampleapp.* + + # shall private charts of these metrics be created? + private charts = no + + # shall gaps be shown when metrics are not collected? + gaps when not collected = no + + # the memory mode for the charts of this app: none|map|save + # the default is to use the global memory mode + #memory mode = ram + + # the history size for the charts of this app, in seconds + # the default is to use the global history + #history = 3600 + +# create a chart +# this is its id - the chart will be named myexampleapp.myexamplechart +[myexamplechart] + # a name for the chart, similar to the id (2 names for each chart) + name = myexamplechart + + # the chart title + title = my chart title + + # the submenu of the dashboard + family = my family + + # the context for alarm templates + context = chart.context + + # the units of the chart + units = tests/s + + # the sorting priority of the chart on the dashboard + priority = 91000 + + # the type of chart to create: line | area | stacked + type = area + + # one or more dimensions for the chart + # type = events | last | min | max | sum | average | percentile | median | stddev + # events = the number of events for this metric + # last = the last value collected + # all the others are only valid for histograms and timers + dimension = myexampleapp.metric1 avg average 1 1 + dimension = myexampleapp.metric1 lower min 1 1 + dimension = myexampleapp.metric1 upper max 1 1 + dimension = myexampleapp.metric2 other last 1 1 + +# You can add as many charts as needed diff --git a/collectors/statsd.plugin/k6.conf b/collectors/statsd.plugin/k6.conf new file mode 100644 index 0000000..3bef00c --- /dev/null +++ b/collectors/statsd.plugin/k6.conf @@ -0,0 +1,110 @@ +[app] + name = k6 + metrics = k6* + private charts = no + gaps when not collected = yes + +[dictionary] + http_reqs = HTTP Requests + http_reqs_failed = Failed HTTP Requests + vus = Virtual active users + vus_max = max Virtual active users + iteration_duration = iteration duration + iteration_duration_max = max iteration duration + iteration_duration_min = min iteration duration + iteration_duration_avg = avg iteration duration + dropped_iterations = Dropped iterations + http_req_blocked = Blocked HTTP requests + http_req_connecting = Connecting HTTP requests + http_req_sending = Sending HTTP requests + http_req_receiving = Receiving HTTP requests + http_req_waiting = Waiting HTTP requests + http_req_duration_median = Median HTTP req duration + http_req_duration_average = Avg HTTP req duration + http_req_duration = HTTP req duration + http_req_duration_max = max HTTP req duration + http_req_duration_min = min HTTP req duration + http_req_duration_p95 = 95 percentile of HTTP req duration + data_received = Received data + data_sent = Sent data + + +[http_reqs] + name = http_reqs + title = HTTP Requests rate + family = http requests + context = k6.http_requests + dimension = k6.http_reqs http_reqs last 1 1 sum + type = line + units = requests/s + +[http_reqs] + name = http_reqs_failed + title = Failed HTTP Requests rate + family = http requests + context = k6.http_requests + dimension = k6.http_reqs_failed http_reqs_failed last 1 1 sum + type = line + units = requests/s + +[vus] + name = vus + title = Virtual Active Users + family = k6_metrics + dimension = k6.vus vus last 1 1 + dimension = k6.vus_max vus_max last 1 1 + type = line + units = vus + +[iteration_duration] + name = iteration_duration_2 + title = Iteration duration + family = k6_metrics + dimension = k6.iteration_duration iteration_duration last 1 1 + dimension = k6.iteration_duration iteration_duration_max max 1 1 + dimension = k6.iteration_duration iteration_duration_min min 1 1 + dimension = k6.iteration_duration iteration_duration_avg average 1 1 + type = line + units = s + +[dropped_iterations] + name = dropped_iterations + title = Dropped Iterations + family = k6_metrics + dimension = k6.dropped_iterations dropped_iterations last 1 1 + units = iterations + type = line + +[data] + name = data + title = K6 Data + family = k6_metrics + dimension = k6.data_received data_received last 1 1 + dimension = k6.data_sent data_sent last -1 1 + units = kb/s + type = area + +[http_req_duration_types] + name = http_req_duration_types + title = HTTP Requests total duration + family = http requests + dimension = k6.http_req_sending http_req_sending last 1 1 + dimension = k6.http_req_waiting http_req_waiting last 1 1 + dimension = k6.http_req_receiving http_req_receiving last 1 1 + dimension = k6.http_req_blocked http_req_blocked last 1 1 + dimension = k6.http_req_connecting http_req_connecting last 1 1 + units = ms + type = stacked + +[http_req_duration] + name = http_req_duration + title = HTTP duration metrics + family = http requests + dimension = k6.http_req_duration http_req_duration_median median 1 1 + dimension = k6.http_req_duration http_req_duration_max max 1 1 + dimension = k6.http_req_duration http_req_duration_average average 1 1 + dimension = k6.http_req_duration http_req_duration_min min 1 1 + dimension = k6.http_req_duration http_req_duration_p95 percentile 1 1 + dimension = k6.http_req_duration http_req_duration last 1 1 + units = ms + type = line diff --git a/collectors/statsd.plugin/k6.md b/collectors/statsd.plugin/k6.md new file mode 100644 index 0000000..4f8c701 --- /dev/null +++ b/collectors/statsd.plugin/k6.md @@ -0,0 +1,76 @@ +<!-- +title: "K6 load test monitoring with Netdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/statsd.plugin/k6.md + +sidebar_label: "K6 Load Testing" +--> + +# K6 Load Testing monitoring with Netdata + +Monitors the impact of load testing experiments performed with [K6](https://k6.io/). + +You can read more about the metrics that K6 sends in the [K6 documentation](https://k6.io/docs/using-k6/metrics/). + +## Requirements + +- When running the k6 experiment, specify a [StatsD output](https://k6.io/docs/results-visualization/statsd/). + - Tip: K6 currently supports tags only with [datadog output](https://k6.io/docs/results-visualization/datadog/), which is in essence StatsD. Netdata can be used with both. + +## Metrics + +![image](https://user-images.githubusercontent.com/13405632/117691411-8a7baf00-b1c4-11eb-9d87-8e9e7214871f.png) + + +### HTTP Requests + +Number of HTTP requests that K6 generates, per second. + +### Failed HTTP Requests + +Number of failed HTTP requests that K6 generates, per second. + +### Virtual Active Users +Current number of active virtual users. + +### Iteration Duration + +The time it took K6 to complete one full iteration of the main function. + +### Dropped Iterations + +The number of iterations that could not be started either due to lack of Virtual Users or lack of time. + +### Data + +The amount of data received and sent. + +### TTP Requests total duration + +The total duration it took for a round-trip of an HTTP request. It includes: +- Blocked HTTP requests: time spent blocked before initiating the request +- Connecting HTTP requests: time spent establishing TCP connection to the remote host +- Sending HTTP requests: time spent sending data to the remote host +- Receiving HTTP requests: time spent receiving data from the remote host +- Waiting HTTP requests: time spent waiting for response from the remote host + +### HTTP duration metrics + +Different metrics on the HTTP request as defined by K6. The HTTP request duration is defined by K6 as: `HTTP sending request` + `HTTP receiving request` + `HTTP waiting request`. + +Metrics: +- Median +- Average +- Max +- Min +- 95th percentile +- absolute (the value as it is, without any computation) + +## Configuration + +The collector is preconfigured and defined in `statsd.plugin/k6.conf`. + +Due to being a StatsD collector, you only need to define the configuration file and then send data to Netdata using the StatsD protocol. + +If Netdata is running on the same machine as K6, no further configuration is required. Otherwise, you will have to [point K6](https://k6.io/docs/results-visualization/statsd/) to your node and make sure that the K6 process can reach Netdata. + +The default namespace that is used in the configuration is `k6`. If you change it in K6, you will have to change it as well in the configuration file `k6.conf`. diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c new file mode 100644 index 0000000..67d7ed2 --- /dev/null +++ b/collectors/statsd.plugin/statsd.c @@ -0,0 +1,2844 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "daemon/common.h" + +#define STATSD_CHART_PREFIX "statsd" + +#define PLUGIN_STATSD_NAME "statsd.plugin" + +#define STATSD_LISTEN_PORT 8125 +#define STATSD_LISTEN_BACKLOG 4096 + +#define WORKER_JOB_TYPE_TCP_CONNECTED 0 +#define WORKER_JOB_TYPE_TCP_DISCONNECTED 1 +#define WORKER_JOB_TYPE_RCV_DATA 2 +#define WORKER_JOB_TYPE_SND_DATA 3 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 4 +#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least 4 +#endif + +// -------------------------------------------------------------------------------------- + +// DO NOT ENABLE MULTITHREADING - IT IS NOT WELL TESTED +// #define STATSD_MULTITHREADED 1 + +#define STATSD_DICTIONARY_OPTIONS (DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_ADD_IN_FRONT) +#define STATSD_DECIMAL_DETAIL 1000 // floating point values get multiplied by this, with the same divisor + +// -------------------------------------------------------------------------------------------------------------------- +// data specific to each metric type + +typedef struct statsd_metric_gauge { + NETDATA_DOUBLE value; +} STATSD_METRIC_GAUGE; + +typedef struct statsd_metric_counter { // counter and meter + long long value; +} STATSD_METRIC_COUNTER; + +typedef struct statsd_histogram_extensions { + netdata_mutex_t mutex; + + // average is stored in metric->last + collected_number last_min; + collected_number last_max; + collected_number last_percentile; + collected_number last_median; + collected_number last_stddev; + collected_number last_sum; + + int zeroed; + + RRDDIM *rd_min; + RRDDIM *rd_max; + RRDDIM *rd_percentile; + RRDDIM *rd_median; + RRDDIM *rd_stddev; + //RRDDIM *rd_sum; + + size_t size; + size_t used; + NETDATA_DOUBLE *values; // dynamic array of values collected +} STATSD_METRIC_HISTOGRAM_EXTENSIONS; + +typedef struct statsd_metric_histogram { // histogram and timer + STATSD_METRIC_HISTOGRAM_EXTENSIONS *ext; +} STATSD_METRIC_HISTOGRAM; + +typedef struct statsd_metric_set { + DICTIONARY *dict; + size_t unique; +} STATSD_METRIC_SET; + +typedef struct statsd_metric_dictionary_item { + size_t count; + RRDDIM *rd; +} STATSD_METRIC_DICTIONARY_ITEM; + +typedef struct statsd_metric_dictionary { + DICTIONARY *dict; + size_t unique; +} STATSD_METRIC_DICTIONARY; + + +// -------------------------------------------------------------------------------------------------------------------- +// this is a metric - for all types of metrics + +typedef enum statsd_metric_options { + STATSD_METRIC_OPTION_NONE = 0x00000000, // no options set + STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED = 0x00000001, // do not update the chart dimension, when this metric is not collected + STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED = 0x00000002, // render a private chart for this metric + STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED = 0x00000004, // the metric has been checked if it should get private chart or not + STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT = 0x00000008, // show the count of events for this private chart + STATSD_METRIC_OPTION_CHECKED_IN_APPS = 0x00000010, // set when this metric has been checked against apps + STATSD_METRIC_OPTION_USED_IN_APPS = 0x00000020, // set when this metric is used in apps + STATSD_METRIC_OPTION_CHECKED = 0x00000040, // set when the charting thread checks this metric for use in charts (its usefulness) + STATSD_METRIC_OPTION_USEFUL = 0x00000080, // set when the charting thread finds the metric useful (i.e. used in a chart) + STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED = 0x00000100, // set when the collection is full for this metric + STATSD_METRIC_OPTION_UPDATED_CHART_METADATA = 0x00000200, // set when the private chart metadata have been updated via tags +} STATS_METRIC_OPTIONS; + +typedef enum statsd_metric_type { + STATSD_METRIC_TYPE_GAUGE, + STATSD_METRIC_TYPE_COUNTER, + STATSD_METRIC_TYPE_METER, + STATSD_METRIC_TYPE_TIMER, + STATSD_METRIC_TYPE_HISTOGRAM, + STATSD_METRIC_TYPE_SET, + STATSD_METRIC_TYPE_DICTIONARY +} STATSD_METRIC_TYPE; + + +typedef struct statsd_metric { + const char *name; // the name of the metric - linked to dictionary name + uint32_t hash; // hash of the name + + STATSD_METRIC_TYPE type; + + // metadata about data collection + collected_number events; // the number of times this metric has been collected (never resets) + size_t count; // the number of times this metric has been collected since the last flush + + // the actual collected data + union { + STATSD_METRIC_GAUGE gauge; + STATSD_METRIC_COUNTER counter; + STATSD_METRIC_HISTOGRAM histogram; + STATSD_METRIC_SET set; + STATSD_METRIC_DICTIONARY dictionary; + }; + + char *units; + char *dimname; + char *family; + + // chart related members + STATS_METRIC_OPTIONS options; // STATSD_METRIC_OPTION_* (bitfield) + char reset; // set to 1 by the charting thread to instruct the collector thread(s) to reset this metric + collected_number last; // the last value sent to netdata + RRDSET *st; // the private chart of this metric + RRDDIM *rd_value; // the dimension of this metric value + RRDDIM *rd_count; // the dimension for the number of events received + + // linking, used for walking through all metrics + struct statsd_metric *next_useful; +} STATSD_METRIC; + + +// -------------------------------------------------------------------------------------------------------------------- +// each type of metric has its own index + +typedef struct statsd_index { + char *name; // the name of the index of metrics + size_t events; // the number of events processed for this index + size_t metrics; // the number of metrics in this index + size_t useful; // the number of useful metrics in this index + + STATSD_METRIC_TYPE type; // the type of index + DICTIONARY *dict; + + STATSD_METRIC *first_useful; // the linked list of useful metrics (new metrics are added in front) + + STATS_METRIC_OPTIONS default_options; // default options for all metrics in this index +} STATSD_INDEX; + +// -------------------------------------------------------------------------------------------------------------------- +// synthetic charts + +typedef enum statsd_app_chart_dimension_value_type { + STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS, + STATSD_APP_CHART_DIM_VALUE_TYPE_LAST, + STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE, + STATSD_APP_CHART_DIM_VALUE_TYPE_SUM, + STATSD_APP_CHART_DIM_VALUE_TYPE_MIN, + STATSD_APP_CHART_DIM_VALUE_TYPE_MAX, + STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE, + STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN, + STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV +} STATSD_APP_CHART_DIM_VALUE_TYPE; + +typedef struct statsd_app_chart_dimension { + const char *name; // the name of this dimension + const char *metric; // the source metric name of this dimension + uint32_t metric_hash; // hash for fast string comparisons + + SIMPLE_PATTERN *metric_pattern; // set when the 'metric' is a simple pattern + + collected_number multiplier; // the multiplier of the dimension + collected_number divisor; // the divisor of the dimension + RRDDIM_FLAGS flags; // the RRDDIM flags for this dimension + RRDDIM_OPTIONS options; // the RRDDIM options for this dimension + + STATSD_APP_CHART_DIM_VALUE_TYPE value_type; // which value to use of the source metric + + RRDDIM *rd; // a pointer to the RRDDIM that has been created for this dimension + collected_number *value_ptr; // a pointer to the source metric value + RRD_ALGORITHM algorithm; // the algorithm of this dimension + + struct statsd_app_chart_dimension *next; // the next dimension for this chart +} STATSD_APP_CHART_DIM; + +typedef struct statsd_app_chart { + const char *id; + const char *name; + const char *title; + const char *family; + const char *context; + const char *units; + const char *module; + long priority; + RRDSET_TYPE chart_type; + STATSD_APP_CHART_DIM *dimensions; + size_t dimensions_count; + size_t dimensions_linked_count; + + RRDSET *st; + struct statsd_app_chart *next; +} STATSD_APP_CHART; + +typedef struct statsd_app { + const char *name; + SIMPLE_PATTERN *metrics; + STATS_METRIC_OPTIONS default_options; + RRD_MEMORY_MODE rrd_memory_mode; + DICTIONARY *dict; + long rrd_history_entries; + + const char *source; + STATSD_APP_CHART *charts; + struct statsd_app *next; +} STATSD_APP; + +// -------------------------------------------------------------------------------------------------------------------- +// global statsd data + +struct collection_thread_status { + int status; + size_t max_sockets; + + netdata_thread_t thread; +}; + +static struct statsd { + STATSD_INDEX gauges; + STATSD_INDEX counters; + STATSD_INDEX timers; + STATSD_INDEX histograms; + STATSD_INDEX meters; + STATSD_INDEX sets; + STATSD_INDEX dictionaries; + + size_t unknown_types; + size_t socket_errors; + size_t tcp_socket_connects; + size_t tcp_socket_disconnects; + size_t tcp_socket_connected; + size_t tcp_socket_reads; + size_t tcp_packets_received; + size_t tcp_bytes_read; + size_t udp_socket_reads; + size_t udp_packets_received; + size_t udp_bytes_read; + + int enabled; + int update_every; + SIMPLE_PATTERN *charts_for; + + size_t tcp_idle_timeout; + collected_number decimal_detail; + size_t private_charts; + size_t max_private_charts_hard; + long private_charts_rrd_history_entries; + unsigned int private_charts_hidden:1; + + STATSD_APP *apps; + size_t recvmmsg_size; + size_t histogram_increase_step; + double histogram_percentile; + char *histogram_percentile_str; + size_t dictionary_max_unique; + + int threads; + struct collection_thread_status *collection_threads_status; + + LISTEN_SOCKETS sockets; +} statsd = { + .enabled = 1, + .max_private_charts_hard = 1000, + .private_charts_hidden = 0, + .recvmmsg_size = 10, + .decimal_detail = STATSD_DECIMAL_DETAIL, + + .gauges = { + .name = "gauge", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_GAUGE, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .counters = { + .name = "counter", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_COUNTER, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .timers = { + .name = "timer", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_TIMER, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .histograms = { + .name = "histogram", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_HISTOGRAM, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .meters = { + .name = "meter", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_METER, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .sets = { + .name = "set", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_SET, + .default_options = STATSD_METRIC_OPTION_NONE + }, + .dictionaries = { + .name = "dictionary", + .events = 0, + .metrics = 0, + .dict = NULL, + .type = STATSD_METRIC_TYPE_DICTIONARY, + .default_options = STATSD_METRIC_OPTION_NONE + }, + + .tcp_idle_timeout = 600, + + .apps = NULL, + .histogram_percentile = 95.0, + .histogram_increase_step = 10, + .dictionary_max_unique = 200, + .threads = 0, + .collection_threads_status = NULL, + .sockets = { + .config = &netdata_config, + .config_section = CONFIG_SECTION_STATSD, + .default_bind_to = "udp:localhost tcp:localhost", + .default_port = STATSD_LISTEN_PORT, + .backlog = STATSD_LISTEN_BACKLOG + }, +}; + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd index management - add/find metrics + +static void dictionary_metric_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + STATSD_INDEX *index = (STATSD_INDEX *)data; + STATSD_METRIC *m = (STATSD_METRIC *)value; + const char *name = dictionary_acquired_item_name(item); + + debug(D_STATSD, "Creating new %s metric '%s'", index->name, name); + + m->name = name; + m->hash = simple_hash(name); + m->type = index->type; + m->options = index->default_options; + + if (m->type == STATSD_METRIC_TYPE_HISTOGRAM || m->type == STATSD_METRIC_TYPE_TIMER) { + m->histogram.ext = callocz(1,sizeof(STATSD_METRIC_HISTOGRAM_EXTENSIONS)); + netdata_mutex_init(&m->histogram.ext->mutex); + } + + __atomic_fetch_add(&index->metrics, 1, __ATOMIC_RELAXED); +} + +static void dictionary_metric_delete_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + (void)data; // STATSD_INDEX *index = (STATSD_INDEX *)data; + (void)item; + STATSD_METRIC *m = (STATSD_METRIC *)value; + + if(m->type == STATSD_METRIC_TYPE_HISTOGRAM || m->type == STATSD_METRIC_TYPE_TIMER) { + freez(m->histogram.ext); + m->histogram.ext = NULL; + } + + freez(m->units); + freez(m->family); + freez(m->dimname); +} + +static inline STATSD_METRIC *statsd_find_or_add_metric(STATSD_INDEX *index, const char *name) { + debug(D_STATSD, "searching for metric '%s' under '%s'", name, index->name); + +#ifdef STATSD_MULTITHREADED + // avoid the write lock of dictionary_set() for existing metrics + STATSD_METRIC *m = dictionary_get(index->dict, name); + if(!m) m = dictionary_set(index->dict, name, NULL, sizeof(STATSD_METRIC)); +#else + // no locks here, go faster + // this will call the dictionary_metric_insert_callback() if an item + // is inserted, otherwise it will return the existing one. + // We used the flag DICT_OPTION_DONT_OVERWRITE_VALUE to support this. + STATSD_METRIC *m = dictionary_set(index->dict, name, NULL, sizeof(STATSD_METRIC)); +#endif + + index->events++; + return m; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd parsing numbers + +static inline NETDATA_DOUBLE statsd_parse_float(const char *v, NETDATA_DOUBLE def) { + NETDATA_DOUBLE value; + + if(likely(v && *v)) { + char *e = NULL; + value = str2ndd(v, &e); + if(unlikely(e && *e)) + error("STATSD: excess data '%s' after value '%s'", e, v); + } + else + value = def; + + return value; +} + +static inline NETDATA_DOUBLE statsd_parse_sampling_rate(const char *v) { + NETDATA_DOUBLE sampling_rate = statsd_parse_float(v, 1.0); + if(unlikely(isless(sampling_rate, 0.001))) sampling_rate = 0.001; + if(unlikely(isgreater(sampling_rate, 1.0))) sampling_rate = 1.0; + return sampling_rate; +} + +static inline long long statsd_parse_int(const char *v, long long def) { + long long value; + + if(likely(v && *v)) { + char *e = NULL; + value = str2ll(v, &e); + if(unlikely(e && *e)) + error("STATSD: excess data '%s' after value '%s'", e, v); + } + else + value = def; + + return value; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd processors per metric type + +static inline void statsd_reset_metric(STATSD_METRIC *m) { + m->reset = 0; + m->count = 0; +} + +static inline int value_is_zinit(const char *value) { + return (value && *value == 'z' && *++value == 'i' && *++value == 'n' && *++value == 'i' && *++value == 't' && *++value == '\0'); +} + +#define is_metric_checked(m) ((m)->options & STATSD_METRIC_OPTION_CHECKED) +#define is_metric_useful_for_collection(m) (!is_metric_checked(m) || ((m)->options & STATSD_METRIC_OPTION_USEFUL)) + +static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, const char *sampling) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric '%s' of type gauge, with empty value is ignored.", m->name); + return; + } + + if(unlikely(m->reset)) { + // no need to reset anything specific for gauges + statsd_reset_metric(m); + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + if (unlikely(*value == '+' || *value == '-')) + m->gauge.value += statsd_parse_float(value, 1.0) / statsd_parse_sampling_rate(sampling); + else + m->gauge.value = statsd_parse_float(value, 1.0); + + m->events++; + m->count++; + } +} + +static inline void statsd_process_counter_or_meter(STATSD_METRIC *m, const char *value, const char *sampling) { + if(!is_metric_useful_for_collection(m)) return; + + // we accept empty values for counters + + if(unlikely(m->reset)) statsd_reset_metric(m); + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + m->counter.value += llrintndd((NETDATA_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_sampling_rate(sampling)); + + m->events++; + m->count++; + } +} + +#define statsd_process_counter(m, value, sampling) statsd_process_counter_or_meter(m, value, sampling) +#define statsd_process_meter(m, value, sampling) statsd_process_counter_or_meter(m, value, sampling) + +static inline void statsd_process_histogram_or_timer(STATSD_METRIC *m, const char *value, const char *sampling, const char *type) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric of type %s, with empty value is ignored.", type); + return; + } + + if(unlikely(m->reset)) { + m->histogram.ext->used = 0; + statsd_reset_metric(m); + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + NETDATA_DOUBLE v = statsd_parse_float(value, 1.0); + NETDATA_DOUBLE sampling_rate = statsd_parse_sampling_rate(sampling); + if(unlikely(isless(sampling_rate, 0.01))) sampling_rate = 0.01; + if(unlikely(isgreater(sampling_rate, 1.0))) sampling_rate = 1.0; + + long long samples = llrintndd(1.0 / sampling_rate); + while(samples-- > 0) { + + if(unlikely(m->histogram.ext->used == m->histogram.ext->size)) { + netdata_mutex_lock(&m->histogram.ext->mutex); + m->histogram.ext->size += statsd.histogram_increase_step; + m->histogram.ext->values = reallocz(m->histogram.ext->values, sizeof(NETDATA_DOUBLE) * m->histogram.ext->size); + netdata_mutex_unlock(&m->histogram.ext->mutex); + } + + m->histogram.ext->values[m->histogram.ext->used++] = v; + } + + m->events++; + m->count++; + } +} + +#define statsd_process_timer(m, value, sampling) statsd_process_histogram_or_timer(m, value, sampling, "timer") +#define statsd_process_histogram(m, value, sampling) statsd_process_histogram_or_timer(m, value, sampling, "histogram") + +static void dictionary_metric_set_value_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + (void)item; + (void)value; + STATSD_METRIC *m = (STATSD_METRIC *)data; + m->set.unique++; +} + +static inline void statsd_process_set(STATSD_METRIC *m, const char *value) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric of type set, with empty value is ignored."); + return; + } + + if(unlikely(m->reset)) { + if(likely(m->set.dict)) { + dictionary_destroy(m->set.dict); + m->set.dict = NULL; + } + statsd_reset_metric(m); + } + + if (unlikely(!m->set.dict)) { + m->set.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + dictionary_register_insert_callback(m->set.dict, dictionary_metric_set_value_insert_callback, m); + m->set.unique = 0; + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { +#ifdef STATSD_MULTITHREADED + // avoid the write lock to check if something is already there + if(!dictionary_get(m->set.dict, value)) + dictionary_set(m->set.dict, value, NULL, 0); +#else + dictionary_set(m->set.dict, value, NULL, 0); +#endif + m->events++; + m->count++; + } +} + +static void dictionary_metric_dict_value_insert_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + (void)item; + (void)value; + STATSD_METRIC *m = (STATSD_METRIC *)data; + m->dictionary.unique++; +} + +static inline void statsd_process_dictionary(STATSD_METRIC *m, const char *value) { + if(!is_metric_useful_for_collection(m)) return; + + if(unlikely(!value || !*value)) { + error("STATSD: metric of type set, with empty value is ignored."); + return; + } + + if(unlikely(m->reset)) + statsd_reset_metric(m); + + if (unlikely(!m->dictionary.dict)) { + m->dictionary.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + dictionary_register_insert_callback(m->dictionary.dict, dictionary_metric_dict_value_insert_callback, m); + m->dictionary.unique = 0; + } + + if(unlikely(value_is_zinit(value))) { + // magic loading of metric, without affecting anything + } + else { + STATSD_METRIC_DICTIONARY_ITEM *t = (STATSD_METRIC_DICTIONARY_ITEM *)dictionary_get(m->dictionary.dict, value); + + if (unlikely(!t)) { + if(!t && m->dictionary.unique >= statsd.dictionary_max_unique) + value = "other"; + + t = (STATSD_METRIC_DICTIONARY_ITEM *)dictionary_set(m->dictionary.dict, value, NULL, sizeof(STATSD_METRIC_DICTIONARY_ITEM)); + } + + t->count++; + m->events++; + m->count++; + } +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd parsing + +static inline const char *statsd_parse_skip_up_to(const char *s, char d1, char d2, char d3) { + char c; + + for(c = *s; c && c != d1 && c != d2 && c != d3 && c != '\r' && c != '\n'; c = *++s) ; + + return s; +} + +const char *statsd_parse_skip_spaces(const char *s) { + char c; + + for(c = *s; c && ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ); c = *++s) ; + + return s; +} + +static inline const char *statsd_parse_field_trim(const char *start, char *end) { + if(unlikely(!start || !*start)) { + start = end; + return start; + } + + while(start <= end && (*start == ' ' || *start == '\t')) + start++; + + *end = '\0'; + end--; + while(end >= start && (*end == ' ' || *end == '\t')) + *end-- = '\0'; + + return start; +} + +static void statsd_process_metric(const char *name, const char *value, const char *type, const char *sampling, const char *tags) { + debug(D_STATSD, "STATSD: raw metric '%s', value '%s', type '%s', sampling '%s', tags '%s'", name?name:"(null)", value?value:"(null)", type?type:"(null)", sampling?sampling:"(null)", tags?tags:"(null)"); + + if(unlikely(!name || !*name)) return; + if(unlikely(!type || !*type)) type = "m"; + + STATSD_METRIC *m = NULL; + + char t0 = type[0], t1 = type[1]; + if(unlikely(t0 == 'g' && t1 == '\0')) { + statsd_process_gauge( + m = statsd_find_or_add_metric(&statsd.gauges, name), + value, sampling); + } + else if(unlikely((t0 == 'c' || t0 == 'C') && t1 == '\0')) { + // etsy/statsd uses 'c' + // brubeck uses 'C' + statsd_process_counter( + m = statsd_find_or_add_metric(&statsd.counters, name), + value, sampling); + } + else if(unlikely(t0 == 'm' && t1 == '\0')) { + statsd_process_meter( + m = statsd_find_or_add_metric(&statsd.meters, name), + value, sampling); + } + else if(unlikely(t0 == 'h' && t1 == '\0')) { + statsd_process_histogram( + m = statsd_find_or_add_metric(&statsd.histograms, name), + value, sampling); + } + else if(unlikely(t0 == 's' && t1 == '\0')) { + statsd_process_set( + m = statsd_find_or_add_metric(&statsd.sets, name), + value); + } + else if(unlikely(t0 == 'd' && t1 == '\0')) { + statsd_process_dictionary( + m = statsd_find_or_add_metric(&statsd.dictionaries, name), + value); + } + else if(unlikely(t0 == 'm' && t1 == 's' && type[2] == '\0')) { + statsd_process_timer( + m = statsd_find_or_add_metric(&statsd.timers, name), + value, sampling); + } + else { + statsd.unknown_types++; + error("STATSD: metric '%s' with value '%s' is sent with unknown metric type '%s'", name, value?value:"", type); + } + + if(m && tags && *tags) { + const char *s = tags; + while(*s) { + const char *tagkey = NULL, *tagvalue = NULL; + char *tagkey_end = NULL, *tagvalue_end = NULL; + + s = tagkey_end = (char *)statsd_parse_skip_up_to(tagkey = s, ':', '=', ','); + if(tagkey == tagkey_end) { + if (*s) { + s++; + s = statsd_parse_skip_spaces(s); + } + continue; + } + + if(likely(*s == ':' || *s == '=')) + s = tagvalue_end = (char *) statsd_parse_skip_up_to(tagvalue = ++s, ',', '\0', '\0'); + + if(*s == ',') s++; + + statsd_parse_field_trim(tagkey, tagkey_end); + statsd_parse_field_trim(tagvalue, tagvalue_end); + + if(tagkey && *tagkey && tagvalue && *tagvalue) { + if (strcmp(tagkey, "units") == 0 && (!m->units || strcmp(m->units, tagvalue) != 0)) { + m->units = strdupz(tagvalue); + m->options |= STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + } + + if (strcmp(tagkey, "name") == 0 && (!m->dimname || strcmp(m->dimname, tagvalue) != 0)) { + m->dimname = strdupz(tagvalue); + m->options |= STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + } + + if (strcmp(tagkey, "family") == 0 && (!m->family || strcmp(m->family, tagvalue) != 0)) { + m->family = strdupz(tagvalue); + m->options |= STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + } + } + } + } +} + +static inline size_t statsd_process(char *buffer, size_t size, int require_newlines) { + buffer[size] = '\0'; + debug(D_STATSD, "RECEIVED: %zu bytes: '%s'", size, buffer); + + const char *s = buffer; + while(*s) { + const char *name = NULL, *value = NULL, *type = NULL, *sampling = NULL, *tags = NULL; + char *name_end = NULL, *value_end = NULL, *type_end = NULL, *sampling_end = NULL, *tags_end = NULL; + + s = name_end = (char *)statsd_parse_skip_up_to(name = s, ':', '=', '|'); + if(name == name_end) { + if (*s) { + s++; + s = statsd_parse_skip_spaces(s); + } + continue; + } + + if(likely(*s == ':' || *s == '=')) + s = value_end = (char *) statsd_parse_skip_up_to(value = ++s, '|', '@', '#'); + + if(likely(*s == '|')) + s = type_end = (char *) statsd_parse_skip_up_to(type = ++s, '|', '@', '#'); + + while(*s == '|' || *s == '@' || *s == '#') { + // parse all the fields that may be appended + + if ((*s == '|' && s[1] == '@') || *s == '@') { + s = sampling_end = (char *)statsd_parse_skip_up_to(sampling = ++s, '|', '@', '#'); + if (*sampling == '@') sampling++; + } + else if ((*s == '|' && s[1] == '#') || *s == '#') { + s = tags_end = (char *)statsd_parse_skip_up_to(tags = ++s, '|', '@', '#'); + if (*tags == '#') tags++; + } + else { + // unknown field, skip it + s = (char *)statsd_parse_skip_up_to(++s, '|', '@', '#'); + } + } + + // skip everything until the end of the line + while(*s && *s != '\n') s++; + + if(unlikely(require_newlines && *s != '\n' && s > buffer)) { + // move the remaining data to the beginning + size -= (name - buffer); + memmove(buffer, name, size); + return size; + } + else + s = statsd_parse_skip_spaces(s); + + statsd_process_metric( + statsd_parse_field_trim(name, name_end) + , statsd_parse_field_trim(value, value_end) + , statsd_parse_field_trim(type, type_end) + , statsd_parse_field_trim(sampling, sampling_end) + , statsd_parse_field_trim(tags, tags_end) + ); + } + + return 0; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd pollfd interface + +#define STATSD_TCP_BUFFER_SIZE 65536 // minimize tcp reads +#define STATSD_UDP_BUFFER_SIZE 9000 // this should be up to MTU + +typedef enum { + STATSD_SOCKET_DATA_TYPE_TCP, + STATSD_SOCKET_DATA_TYPE_UDP +} STATSD_SOCKET_DATA_TYPE; + +struct statsd_tcp { + STATSD_SOCKET_DATA_TYPE type; + size_t size; + size_t len; + char buffer[]; +}; + +#ifdef HAVE_RECVMMSG +struct statsd_udp { + int *running; + STATSD_SOCKET_DATA_TYPE type; + size_t size; + struct iovec *iovecs; + struct mmsghdr *msgs; +}; +#else +struct statsd_udp { + int *running; + STATSD_SOCKET_DATA_TYPE type; + char buffer[STATSD_UDP_BUFFER_SIZE]; +}; +#endif + +// new TCP client connected +static void *statsd_add_callback(POLLINFO *pi, short int *events, void *data) { + (void)pi; + (void)data; + + worker_is_busy(WORKER_JOB_TYPE_TCP_CONNECTED); + *events = POLLIN; + + struct statsd_tcp *t = (struct statsd_tcp *)callocz(sizeof(struct statsd_tcp) + STATSD_TCP_BUFFER_SIZE, 1); + t->type = STATSD_SOCKET_DATA_TYPE_TCP; + t->size = STATSD_TCP_BUFFER_SIZE - 1; + statsd.tcp_socket_connects++; + statsd.tcp_socket_connected++; + + worker_is_idle(); + return t; +} + +// TCP client disconnected +static void statsd_del_callback(POLLINFO *pi) { + worker_is_busy(WORKER_JOB_TYPE_TCP_DISCONNECTED); + + struct statsd_tcp *t = pi->data; + + if(likely(t)) { + if(t->type == STATSD_SOCKET_DATA_TYPE_TCP) { + if(t->len != 0) { + statsd.socket_errors++; + error("STATSD: client is probably sending unterminated metrics. Closed socket left with '%s'. Trying to process it.", t->buffer); + statsd_process(t->buffer, t->len, 0); + } + statsd.tcp_socket_disconnects++; + statsd.tcp_socket_connected--; + } + else + error("STATSD: internal error: received socket data type is %d, but expected %d", (int)t->type, (int)STATSD_SOCKET_DATA_TYPE_TCP); + + freez(t); + } + + worker_is_idle(); +} + +// Receive data +static int statsd_rcv_callback(POLLINFO *pi, short int *events) { + int retval = -1; + worker_is_busy(WORKER_JOB_TYPE_RCV_DATA); + + *events = POLLIN; + + int fd = pi->fd; + + switch(pi->socktype) { + case SOCK_STREAM: { + struct statsd_tcp *d = (struct statsd_tcp *)pi->data; + if(unlikely(!d)) { + error("STATSD: internal error: expected TCP data pointer is NULL"); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_TCP)) { + error("STATSD: internal error: socket data type should be %d, but it is %d", (int)STATSD_SOCKET_DATA_TYPE_TCP, (int)d->type); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } +#endif + + int ret = 0; + ssize_t rc; + do { + rc = recv(fd, &d->buffer[d->len], d->size - d->len, MSG_DONTWAIT); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { + error("STATSD: recv() on TCP socket %d failed.", fd); + statsd.socket_errors++; + ret = -1; + } + } + else if (!rc) { + // connection closed + debug(D_STATSD, "STATSD: client disconnected."); + ret = -1; + } + else { + // data received + d->len += rc; + statsd.tcp_socket_reads++; + statsd.tcp_bytes_read += rc; + } + + if(likely(d->len > 0)) { + statsd.tcp_packets_received++; + d->len = statsd_process(d->buffer, d->len, 1); + } + + if(unlikely(ret == -1)) { + retval = -1; + goto cleanup; + } + + } while (rc != -1); + break; + } + + case SOCK_DGRAM: { + struct statsd_udp *d = (struct statsd_udp *)pi->data; + if(unlikely(!d)) { + error("STATSD: internal error: expected UDP data pointer is NULL"); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(d->type != STATSD_SOCKET_DATA_TYPE_UDP)) { + error("STATSD: internal error: socket data should be %d, but it is %d", (int)d->type, (int)STATSD_SOCKET_DATA_TYPE_UDP); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } +#endif + +#ifdef HAVE_RECVMMSG + ssize_t rc; + do { + rc = recvmmsg(fd, d->msgs, (unsigned int)d->size, MSG_DONTWAIT, NULL); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { + error("STATSD: recvmmsg() on UDP socket %d failed.", fd); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + } else if (rc) { + // data received + statsd.udp_socket_reads++; + statsd.udp_packets_received += rc; + + size_t i; + for (i = 0; i < (size_t)rc; ++i) { + size_t len = (size_t)d->msgs[i].msg_len; + statsd.udp_bytes_read += len; + statsd_process(d->msgs[i].msg_hdr.msg_iov->iov_base, len, 0); + } + } + } while (rc != -1); + +#else // !HAVE_RECVMMSG + ssize_t rc; + do { + rc = recv(fd, d->buffer, STATSD_UDP_BUFFER_SIZE - 1, MSG_DONTWAIT); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR) { + error("STATSD: recv() on UDP socket %d failed.", fd); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + } else if (rc) { + // data received + statsd.udp_socket_reads++; + statsd.udp_packets_received++; + statsd.udp_bytes_read += rc; + statsd_process(d->buffer, (size_t) rc, 0); + } + } while (rc != -1); +#endif + + break; + } + + default: { + error("STATSD: internal error: unknown socktype %d on socket %d", pi->socktype, fd); + statsd.socket_errors++; + retval = -1; + goto cleanup; + } + } + + retval = 0; +cleanup: + worker_is_idle(); + return retval; +} + +static int statsd_snd_callback(POLLINFO *pi, short int *events) { + (void)pi; + (void)events; + + worker_is_busy(WORKER_JOB_TYPE_SND_DATA); + error("STATSD: snd_callback() called, but we never requested to send data to statsd clients."); + worker_is_idle(); + + return -1; +} + +// -------------------------------------------------------------------------------------------------------------------- +// statsd child thread to collect metrics from network + +void statsd_collector_thread_cleanup(void *data) { + struct statsd_udp *d = data; + *d->running = 0; + + info("cleaning up..."); + +#ifdef HAVE_RECVMMSG + size_t i; + for (i = 0; i < d->size; i++) + freez(d->iovecs[i].iov_base); + + freez(d->iovecs); + freez(d->msgs); +#endif + + freez(d); + worker_unregister(); +} + +void *statsd_collector_thread(void *ptr) { + struct collection_thread_status *status = ptr; + status->status = 1; + + worker_register("STATSD"); + worker_register_job_name(WORKER_JOB_TYPE_TCP_CONNECTED, "tcp connect"); + worker_register_job_name(WORKER_JOB_TYPE_TCP_DISCONNECTED, "tcp disconnect"); + worker_register_job_name(WORKER_JOB_TYPE_RCV_DATA, "receive"); + worker_register_job_name(WORKER_JOB_TYPE_SND_DATA, "send"); + + info("STATSD collector thread started with taskid %d", gettid()); + + struct statsd_udp *d = callocz(sizeof(struct statsd_udp), 1); + d->running = &status->status; + + netdata_thread_cleanup_push(statsd_collector_thread_cleanup, d); + +#ifdef HAVE_RECVMMSG + d->type = STATSD_SOCKET_DATA_TYPE_UDP; + d->size = statsd.recvmmsg_size; + d->iovecs = callocz(sizeof(struct iovec), d->size); + d->msgs = callocz(sizeof(struct mmsghdr), d->size); + + size_t i; + for (i = 0; i < d->size; i++) { + d->iovecs[i].iov_base = mallocz(STATSD_UDP_BUFFER_SIZE); + d->iovecs[i].iov_len = STATSD_UDP_BUFFER_SIZE - 1; + d->msgs[i].msg_hdr.msg_iov = &d->iovecs[i]; + d->msgs[i].msg_hdr.msg_iovlen = 1; + } +#endif + + poll_events(&statsd.sockets + , statsd_add_callback + , statsd_del_callback + , statsd_rcv_callback + , statsd_snd_callback + , NULL + , NULL // No access control pattern + , 0 // No dns lookups for access control pattern + , (void *)d + , 0 // tcp request timeout, 0 = disabled + , statsd.tcp_idle_timeout // tcp idle timeout, 0 = disabled + , statsd.update_every * 1000 + , ptr // timer_data + , status->max_sockets + ); + + netdata_thread_cleanup_pop(1); + return NULL; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// statsd applications configuration files parsing + +#define STATSD_CONF_LINE_MAX 8192 + +static STATSD_APP_CHART_DIM_VALUE_TYPE string2valuetype(const char *type, size_t line, const char *filename) { + if(!type || !*type) type = "last"; + + if(!strcmp(type, "events")) return STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS; + else if(!strcmp(type, "last")) return STATSD_APP_CHART_DIM_VALUE_TYPE_LAST; + else if(!strcmp(type, "min")) return STATSD_APP_CHART_DIM_VALUE_TYPE_MIN; + else if(!strcmp(type, "max")) return STATSD_APP_CHART_DIM_VALUE_TYPE_MAX; + else if(!strcmp(type, "sum")) return STATSD_APP_CHART_DIM_VALUE_TYPE_SUM; + else if(!strcmp(type, "average")) return STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE; + else if(!strcmp(type, "median")) return STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN; + else if(!strcmp(type, "stddev")) return STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV; + else if(!strcmp(type, "percentile")) return STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE; + + error("STATSD: invalid type '%s' at line %zu of file '%s'. Using 'last'.", type, line, filename); + return STATSD_APP_CHART_DIM_VALUE_TYPE_LAST; +} + +static const char *valuetype2string(STATSD_APP_CHART_DIM_VALUE_TYPE type) { + switch(type) { + case STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS: return "events"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_LAST: return "last"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_MIN: return "min"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_MAX: return "max"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_SUM: return "sum"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE: return "average"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN: return "median"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV: return "stddev"; + case STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE: return "percentile"; + } + + return "unknown"; +} + +static STATSD_APP_CHART_DIM *add_dimension_to_app_chart( + STATSD_APP *app __maybe_unused + , STATSD_APP_CHART *chart + , const char *metric_name + , const char *dim_name + , collected_number multiplier + , collected_number divisor + , RRDDIM_FLAGS flags + , RRDDIM_OPTIONS options + , STATSD_APP_CHART_DIM_VALUE_TYPE value_type +) { + STATSD_APP_CHART_DIM *dim = callocz(sizeof(STATSD_APP_CHART_DIM), 1); + + dim->metric = strdupz(metric_name); + dim->metric_hash = simple_hash(dim->metric); + + dim->name = strdupz((dim_name)?dim_name:""); + dim->multiplier = multiplier; + dim->divisor = divisor; + dim->value_type = value_type; + dim->flags = flags; + dim->options = options; + + if(!dim->multiplier) + dim->multiplier = 1; + + if(!dim->divisor) + dim->divisor = 1; + + // append it to the list of dimension + STATSD_APP_CHART_DIM *tdim; + for(tdim = chart->dimensions; tdim && tdim->next ; tdim = tdim->next) ; + if(!tdim) { + dim->next = chart->dimensions; + chart->dimensions = dim; + } + else { + dim->next = tdim->next; + tdim->next = dim; + } + chart->dimensions_count++; + + debug(D_STATSD, "Added dimension '%s' to chart '%s' of app '%s', for metric '%s', with type %u, multiplier " COLLECTED_NUMBER_FORMAT ", divisor " COLLECTED_NUMBER_FORMAT, + dim->name, chart->id, app->name, dim->metric, dim->value_type, dim->multiplier, dim->divisor); + + return dim; +} + +static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHART *chart, DICTIONARY *dict) { + debug(D_STATSD, "STATSD configuration reading file '%s'", filename); + + char *buffer = mallocz(STATSD_CONF_LINE_MAX + 1); + + FILE *fp = fopen(filename, "r"); + if(!fp) { + error("STATSD: cannot open file '%s'.", filename); + freez(buffer); + return -1; + } + + size_t line = 0; + char *s; + while(fgets(buffer, STATSD_CONF_LINE_MAX, fp) != NULL) { + buffer[STATSD_CONF_LINE_MAX] = '\0'; + line++; + + s = trim(buffer); + if (!s || *s == '#') { + debug(D_STATSD, "STATSD: ignoring line %zu of file '%s', it is empty.", line, filename); + continue; + } + + debug(D_STATSD, "STATSD: processing line %zu of file '%s': %s", line, filename, buffer); + + if(*s == 'i' && strncmp(s, "include", 7) == 0) { + s = trim(&s[7]); + if(s && *s) { + char *tmp; + if(*s == '/') + tmp = strdupz(s); + else { + // the file to be included is relative to current file + // find the directory name from the file we already read + char *filename2 = strdupz(filename); // copy filename, since dirname() will change it + char *dir = dirname(filename2); // find the directory part of the filename + tmp = strdupz_path_subpath(dir, s); // compose the new filename to read; + freez(filename2); // free the filename we copied + } + statsd_readfile(tmp, app, chart, dict); + freez(tmp); + } + else + error("STATSD: ignoring line %zu of file '%s', include filename is empty", line, filename); + + continue; + } + + int len = (int) strlen(s); + if (*s == '[' && s[len - 1] == ']') { + // new section + s[len - 1] = '\0'; + s++; + + if (!strcmp(s, "app")) { + // a new app + app = callocz(sizeof(STATSD_APP), 1); + app->name = strdupz("unnamed"); + app->rrd_memory_mode = localhost->rrd_memory_mode; + app->rrd_history_entries = localhost->rrd_history_entries; + + app->next = statsd.apps; + statsd.apps = app; + chart = NULL; + dict = NULL; + + { + char lineandfile[FILENAME_MAX + 1]; + snprintfz(lineandfile, FILENAME_MAX, "%zu@%s", line, filename); + app->source = strdupz(lineandfile); + } + } + else if(app) { + if(!strcmp(s, "dictionary")) { + if(!app->dict) + app->dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + + dict = app->dict; + } + else { + dict = NULL; + + // a new chart + chart = callocz(sizeof(STATSD_APP_CHART), 1); + netdata_fix_chart_id(s); + chart->id = strdupz(s); + chart->name = strdupz(s); + chart->title = strdupz("Statsd chart"); + chart->context = strdupz(s); + chart->family = strdupz("overview"); + chart->units = strdupz("value"); + chart->priority = NETDATA_CHART_PRIO_STATSD_PRIVATE; + chart->chart_type = RRDSET_TYPE_LINE; + + chart->next = app->charts; + app->charts = chart; + + if (!strncmp( + filename, + netdata_configured_stock_config_dir, + strlen(netdata_configured_stock_config_dir))) { + char tmpfilename[FILENAME_MAX + 1]; + strncpyz(tmpfilename, filename, FILENAME_MAX); + chart->module = strdupz(basename(tmpfilename)); + } else { + chart->module = strdupz("synthetic_chart"); + } + } + } + else + error("STATSD: ignoring line %zu ('%s') of file '%s', [app] is not defined.", line, s, filename); + + continue; + } + + if(!app) { + error("STATSD: ignoring line %zu ('%s') of file '%s', it is outside all sections.", line, s, filename); + continue; + } + + char *name = s; + char *value = strchr(s, '='); + if(!value) { + error("STATSD: ignoring line %zu ('%s') of file '%s', there is no = in it.", line, s, filename); + continue; + } + *value = '\0'; + value++; + + name = trim(name); + value = trim(value); + + if(!name || *name == '#') { + error("STATSD: ignoring line %zu of file '%s', name is empty.", line, filename); + continue; + } + if(!value) { + debug(D_CONFIG, "STATSD: ignoring line %zu of file '%s', value is empty.", line, filename); + continue; + } + + if(unlikely(dict)) { + // parse [dictionary] members + + dictionary_set(dict, name, value, strlen(value) + 1); + } + else if(!chart) { + // parse [app] members + + if(!strcmp(name, "name")) { + freez((void *)app->name); + netdata_fix_chart_name(value); + app->name = strdupz(value); + } + else if (!strcmp(name, "metrics")) { + simple_pattern_free(app->metrics); + app->metrics = simple_pattern_create(value, NULL, SIMPLE_PATTERN_EXACT); + } + else if (!strcmp(name, "private charts")) { + if (!strcmp(value, "yes") || !strcmp(value, "on")) + app->default_options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + else + app->default_options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } + else if (!strcmp(name, "gaps when not collected")) { + if (!strcmp(value, "yes") || !strcmp(value, "on")) + app->default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + } + else if (!strcmp(name, "memory mode")) { + // this is not supported anymore + // with the implementation of storage engines, all charts have the same storage engine always + // app->rrd_memory_mode = rrd_memory_mode_id(value); + ; + } + else if (!strcmp(name, "history")) { + app->rrd_history_entries = atol(value); + if (app->rrd_history_entries < 5) + app->rrd_history_entries = 5; + } + else { + error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [app] section.", line, name, filename); + continue; + } + } + else { + // parse [chart] members + + if(!strcmp(name, "name")) { + freez((void *)chart->name); + netdata_fix_chart_id(value); + chart->name = strdupz(value); + } + else if(!strcmp(name, "title")) { + freez((void *)chart->title); + chart->title = strdupz(value); + } + else if (!strcmp(name, "family")) { + freez((void *)chart->family); + chart->family = strdupz(value); + } + else if (!strcmp(name, "context")) { + freez((void *)chart->context); + netdata_fix_chart_id(value); + chart->context = strdupz(value); + } + else if (!strcmp(name, "units")) { + freez((void *)chart->units); + chart->units = strdupz(value); + } + else if (!strcmp(name, "priority")) { + chart->priority = atol(value); + } + else if (!strcmp(name, "type")) { + chart->chart_type = rrdset_type_id(value); + } + else if (!strcmp(name, "dimension")) { + // metric [name [type [multiplier [divisor]]]] + char *words[10] = { NULL }; + size_t num_words = pluginsd_split_words(value, words, 10, NULL, NULL, 0); + + int pattern = 0; + size_t i = 0; + char *metric_name = get_word(words, num_words, i++); + + if(strcmp(metric_name, "pattern") == 0) { + metric_name = get_word(words, num_words, i++); + pattern = 1; + } + + char *dim_name = get_word(words, num_words, i++); + char *type = get_word(words, num_words, i++); + char *multiplier = get_word(words, num_words, i++); + char *divisor = get_word(words, num_words, i++); + char *opts = get_word(words, num_words, i++); + + RRDDIM_FLAGS flags = RRDDIM_FLAG_NONE; + RRDDIM_OPTIONS options = RRDDIM_OPTION_NONE; + if(opts && *opts) { + if(strstr(opts, "hidden") != NULL) options |= RRDDIM_OPTION_HIDDEN; + if(strstr(opts, "noreset") != NULL) options |= RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS; + if(strstr(opts, "nooverflow") != NULL) options |= RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS; + } + + if(!pattern) { + if(app->dict) { + if(dim_name && *dim_name) { + char *n = dictionary_get(app->dict, dim_name); + if(n) dim_name = n; + } + else { + dim_name = dictionary_get(app->dict, metric_name); + } + } + + if(!dim_name || !*dim_name) + dim_name = metric_name; + } + + STATSD_APP_CHART_DIM *dim = add_dimension_to_app_chart( + app + , chart + , metric_name + , dim_name + , (multiplier && *multiplier)?str2l(multiplier):1 + , (divisor && *divisor)?str2l(divisor):1 + , flags + , + options, string2valuetype(type, line, filename) + ); + + if(pattern) + dim->metric_pattern = simple_pattern_create(dim->metric, NULL, SIMPLE_PATTERN_EXACT); + } + else { + error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [%s] section.", line, name, filename, chart->id); + continue; + } + } + } + + freez(buffer); + fclose(fp); + return 0; +} + +static int statsd_file_callback(const char *filename, void *data) { + (void)data; + return statsd_readfile(filename, NULL, NULL, NULL); +} + +static inline void statsd_readdir(const char *user_path, const char *stock_path, const char *subpath) { + recursive_config_double_dir_load(user_path, stock_path, subpath, statsd_file_callback, NULL, 0); +} + +// -------------------------------------------------------------------------------------------------------------------- +// send metrics to netdata - in private charts - called from the main thread + +// extract chart type and chart id from metric name +static inline void statsd_get_metric_type_and_id(STATSD_METRIC *m, char *type, char *id, char *context, const char *metrictype, size_t len) { + + // The full chart type.id looks like this: + // ${STATSD_CHART_PREFIX} + "_" + ${METRIC_NAME} + "_" + ${METRIC_TYPE} + // + // where: + // STATSD_CHART_PREFIX = "statsd" as defined above + // METRIC_NAME = whatever the user gave to statsd + // METRIC_TYPE = "gauge", "counter", "meter", "timer", "histogram", "set", "dictionary" + + // for chart type, we want: + // ${STATSD_CHART_PREFIX} + "_" + the first word of ${METRIC_NAME} + + // find the first word of ${METRIC_NAME} + char firstword[len + 1], *s = ""; + strncpyz(firstword, m->name, len); + for (s = firstword; *s ; s++) { + if (unlikely(*s == '.' || *s == '_')) { + *s = '\0'; + s++; + break; + } + } + // firstword has the first word of ${METRIC_NAME} + // s has the remaining, if any + + // create the chart type: + snprintfz(type, len, STATSD_CHART_PREFIX "_%s", firstword); + + // for chart id, we want: + // the remaining of the words of ${METRIC_NAME} + "_" + ${METRIC_TYPE} + // or the ${METRIC_NAME} has no remaining words, the ${METRIC_TYPE} alone + if(*s) + snprintfz(id, len, "%s_%s", s, metrictype); + else + snprintfz(id, len, "%s", metrictype); + + // for the context, we want the full of both the above, separated with a dot (type.id): + snprintfz(context, RRD_ID_LENGTH_MAX, "%s.%s", type, id); + + // make sure they don't have illegal characters + netdata_fix_chart_id(type); + netdata_fix_chart_id(id); + netdata_fix_chart_id(context); +} + +static inline RRDSET *statsd_private_rrdset_create( + STATSD_METRIC *m __maybe_unused + , const char *type + , const char *id + , const char *name + , const char *family + , const char *context + , const char *title + , const char *units + , long priority + , int update_every + , RRDSET_TYPE chart_type +) { + if(!m->st) + statsd.private_charts++; + + RRDSET *st = rrdset_create_custom( + localhost // host + , type // type + , id // id + , name // name + , family // family + , context // context + , title // title + , units // units + , PLUGIN_STATSD_NAME // plugin + , "private_chart" // module + , priority // priority + , update_every // update every + , chart_type // chart type + , default_rrd_memory_mode // memory mode + , default_rrd_history_entries // history + ); + rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST); + + if(statsd.private_charts_hidden) + rrdset_flag_set(st, RRDSET_FLAG_HIDDEN); + + // rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + return st; +} + +static inline void statsd_private_chart_gauge(STATSD_METRIC *m) { + debug(D_STATSD, "updating private chart for gauge metric '%s'", m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, "gauge", RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for gauge %s", m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:"gauges" // family (submenu) + , context // context + , title // title + , m->units?m->units:"value" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_LINE + ); + + m->rd_value = rrddim_add(m->st, "gauge", m->dimname?m->dimname:NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { + debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, dim, RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for %s %s", dim, m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:family // family (submenu) + , context // context + , title // title + , m->units?m->units:"events/s" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_AREA + ); + + m->rd_value = rrddim_add(m->st, dim, m->dimname?m->dimname:NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_set(STATSD_METRIC *m) { + debug(D_STATSD, "updating private chart for set metric '%s'", m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, "set", RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for set %s", m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:"sets" // family (submenu) + , context // context + , title // title + , m->units?m->units:"entries" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_LINE + ); + + m->rd_value = rrddim_add(m->st, "set", m->dimname?m->dimname:"unique", 1, 1, RRD_ALGORITHM_ABSOLUTE); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) { + debug(D_STATSD, "updating private chart for dictionary metric '%s'", m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, "dictionary", RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for dictionary %s", m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:"dictionaries" // family (submenu) + , context // context + , title // title + , m->units?m->units:"events/s" // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_STACKED + ); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + STATSD_METRIC_DICTIONARY_ITEM *t; + dfe_start_read(m->dictionary.dict, t) { + if (!t->rd) t->rd = rrddim_add(m->st, t_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_set_by_pointer(m->st, t->rd, (collected_number)t->count); + } + dfe_done(t); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { + debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name); + + if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) { + m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA; + + char type[RRD_ID_LENGTH_MAX + 1], id[RRD_ID_LENGTH_MAX + 1], context[RRD_ID_LENGTH_MAX + 1]; + statsd_get_metric_type_and_id(m, type, id, context, dim, RRD_ID_LENGTH_MAX); + + char title[RRD_ID_LENGTH_MAX + 1]; + snprintfz(title, RRD_ID_LENGTH_MAX, "statsd private chart for %s %s", dim, m->name); + + m->st = statsd_private_rrdset_create( + m + , type + , id + , NULL // name + , m->family?m->family:family // family (submenu) + , context // context + , title // title + , m->units?m->units:units // units + , NETDATA_CHART_PRIO_STATSD_PRIVATE + , statsd.update_every + , RRDSET_TYPE_AREA + ); + + m->histogram.ext->rd_min = rrddim_add(m->st, "min", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_max = rrddim_add(m->st, "max", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->rd_value = rrddim_add(m->st, "average", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_percentile = rrddim_add(m->st, statsd.histogram_percentile_str, NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_median = rrddim_add(m->st, "median", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + m->histogram.ext->rd_stddev = rrddim_add(m->st, "stddev", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + //m->histogram.ext->rd_sum = rrddim_add(m->st, "sum", NULL, 1, statsd.decimal_detail, RRD_ALGORITHM_ABSOLUTE); + + if(m->options & STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT) + m->rd_count = rrddim_add(m->st, "events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_min, m->histogram.ext->last_min); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_max, m->histogram.ext->last_max); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_percentile, m->histogram.ext->last_percentile); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_median, m->histogram.ext->last_median); + rrddim_set_by_pointer(m->st, m->histogram.ext->rd_stddev, m->histogram.ext->last_stddev); + //rrddim_set_by_pointer(m->st, m->histogram.ext->rd_sum, m->histogram.ext->last_sum); + rrddim_set_by_pointer(m->st, m->rd_value, m->last); + + if(m->rd_count) + rrddim_set_by_pointer(m->st, m->rd_count, m->events); + + rrdset_done(m->st); +} + +// -------------------------------------------------------------------------------------------------------------------- +// statsd flush metrics + +static inline void statsd_flush_gauge(STATSD_METRIC *m) { + debug(D_STATSD, "flushing gauge metric '%s'", m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = (collected_number) (m->gauge.value * statsd.decimal_detail); + + m->reset = 1; + updated = 1; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_gauge(m); +} + +static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) { + debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = m->counter.value; + + m->reset = 1; + updated = 1; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_counter_or_meter(m, dim, family); +} + +static inline void statsd_flush_counter(STATSD_METRIC *m) { + statsd_flush_counter_or_meter(m, "counter", "counters"); +} + +static inline void statsd_flush_meter(STATSD_METRIC *m) { + statsd_flush_counter_or_meter(m, "meter", "meters"); +} + +static inline void statsd_flush_set(STATSD_METRIC *m) { + debug(D_STATSD, "flushing set metric '%s'", m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = (collected_number)m->set.unique; + + m->reset = 1; + updated = 1; + } + else { + m->last = 0; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_set(m); +} + +static inline void statsd_flush_dictionary(STATSD_METRIC *m) { + debug(D_STATSD, "flushing dictionary metric '%s'", m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count)) { + m->last = (collected_number)m->dictionary.unique; + + m->reset = 1; + updated = 1; + } + else { + m->last = 0; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_dictionary(m); + + if(m->dictionary.unique >= statsd.dictionary_max_unique) { + if(!(m->options & STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED)) { + m->options |= STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED; + info( + "STATSD dictionary '%s' reach max of %zu items - try increasing 'dictionaries max unique dimensions' in netdata.conf", + m->name, + m->dictionary.unique); + } + } +} + +static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) { + debug(D_STATSD, "flushing %s metric '%s'", dim, m->name); + + int updated = 0; + if(unlikely(!m->reset && m->count && m->histogram.ext->used > 0)) { + netdata_mutex_lock(&m->histogram.ext->mutex); + + size_t len = m->histogram.ext->used; + NETDATA_DOUBLE *series = m->histogram.ext->values; + sort_series(series, len); + + m->histogram.ext->last_min = (collected_number)roundndd(series[0] * statsd.decimal_detail); + m->histogram.ext->last_max = (collected_number)roundndd(series[len - 1] * statsd.decimal_detail); + m->last = (collected_number)roundndd(average(series, len) * statsd.decimal_detail); + m->histogram.ext->last_median = (collected_number)roundndd(median_on_sorted_series(series, len) * statsd.decimal_detail); + m->histogram.ext->last_stddev = (collected_number)roundndd(standard_deviation(series, len) * statsd.decimal_detail); + m->histogram.ext->last_sum = (collected_number)roundndd(sum(series, len) * statsd.decimal_detail); + + size_t pct_len = (size_t)floor((double)len * statsd.histogram_percentile / 100.0); + if(pct_len < 1) + m->histogram.ext->last_percentile = (collected_number)(series[0] * statsd.decimal_detail); + else + m->histogram.ext->last_percentile = (collected_number)roundndd(series[pct_len - 1] * statsd.decimal_detail); + + netdata_mutex_unlock(&m->histogram.ext->mutex); + + debug(D_STATSD, "STATSD %s metric %s: min " COLLECTED_NUMBER_FORMAT ", max " COLLECTED_NUMBER_FORMAT ", last " COLLECTED_NUMBER_FORMAT ", pcent " COLLECTED_NUMBER_FORMAT ", median " COLLECTED_NUMBER_FORMAT ", stddev " COLLECTED_NUMBER_FORMAT ", sum " COLLECTED_NUMBER_FORMAT, + dim, m->name, m->histogram.ext->last_min, m->histogram.ext->last_max, m->last, m->histogram.ext->last_percentile, m->histogram.ext->last_median, m->histogram.ext->last_stddev, m->histogram.ext->last_sum); + + m->histogram.ext->zeroed = 0; + m->reset = 1; + updated = 1; + } + else if(unlikely(!m->histogram.ext->zeroed)) { + // reset the metrics + // if we collected anything, they will be updated below + // this ensures that we report zeros if nothing is collected + + m->histogram.ext->last_min = 0; + m->histogram.ext->last_max = 0; + m->last = 0; + m->histogram.ext->last_median = 0; + m->histogram.ext->last_stddev = 0; + m->histogram.ext->last_sum = 0; + m->histogram.ext->last_percentile = 0; + + m->histogram.ext->zeroed = 1; + } + + if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED)))) + statsd_private_chart_timer_or_histogram(m, dim, family, units); +} + +static inline void statsd_flush_timer(STATSD_METRIC *m) { + statsd_flush_timer_or_histogram(m, "timer", "timers", "milliseconds"); +} + +static inline void statsd_flush_histogram(STATSD_METRIC *m) { + statsd_flush_timer_or_histogram(m, "histogram", "histograms", "value"); +} + +static inline RRD_ALGORITHM statsd_algorithm_for_metric(STATSD_METRIC *m) { + switch(m->type) { + default: + case STATSD_METRIC_TYPE_GAUGE: + case STATSD_METRIC_TYPE_SET: + case STATSD_METRIC_TYPE_TIMER: + case STATSD_METRIC_TYPE_HISTOGRAM: + return RRD_ALGORITHM_ABSOLUTE; + + case STATSD_METRIC_TYPE_METER: + case STATSD_METRIC_TYPE_COUNTER: + case STATSD_METRIC_TYPE_DICTIONARY: + return RRD_ALGORITHM_INCREMENTAL; + } +} + +static inline void link_metric_to_app_dimension(STATSD_APP *app, STATSD_METRIC *m, STATSD_APP_CHART *chart, STATSD_APP_CHART_DIM *dim) { + if(dim->value_type == STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS) { + dim->value_ptr = &m->events; + dim->algorithm = RRD_ALGORITHM_INCREMENTAL; + } + else if(m->type == STATSD_METRIC_TYPE_HISTOGRAM || m->type == STATSD_METRIC_TYPE_TIMER) { + dim->algorithm = RRD_ALGORITHM_ABSOLUTE; + dim->divisor *= statsd.decimal_detail; + + switch(dim->value_type) { + case STATSD_APP_CHART_DIM_VALUE_TYPE_EVENTS: + // will never match - added to avoid warning + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_LAST: + case STATSD_APP_CHART_DIM_VALUE_TYPE_AVERAGE: + dim->value_ptr = &m->last; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_SUM: + dim->value_ptr = &m->histogram.ext->last_sum; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_MIN: + dim->value_ptr = &m->histogram.ext->last_min; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_MAX: + dim->value_ptr = &m->histogram.ext->last_max; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_MEDIAN: + dim->value_ptr = &m->histogram.ext->last_median; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_PERCENTILE: + dim->value_ptr = &m->histogram.ext->last_percentile; + break; + + case STATSD_APP_CHART_DIM_VALUE_TYPE_STDDEV: + dim->value_ptr = &m->histogram.ext->last_stddev; + break; + } + } + else { + if (dim->value_type != STATSD_APP_CHART_DIM_VALUE_TYPE_LAST) + error("STATSD: unsupported value type for dimension '%s' of chart '%s' of app '%s' on metric '%s'", dim->name, chart->id, app->name, m->name); + + dim->value_ptr = &m->last; + dim->algorithm = statsd_algorithm_for_metric(m); + + if(m->type == STATSD_METRIC_TYPE_GAUGE) + dim->divisor *= statsd.decimal_detail; + } + + if(unlikely(chart->st && dim->rd)) { + rrddim_set_algorithm(chart->st, dim->rd, dim->algorithm); + rrddim_set_multiplier(chart->st, dim->rd, dim->multiplier); + rrddim_set_divisor(chart->st, dim->rd, dim->divisor); + } + + chart->dimensions_linked_count++; + m->options |= STATSD_METRIC_OPTION_USED_IN_APPS; + debug(D_STATSD, "metric '%s' of type %u linked with app '%s', chart '%s', dimension '%s', algorithm '%s'", m->name, m->type, app->name, chart->id, dim->name, rrd_algorithm_name(dim->algorithm)); +} + +static inline void check_if_metric_is_for_app(STATSD_INDEX *index, STATSD_METRIC *m) { + (void)index; + + STATSD_APP *app; + for(app = statsd.apps; app ;app = app->next) { + if(unlikely(simple_pattern_matches(app->metrics, m->name))) { + debug(D_STATSD, "metric '%s' matches app '%s'", m->name, app->name); + + // the metric should get the options from the app + + if(app->default_options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED) + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + else + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + + if(app->default_options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED) + m->options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + else + m->options &= ~STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED; + + // check if there is a chart in this app, willing to get this metric + STATSD_APP_CHART *chart; + for(chart = app->charts; chart; chart = chart->next) { + + STATSD_APP_CHART_DIM *dim; + for(dim = chart->dimensions; dim ; dim = dim->next) { + if(unlikely(dim->metric_pattern)) { + size_t dim_name_len = strlen(dim->name); + size_t wildcarded_len = dim_name_len + strlen(m->name) + 1; + char wildcarded[wildcarded_len]; + + strcpy(wildcarded, dim->name); + char *ws = &wildcarded[dim_name_len]; + + if(simple_pattern_matches_extract(dim->metric_pattern, m->name, ws, wildcarded_len - dim_name_len)) { + + char *final_name = NULL; + + if(app->dict) { + if(likely(*wildcarded)) { + // use the name of the wildcarded string + final_name = dictionary_get(app->dict, wildcarded); + } + + if(unlikely(!final_name)) { + // use the name of the metric + final_name = dictionary_get(app->dict, m->name); + } + } + + if(unlikely(!final_name)) + final_name = wildcarded; + + add_dimension_to_app_chart( + app + , chart + , m->name + , final_name + , dim->multiplier + , dim->divisor + , dim->flags + , dim->options + , dim->value_type + ); + + // the new dimension is appended to the list + // so, it will be matched and linked later too + } + } + else if(!dim->value_ptr && dim->metric_hash == m->hash && !strcmp(dim->metric, m->name)) { + // we have a match - this metric should be linked to this dimension + link_metric_to_app_dimension(app, m, chart, dim); + } + } + + } + } + } +} + +static inline RRDDIM *statsd_add_dim_to_app_chart(STATSD_APP *app, STATSD_APP_CHART *chart, STATSD_APP_CHART_DIM *dim) { + (void)app; + + // allow the same statsd metric to be added multiple times to the same chart + + STATSD_APP_CHART_DIM *tdim; + size_t count_same_metric = 0, count_same_metric_value_type = 0; + size_t pos_same_metric_value_type = 0; + + for (tdim = chart->dimensions; tdim && tdim->next; tdim = tdim->next) { + if (dim->metric_hash == tdim->metric_hash && !strcmp(dim->metric, tdim->metric)) { + count_same_metric++; + + if(dim->value_type == tdim->value_type) { + count_same_metric_value_type++; + if (tdim == dim) + pos_same_metric_value_type = count_same_metric_value_type; + } + } + } + + if(count_same_metric > 1) { + // the same metric is found multiple times + + size_t len = strlen(dim->metric) + 100; + char metric[ len + 1 ]; + + if(count_same_metric_value_type > 1) { + // the same metric, with the same value type, is added multiple times + snprintfz(metric, len, "%s_%s%zu", dim->metric, valuetype2string(dim->value_type), pos_same_metric_value_type); + } + else { + // the same metric, with different value type is added + snprintfz(metric, len, "%s_%s", dim->metric, valuetype2string(dim->value_type)); + } + + dim->rd = rrddim_add(chart->st, metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); + if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; + if(dim->options != RRDDIM_OPTION_NONE) dim->rd->options |= dim->options; + return dim->rd; + } + + dim->rd = rrddim_add(chart->st, dim->metric, dim->name, dim->multiplier, dim->divisor, dim->algorithm); + if(dim->flags != RRDDIM_FLAG_NONE) dim->rd->flags |= dim->flags; + if(dim->options != RRDDIM_OPTION_NONE) dim->rd->options |= dim->options; + return dim->rd; +} + +static inline void statsd_update_app_chart(STATSD_APP *app, STATSD_APP_CHART *chart) { + debug(D_STATSD, "updating chart '%s' for app '%s'", chart->id, app->name); + + if(!chart->st) { + chart->st = rrdset_create_custom( + localhost // host + , app->name // type + , chart->id // id + , chart->name // name + , chart->family // family + , chart->context // context + , chart->title // title + , chart->units // units + , PLUGIN_STATSD_NAME // plugin + , chart->module // module + , chart->priority // priority + , statsd.update_every // update every + , chart->chart_type // chart type + , app->rrd_memory_mode // memory mode + , app->rrd_history_entries // history + ); + + rrdset_flag_set(chart->st, RRDSET_FLAG_STORE_FIRST); + // rrdset_flag_set(chart->st, RRDSET_FLAG_DEBUG); + } + + STATSD_APP_CHART_DIM *dim; + for(dim = chart->dimensions; dim ;dim = dim->next) { + if(likely(!dim->metric_pattern)) { + if (unlikely(!dim->rd)) + statsd_add_dim_to_app_chart(app, chart, dim); + + if (unlikely(dim->value_ptr)) { + debug(D_STATSD, "updating dimension '%s' (%s) of chart '%s' (%s) for app '%s' with value " COLLECTED_NUMBER_FORMAT, dim->name, rrddim_id(dim->rd), chart->id, rrdset_id(chart->st), app->name, *dim->value_ptr); + rrddim_set_by_pointer(chart->st, dim->rd, *dim->value_ptr); + } + } + } + + rrdset_done(chart->st); + debug(D_STATSD, "completed update of chart '%s' for app '%s'", chart->id, app->name); +} + +static inline void statsd_update_all_app_charts(void) { + // debug(D_STATSD, "updating app charts"); + + STATSD_APP *app; + for(app = statsd.apps; app ;app = app->next) { + // debug(D_STATSD, "updating charts for app '%s'", app->name); + + STATSD_APP_CHART *chart; + for(chart = app->charts; chart ;chart = chart->next) { + if(unlikely(chart->dimensions_linked_count)) { + statsd_update_app_chart(app, chart); + } + } + } + + // debug(D_STATSD, "completed update of app charts"); +} + +const char *statsd_metric_type_string(STATSD_METRIC_TYPE type) { + switch(type) { + case STATSD_METRIC_TYPE_COUNTER: return "counter"; + case STATSD_METRIC_TYPE_GAUGE: return "gauge"; + case STATSD_METRIC_TYPE_HISTOGRAM: return "histogram"; + case STATSD_METRIC_TYPE_METER: return "meter"; + case STATSD_METRIC_TYPE_SET: return "set"; + case STATSD_METRIC_TYPE_DICTIONARY: return "dictionary"; + case STATSD_METRIC_TYPE_TIMER: return "timer"; + default: return "unknown"; + } +} + +static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_metric)(STATSD_METRIC *)) { + STATSD_METRIC *m; + + // find the useful metrics (incremental = each time we are called, we check the new metrics only) + dfe_start_read(index->dict, m) { + // since we add new metrics at the beginning + // check for useful charts, until the point we last checked + if(unlikely(is_metric_checked(m))) break; + + if(unlikely(!(m->options & STATSD_METRIC_OPTION_CHECKED_IN_APPS))) { + log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name); + check_if_metric_is_for_app(index, m); + m->options |= STATSD_METRIC_OPTION_CHECKED_IN_APPS; + } + + if(unlikely(!(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED))) { + if(unlikely(statsd.private_charts >= statsd.max_private_charts_hard)) { + debug(D_STATSD, "STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts has been reached.", m->name); + info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts_hard); + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } + else { + if (simple_pattern_matches(statsd.charts_for, m->name)) { + debug(D_STATSD, "STATSD: metric '%s' will be charted.", m->name); + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } else { + debug(D_STATSD, "STATSD: metric '%s' will not be charted.", m->name); + m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED; + } + } + + m->options |= STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED; + } + + // mark it as checked + m->options |= STATSD_METRIC_OPTION_CHECKED; + + // check if it is used in charts + if((m->options & (STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED|STATSD_METRIC_OPTION_USED_IN_APPS)) && !(m->options & STATSD_METRIC_OPTION_USEFUL)) { + m->options |= STATSD_METRIC_OPTION_USEFUL; + index->useful++; + m->next_useful = index->first_useful; + index->first_useful = m; + } + } + dfe_done(m); + + // flush all the useful metrics + for(m = index->first_useful; m ; m = m->next_useful) { + flush_metric(m); + } +} + + +// -------------------------------------------------------------------------------------- +// statsd main thread + +static int statsd_listen_sockets_setup(void) { + return listen_sockets_setup(&statsd.sockets); +} + +static void statsd_main_cleanup(void *data) { + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)data; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + info("cleaning up..."); + + if (statsd.collection_threads_status) { + int i; + for (i = 0; i < statsd.threads; i++) { + if(statsd.collection_threads_status[i].status) { + info("STATSD: stopping data collection thread %d...", i + 1); + netdata_thread_cancel(statsd.collection_threads_status[i].thread); + } + else { + info("STATSD: data collection thread %d found stopped.", i + 1); + } + } + } + + info("STATSD: closing sockets..."); + listen_sockets_close(&statsd.sockets); + + // destroy the dictionaries + dictionary_destroy(statsd.gauges.dict); + dictionary_destroy(statsd.meters.dict); + dictionary_destroy(statsd.counters.dict); + dictionary_destroy(statsd.histograms.dict); + dictionary_destroy(statsd.dictionaries.dict); + dictionary_destroy(statsd.sets.dict); + dictionary_destroy(statsd.timers.dict); + + info("STATSD: cleanup completed."); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; + + worker_unregister(); +} + +#define WORKER_STATSD_FLUSH_GAUGES 0 +#define WORKER_STATSD_FLUSH_COUNTERS 1 +#define WORKER_STATSD_FLUSH_METERS 2 +#define WORKER_STATSD_FLUSH_TIMERS 3 +#define WORKER_STATSD_FLUSH_HISTOGRAMS 4 +#define WORKER_STATSD_FLUSH_SETS 5 +#define WORKER_STATSD_FLUSH_DICTIONARIES 6 +#define WORKER_STATSD_FLUSH_STATS 7 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8 +#endif + +void *statsd_main(void *ptr) { + worker_register("STATSDFLUSH"); + worker_register_job_name(WORKER_STATSD_FLUSH_GAUGES, "gauges"); + worker_register_job_name(WORKER_STATSD_FLUSH_COUNTERS, "counters"); + worker_register_job_name(WORKER_STATSD_FLUSH_METERS, "meters"); + worker_register_job_name(WORKER_STATSD_FLUSH_TIMERS, "timers"); + worker_register_job_name(WORKER_STATSD_FLUSH_HISTOGRAMS, "histograms"); + worker_register_job_name(WORKER_STATSD_FLUSH_SETS, "sets"); + worker_register_job_name(WORKER_STATSD_FLUSH_DICTIONARIES, "dictionaries"); + worker_register_job_name(WORKER_STATSD_FLUSH_STATS, "statistics"); + + netdata_thread_cleanup_push(statsd_main_cleanup, ptr); + + statsd.gauges.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.meters.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.counters.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.histograms.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.dictionaries.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.sets.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + statsd.timers.dict = dictionary_create(STATSD_DICTIONARY_OPTIONS); + + dictionary_register_insert_callback(statsd.gauges.dict, dictionary_metric_insert_callback, &statsd.gauges); + dictionary_register_insert_callback(statsd.meters.dict, dictionary_metric_insert_callback, &statsd.meters); + dictionary_register_insert_callback(statsd.counters.dict, dictionary_metric_insert_callback, &statsd.counters); + dictionary_register_insert_callback(statsd.histograms.dict, dictionary_metric_insert_callback, &statsd.histograms); + dictionary_register_insert_callback(statsd.dictionaries.dict, dictionary_metric_insert_callback, &statsd.dictionaries); + dictionary_register_insert_callback(statsd.sets.dict, dictionary_metric_insert_callback, &statsd.sets); + dictionary_register_insert_callback(statsd.timers.dict, dictionary_metric_insert_callback, &statsd.timers); + + dictionary_register_delete_callback(statsd.gauges.dict, dictionary_metric_delete_callback, &statsd.gauges); + dictionary_register_delete_callback(statsd.meters.dict, dictionary_metric_delete_callback, &statsd.meters); + dictionary_register_delete_callback(statsd.counters.dict, dictionary_metric_delete_callback, &statsd.counters); + dictionary_register_delete_callback(statsd.histograms.dict, dictionary_metric_delete_callback, &statsd.histograms); + dictionary_register_delete_callback(statsd.dictionaries.dict, dictionary_metric_delete_callback, &statsd.dictionaries); + dictionary_register_delete_callback(statsd.sets.dict, dictionary_metric_delete_callback, &statsd.sets); + dictionary_register_delete_callback(statsd.timers.dict, dictionary_metric_delete_callback, &statsd.timers); + + // ---------------------------------------------------------------------------------------------------------------- + // statsd configuration + + statsd.enabled = config_get_boolean(CONFIG_SECTION_PLUGINS, "statsd", statsd.enabled); + + statsd.update_every = default_rrd_update_every; + statsd.update_every = (int)config_get_number(CONFIG_SECTION_STATSD, "update every (flushInterval)", statsd.update_every); + if(statsd.update_every < default_rrd_update_every) { + error("STATSD: minimum flush interval %d given, but the minimum is the update every of netdata. Using %d", statsd.update_every, default_rrd_update_every); + statsd.update_every = default_rrd_update_every; + } + +#ifdef HAVE_RECVMMSG + statsd.recvmmsg_size = (size_t)config_get_number(CONFIG_SECTION_STATSD, "udp messages to process at once", (long long)statsd.recvmmsg_size); +#endif + + statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT); + statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard); + statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries); + statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); + statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout); + statsd.private_charts_hidden = (unsigned int)config_get_boolean(CONFIG_SECTION_STATSD, "private charts hidden", statsd.private_charts_hidden); + + statsd.histogram_percentile = (double)config_get_float(CONFIG_SECTION_STATSD, "histograms and timers percentile (percentThreshold)", statsd.histogram_percentile); + if(isless(statsd.histogram_percentile, 0) || isgreater(statsd.histogram_percentile, 100)) { + error("STATSD: invalid histograms and timers percentile %0.5f given", statsd.histogram_percentile); + statsd.histogram_percentile = 95.0; + } + { + char buffer[314 + 1]; + snprintfz(buffer, 314, "%0.1f%%", statsd.histogram_percentile); + statsd.histogram_percentile_str = strdupz(buffer); + } + + statsd.dictionary_max_unique = config_get_number(CONFIG_SECTION_STATSD, "dictionaries max unique dimensions", statsd.dictionary_max_unique); + + if(config_get_boolean(CONFIG_SECTION_STATSD, "add dimension for number of events received", 0)) { + statsd.gauges.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.counters.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.meters.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.sets.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.histograms.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.timers.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + statsd.dictionaries.default_options |= STATSD_METRIC_OPTION_CHART_DIMENSION_COUNT; + } + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on gauges (deleteGauges)", 0)) + statsd.gauges.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on counters (deleteCounters)", 0)) + statsd.counters.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on meters (deleteMeters)", 0)) + statsd.meters.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on sets (deleteSets)", 0)) + statsd.sets.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on histograms (deleteHistograms)", 0)) + statsd.histograms.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on timers (deleteTimers)", 0)) + statsd.timers.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + if(config_get_boolean(CONFIG_SECTION_STATSD, "gaps on dictionaries (deleteDictionaries)", 0)) + statsd.dictionaries.default_options |= STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED; + + size_t max_sockets = (size_t)config_get_number(CONFIG_SECTION_STATSD, "statsd server max TCP sockets", (long long int)(rlimit_nofile.rlim_cur / 4)); + +#ifdef STATSD_MULTITHREADED + statsd.threads = (int)config_get_number(CONFIG_SECTION_STATSD, "threads", processors); + if(statsd.threads < 1) { + error("STATSD: Invalid number of threads %d, using %d", statsd.threads, processors); + statsd.threads = processors; + config_set_number(CONFIG_SECTION_STATSD, "collector threads", statsd.threads); + } +#else + statsd.threads = 1; +#endif + + // read custom application definitions + statsd_readdir(netdata_configured_user_config_dir, netdata_configured_stock_config_dir, "statsd.d"); + + // ---------------------------------------------------------------------------------------------------------------- + // statsd setup + + if(!statsd.enabled) goto cleanup; + + statsd_listen_sockets_setup(); + if(!statsd.sockets.opened) { + error("STATSD: No statsd sockets to listen to. statsd will be disabled."); + goto cleanup; + } + + statsd.collection_threads_status = callocz((size_t)statsd.threads, sizeof(struct collection_thread_status)); + + int i; + for(i = 0; i < statsd.threads ;i++) { + statsd.collection_threads_status[i].max_sockets = max_sockets / statsd.threads; + char tag[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_TAG_MAX, "STATSD_COLLECTOR[%d]", i + 1); + netdata_thread_create(&statsd.collection_threads_status[i].thread, tag, NETDATA_THREAD_OPTION_DEFAULT, statsd_collector_thread, &statsd.collection_threads_status[i]); + } + + // ---------------------------------------------------------------------------------------------------------------- + // statsd monitoring charts + + RRDSET *st_metrics = NULL; + RRDDIM *rd_metrics_gauge = NULL; + RRDDIM *rd_metrics_counter = NULL; + RRDDIM *rd_metrics_timer = NULL; + RRDDIM *rd_metrics_meter = NULL; + RRDDIM *rd_metrics_histogram = NULL; + RRDDIM *rd_metrics_set = NULL; + RRDDIM *rd_metrics_dictionary = NULL; + RRDSET *st_useful_metrics = NULL; + RRDDIM *rd_useful_metrics_gauge = NULL; + RRDDIM *rd_useful_metrics_counter = NULL; + RRDDIM *rd_useful_metrics_timer = NULL; + RRDDIM *rd_useful_metrics_meter = NULL; + RRDDIM *rd_useful_metrics_histogram = NULL; + RRDDIM *rd_useful_metrics_set = NULL; + RRDDIM *rd_useful_metrics_dictionary = NULL; + RRDSET *st_events = NULL; + RRDDIM *rd_events_gauge = NULL; + RRDDIM *rd_events_counter = NULL; + RRDDIM *rd_events_timer = NULL; + RRDDIM *rd_events_meter = NULL; + RRDDIM *rd_events_histogram = NULL; + RRDDIM *rd_events_set = NULL; + RRDDIM *rd_events_dictionary = NULL; + RRDDIM *rd_events_unknown = NULL; + RRDDIM *rd_events_errors = NULL; + RRDSET *st_reads = NULL; + RRDDIM *rd_reads_tcp = NULL; + RRDDIM *rd_reads_udp = NULL; + RRDSET *st_bytes = NULL; + RRDDIM *rd_bytes_tcp = NULL; + RRDDIM *rd_bytes_udp = NULL; + RRDSET *st_packets = NULL; + RRDDIM *rd_packets_tcp = NULL; + RRDDIM *rd_packets_udp = NULL; + RRDSET *st_tcp_connects = NULL; + RRDDIM *rd_tcp_connects = NULL; + RRDDIM *rd_tcp_disconnects = NULL; + RRDSET *st_tcp_connected = NULL; + RRDDIM *rd_tcp_connected = NULL; + RRDSET *st_pcharts = NULL; + RRDDIM *rd_pcharts = NULL; + + if(global_statistics_enabled) { + st_metrics = rrdset_create_localhost( + "netdata", + "statsd_metrics", + NULL, + "statsd", + NULL, + "Metrics in the netdata statsd database", + "metrics", + PLUGIN_STATSD_NAME, + "stats", + 132010, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_metrics_gauge = rrddim_add(st_metrics, "gauges", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_counter = rrddim_add(st_metrics, "counters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_timer = rrddim_add(st_metrics, "timers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_meter = rrddim_add(st_metrics, "meters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_histogram = rrddim_add(st_metrics, "histograms", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_set = rrddim_add(st_metrics, "sets", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_metrics_dictionary = rrddim_add(st_metrics, "dictionaries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + st_useful_metrics = rrdset_create_localhost( + "netdata", + "statsd_useful_metrics", + NULL, + "statsd", + NULL, + "Useful metrics in the netdata statsd database", + "metrics", + PLUGIN_STATSD_NAME, + "stats", + 132010, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_useful_metrics_gauge = rrddim_add(st_useful_metrics, "gauges", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_counter = rrddim_add(st_useful_metrics, "counters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_timer = rrddim_add(st_useful_metrics, "timers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_meter = rrddim_add(st_useful_metrics, "meters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_histogram = rrddim_add(st_useful_metrics, "histograms", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_set = rrddim_add(st_useful_metrics, "sets", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_useful_metrics_dictionary = rrddim_add(st_useful_metrics, "dictionaries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + st_events = rrdset_create_localhost( + "netdata", + "statsd_events", + NULL, + "statsd", + NULL, + "Events processed by the netdata statsd server", + "events/s", + PLUGIN_STATSD_NAME, + "stats", + 132011, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_events_gauge = rrddim_add(st_events, "gauges", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_counter = rrddim_add(st_events, "counters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_timer = rrddim_add(st_events, "timers", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_meter = rrddim_add(st_events, "meters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_histogram = rrddim_add(st_events, "histograms", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_set = rrddim_add(st_events, "sets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_dictionary = rrddim_add(st_events, "dictionaries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_unknown = rrddim_add(st_events, "unknown", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_events_errors = rrddim_add(st_events, "errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_reads = rrdset_create_localhost( + "netdata", + "statsd_reads", + NULL, + "statsd", + NULL, + "Read operations made by the netdata statsd server", + "reads/s", + PLUGIN_STATSD_NAME, + "stats", + 132012, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_reads_tcp = rrddim_add(st_reads, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_reads_udp = rrddim_add(st_reads, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_bytes = rrdset_create_localhost( + "netdata", + "statsd_bytes", + NULL, + "statsd", + NULL, + "Bytes read by the netdata statsd server", + "kilobits/s", + PLUGIN_STATSD_NAME, + "stats", + 132013, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_bytes_tcp = rrddim_add(st_bytes, "tcp", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_bytes_udp = rrddim_add(st_bytes, "udp", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + + st_packets = rrdset_create_localhost( + "netdata", + "statsd_packets", + NULL, + "statsd", + NULL, + "Network packets processed by the netdata statsd server", + "packets/s", + PLUGIN_STATSD_NAME, + "stats", + 132014, + statsd.update_every, + RRDSET_TYPE_STACKED); + rd_packets_tcp = rrddim_add(st_packets, "tcp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_packets_udp = rrddim_add(st_packets, "udp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_tcp_connects = rrdset_create_localhost( + "netdata", + "tcp_connects", + NULL, + "statsd", + NULL, + "statsd server TCP connects and disconnects", + "events", + PLUGIN_STATSD_NAME, + "stats", + 132015, + statsd.update_every, + RRDSET_TYPE_LINE); + rd_tcp_connects = rrddim_add(st_tcp_connects, "connects", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_tcp_disconnects = rrddim_add(st_tcp_connects, "disconnects", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + st_tcp_connected = rrdset_create_localhost( + "netdata", + "tcp_connected", + NULL, + "statsd", + NULL, + "statsd server TCP connected sockets", + "sockets", + PLUGIN_STATSD_NAME, + "stats", + 132016, + statsd.update_every, + RRDSET_TYPE_LINE); + rd_tcp_connected = rrddim_add(st_tcp_connected, "connected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + st_pcharts = rrdset_create_localhost( + "netdata", + "private_charts", + NULL, + "statsd", + NULL, + "Private metric charts created by the netdata statsd server", + "charts", + PLUGIN_STATSD_NAME, + "stats", + 132020, + statsd.update_every, + RRDSET_TYPE_AREA); + rd_pcharts = rrddim_add(st_pcharts, "charts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + // ---------------------------------------------------------------------------------------------------------------- + // statsd thread to turn metrics into charts + + usec_t step = statsd.update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + while(!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_STATSD_FLUSH_GAUGES); + statsd_flush_index_metrics(&statsd.gauges, statsd_flush_gauge); + + worker_is_busy(WORKER_STATSD_FLUSH_COUNTERS); + statsd_flush_index_metrics(&statsd.counters, statsd_flush_counter); + + worker_is_busy(WORKER_STATSD_FLUSH_METERS); + statsd_flush_index_metrics(&statsd.meters, statsd_flush_meter); + + worker_is_busy(WORKER_STATSD_FLUSH_TIMERS); + statsd_flush_index_metrics(&statsd.timers, statsd_flush_timer); + + worker_is_busy(WORKER_STATSD_FLUSH_HISTOGRAMS); + statsd_flush_index_metrics(&statsd.histograms, statsd_flush_histogram); + + worker_is_busy(WORKER_STATSD_FLUSH_SETS); + statsd_flush_index_metrics(&statsd.sets, statsd_flush_set); + + worker_is_busy(WORKER_STATSD_FLUSH_DICTIONARIES); + statsd_flush_index_metrics(&statsd.dictionaries,statsd_flush_dictionary); + + worker_is_busy(WORKER_STATSD_FLUSH_STATS); + statsd_update_all_app_charts(); + + if(unlikely(netdata_exit)) + break; + + if(global_statistics_enabled) { + rrddim_set_by_pointer(st_metrics, rd_metrics_gauge, (collected_number)statsd.gauges.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_counter, (collected_number)statsd.counters.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_timer, (collected_number)statsd.timers.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_meter, (collected_number)statsd.meters.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_histogram, (collected_number)statsd.histograms.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_set, (collected_number)statsd.sets.metrics); + rrddim_set_by_pointer(st_metrics, rd_metrics_dictionary, (collected_number)statsd.dictionaries.metrics); + rrdset_done(st_metrics); + + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_gauge, (collected_number)statsd.gauges.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_counter, (collected_number)statsd.counters.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_timer, (collected_number)statsd.timers.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_meter, (collected_number)statsd.meters.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_histogram, (collected_number)statsd.histograms.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_set, (collected_number)statsd.sets.useful); + rrddim_set_by_pointer(st_useful_metrics, rd_useful_metrics_dictionary, (collected_number)statsd.dictionaries.useful); + rrdset_done(st_useful_metrics); + + rrddim_set_by_pointer(st_events, rd_events_gauge, (collected_number)statsd.gauges.events); + rrddim_set_by_pointer(st_events, rd_events_counter, (collected_number)statsd.counters.events); + rrddim_set_by_pointer(st_events, rd_events_timer, (collected_number)statsd.timers.events); + rrddim_set_by_pointer(st_events, rd_events_meter, (collected_number)statsd.meters.events); + rrddim_set_by_pointer(st_events, rd_events_histogram, (collected_number)statsd.histograms.events); + rrddim_set_by_pointer(st_events, rd_events_set, (collected_number)statsd.sets.events); + rrddim_set_by_pointer(st_events, rd_events_dictionary, (collected_number)statsd.dictionaries.events); + rrddim_set_by_pointer(st_events, rd_events_unknown, (collected_number)statsd.unknown_types); + rrddim_set_by_pointer(st_events, rd_events_errors, (collected_number)statsd.socket_errors); + rrdset_done(st_events); + + rrddim_set_by_pointer(st_reads, rd_reads_tcp, (collected_number)statsd.tcp_socket_reads); + rrddim_set_by_pointer(st_reads, rd_reads_udp, (collected_number)statsd.udp_socket_reads); + rrdset_done(st_reads); + + rrddim_set_by_pointer(st_bytes, rd_bytes_tcp, (collected_number)statsd.tcp_bytes_read); + rrddim_set_by_pointer(st_bytes, rd_bytes_udp, (collected_number)statsd.udp_bytes_read); + rrdset_done(st_bytes); + + rrddim_set_by_pointer(st_packets, rd_packets_tcp, (collected_number)statsd.tcp_packets_received); + rrddim_set_by_pointer(st_packets, rd_packets_udp, (collected_number)statsd.udp_packets_received); + rrdset_done(st_packets); + + rrddim_set_by_pointer(st_tcp_connects, rd_tcp_connects, (collected_number)statsd.tcp_socket_connects); + rrddim_set_by_pointer(st_tcp_connects, rd_tcp_disconnects, (collected_number)statsd.tcp_socket_disconnects); + rrdset_done(st_tcp_connects); + + rrddim_set_by_pointer(st_tcp_connected, rd_tcp_connected, (collected_number)statsd.tcp_socket_connected); + rrdset_done(st_tcp_connected); + + rrddim_set_by_pointer(st_pcharts, rd_pcharts, (collected_number)statsd.private_charts); + rrdset_done(st_pcharts); + } + } + +cleanup: ; // added semi-colon to prevent older gcc error: label at end of compound statement + netdata_thread_cleanup_pop(1); + return NULL; +} |