diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-08 16:27:08 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-05-08 16:27:08 +0000 |
commit | 81581f9719bc56f01d5aa08952671d65fda9867a (patch) | |
tree | 0f5c6b6138bf169c23c9d24b1fc0a3521385cb18 /collectors/statsd.plugin | |
parent | Releasing debian version 1.38.1-1. (diff) | |
download | netdata-81581f9719bc56f01d5aa08952671d65fda9867a.tar.xz netdata-81581f9719bc56f01d5aa08952671d65fda9867a.zip |
Merging upstream version 1.39.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/statsd.plugin')
-rw-r--r-- | collectors/statsd.plugin/README.md | 379 | ||||
-rw-r--r-- | collectors/statsd.plugin/asterisk.md | 5 | ||||
-rw-r--r-- | collectors/statsd.plugin/k6.md | 5 | ||||
-rw-r--r-- | collectors/statsd.plugin/statsd.c | 12 |
4 files changed, 376 insertions, 25 deletions
diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md index d65476ff4..dd74923ec 100644 --- a/collectors/statsd.plugin/README.md +++ b/collectors/statsd.plugin/README.md @@ -1,29 +1,40 @@ <!-- -title: "statsd.plugin" +title: "StatsD" description: "The Netdata Agent is a fully-featured StatsD server that collects metrics from any custom application and visualizes them in real-time." custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/statsd.plugin/README.md" -sidebar_label: "statsd.plugin" +sidebar_label: "StatsD" learn_status: "Published" -learn_topic_type: "References" -learn_rel_path: "References/Collectors references/Apm" +learn_rel_path: "Integrations/Monitor/Anything" --> -StatsD is a system to collect data from any application. Applications send metrics to it, usually via non-blocking UDP communication, and StatsD servers collect these metrics, perform a few simple calculations on them and push them to backend time-series databases. +# StatsD -If you want to learn more about the StatsD protocol, we have written a [blog post](https://www.netdata.cloud/blog/introduction-to-statsd/) about it! +[StatsD](https://github.com/statsd/statsd) is a system to collect data from any application. Applications send metrics to it, +usually via non-blocking UDP communication, and StatsD servers collect these metrics, perform a few simple calculations on +them and push them to backend time-series databases. +If you want to learn more about the StatsD protocol, we have written a +[blog post](https://blog.netdata.cloud/introduction-to-statsd/) about it! -Netdata is a fully featured statsd server. It can collect statsd formatted metrics, visualize them on its dashboards and store them in it's database for long-term retention. -Netdata statsd is inside Netdata (an internal plugin, running inside the Netdata daemon), it is configured via `netdata.conf` and by-default listens on standard statsd port 8125. Netdata supports both TCP and UDP packets at the same time. +Netdata is a fully featured statsd server. It can collect statsd formatted metrics, visualize +them on its dashboards and store them in it's database for long-term retention. + +Netdata statsd is inside Netdata (an internal plugin, running inside the Netdata daemon), it is +configured via `netdata.conf` and by-default listens on standard statsd port 8125. Netdata supports +both TCP and UDP packets at the same time. Since statsd is embedded in Netdata, it means you now have a statsd server embedded on all your servers. -Netdata statsd is fast. It can collect several millions of metrics per second on modern hardware, using just 1 CPU core. The implementation uses two threads: one thread collects metrics, another thread updates the charts from the collected data. +Netdata statsd is fast. It can collect several millions of metrics per second on modern hardware, using +just 1 CPU core. The implementation uses two threads: one thread collects metrics, another thread updates +the charts from the collected data. ## Available StatsD synthetic application charts -Netdata ships with a few synthetic chart definitions to automatically present application metrics into a more uniform way. These synthetic charts are configuration files (you can create your own) that re-arrange statsd metrics into a more meaningful way. +Netdata ships with a few synthetic chart definitions to automatically present application metrics into a +more uniform way. These synthetic charts are configuration files (you can create your own) that re-arrange +statsd metrics into a more meaningful way. On synthetic charts, we can have alarms as with any metric and chart. @@ -38,13 +49,16 @@ On synthetic charts, we can have alarms as with any metric and chart. ## Metrics supported by Netdata -Netdata fully supports the StatsD protocol and also extends it to support more advanced Netdata specific use cases. All StatsD client libraries can be used with Netdata too. +Netdata fully supports the StatsD protocol and also extends it to support more advanced Netdata specific use cases. +All StatsD client libraries can be used with Netdata too. - **Gauges** - The application sends `name:value|g`, where `value` is any **decimal/fractional** number, StatsD reports the latest value collected and the number of times it was updated (events). + The application sends `name:value|g`, where `value` is any **decimal/fractional** number, StatsD reports the + latest value collected and the number of times it was updated (events). - The application may increment or decrement a previous value, by setting the first character of the value to `+` or `-` (so, the only way to set a gauge to an absolute negative value, is to first set it to zero). + The application may increment or decrement a previous value, by setting the first character of the value to + `+` or `-` (so, the only way to set a gauge to an absolute negative value, is to first set it to zero). [Sampling rate](#sampling-rates) is supported. [Tags](#tags) are supported for changing chart units, family and dimension name. @@ -305,7 +319,6 @@ For example, if you want to monitor the application `myapp` using StatsD and Net private charts = no gaps when not collected = no history = 60 -# memory mode = ram [dictionary] m1 = metric1 @@ -701,3 +714,341 @@ or even at a terminal prompt, like this: The function is smart enough to call `nc` just once and pass all the metrics to it. It will also automatically switch to TCP if the metrics to send are above 1000 bytes. If you have gotten thus far, make sure to check out our [community forums](https://community.netdata.cloud) to share your experience using Netdata with StatsD. + +## StatsD Step By Step Guide + +In this guide, we'll go through a scenario of visualizing our data in Netdata in a matter of seconds using +[k6](https://k6.io), an open-source tool for automating load testing that outputs metrics to the StatsD format. + +Although we'll use k6 as the use-case, the same principles can be applied to every application that supports +the StatsD protocol. Simply enable the StatsD output and point it to the node that runs Netdata, which is `localhost` in this case. + +In general, the process for creating a StatsD collector can be summarized in 2 steps: + +- Run an experiment by sending StatsD metrics to Netdata, without any prior configuration. This will create + a chart per metric (called private charts) and will help you verify that everything works as expected from the application side of things. + + - Make sure to reload the dashboard tab **after** you start sending data to Netdata. + +- Create a configuration file for your app using [edit-config](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md): `sudo ./edit-config + statsd.d/myapp.conf` + + - Each app will have it's own section in the right-hand menu. + +Now, let's see the above process in detail. + +### Prerequisites + +- A node with the [Netdata](https://github.com/netdata/netdata/blob/master/packaging/installer/README.md) installed. +- An application to instrument. For this guide, that will be [k6](https://k6.io/docs/getting-started/installation). + +### Understanding the metrics + +The real in instrumenting an application with StatsD for you is to decide what metrics you +want to visualize and how you want them grouped. In other words, you need decide which metrics +will be grouped in the same charts and how the charts will be grouped on Netdata's dashboard. + +Start with documentation for the particular application that you want to monitor (or the +technological stack that you are using). In our case, the +[k6 documentation](https://k6.io/docs/using-k6/metrics/) has a whole page dedicated to the +metrics output by k6, along with descriptions. + +If you are using StatsD to monitor an existing application, you don't have much control over +these metrics. For example, k6 has a type called `trend`, which is identical to timers and histograms. +Thus, _k6 is clearly dictating_ which metrics can be used as histograms and simple gauges. + +On the other hand, if you are instrumenting your own code, you will need to not only decide what are +the "things" that you want to measure, but also decide which StatsD metric type is the appropriate for each. + +### Use private charts to see all available metrics + +In Netdata, every metric will receive its own chart, called a `private chart`. Although in the +final implementation this is something that we will disable, since it can create considerable noise +(imagine having 100s of metrics), it’s very handy while building the configuration file. + +You can get a quick visual representation of the metrics and their type (e.g it’s a gauge, a timer, etc.). + +An important thing to notice is that StatsD has different types of metrics, as illustrated in the +[supported metrics](#metrics-supported-by-netdata). Histograms and timers support mathematical operations +to be performed on top of the baseline metric, like reporting the `average` of the value. + +Here are some examples of default private charts. You can see that the histogram private charts will +visualize all the available operations. + +**Gauge private chart** + +![Gauge metric example](https://i.imgur.com/Sr5nJEV.png) + +**Histogram private chart** + +![Timer metric example](https://i.imgur.com/P4p0hvq.png) + +### Create a new StatsD configuration file + +Start by creating a new configuration file under the `statsd.d/` folder in the +[Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory). +Use [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) +to create a new file called `k6.conf`. + +```bash= +sudo ./edit-config statsd.d/k6.conf +``` + +Copy the following configuration into your file as a starting point. + +```conf +[app] + name = k6 + metrics = k6* + private charts = yes + gaps when not collected = no + memory mode = dbengine +``` + +Next, you need is to understand how to organize metrics in Netdata’s StatsD. + +#### Synthetic charts + +Netdata lets you group the metrics exposed by your instrumented application with _synthetic charts_. + +First, create a `[dictionary]` section to transform the names of the metrics into human-readable equivalents. +`http_req_blocked`, `http_req_connecting`, `http_req_receiving`, and `http_reqs` are all metrics exposed by k6. + +``` +[dictionary] + http_req_blocked = Blocked HTTP Requests + http_req_connecting = Connecting HTTP Requests + http_req_receiving = Receiving HTTP Requests + http_reqs = Total HTTP requests +``` + +Continue this dictionary process with any other metrics you want to collect with Netdata. + +#### Families and context + +Families and context are additional ways to group metrics. Families control the submenu at right-hand menu and +it's a subcategory of the section. Given the metrics given by K6, we are organizing them in 2 major groups, +or `families`: `k6 native metrics` and `http metrics`. + +Context is a second way to group metrics, when the metrics are of the same nature but different origin. In +our case, if we ran several different load testing experiments side-by-side, we could define the same app, +but different context (e.g `http_requests.experiment1`, `http_requests.experiment2`). + +Find more details about family and context in our [documentation](https://github.com/netdata/netdata/blob/master/web/README.md#families). + +#### Dimensions + +Now, having decided on how we are going to group the charts, we need to define how we are going to group +metrics into different charts. This is particularly important, since we decide: + +- What metrics **not** to show, since they are not useful for our use-case. +- What metrics to consolidate into the same charts, so as to reduce noise and increase visual correlation. + +The dimension option has this syntax: `dimension = [pattern] METRIC NAME TYPE MULTIPLIER DIVIDER OPTIONS` + +- **pattern**: A keyword that tells the StatsD server the `METRIC` string is actually a + [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md). + We don't use simple patterns in the example, but if we wanted to visualize all the `http_req` metrics, we + could have a single dimension: `dimension = pattern 'k6.http_req*' last 1 1`. Find detailed examples with + patterns in [dimension patterns](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/README.md#dimension-patterns). + +- **METRIC** The id of the metric as it comes from the client. You can easily find this in the private charts above, + for example: `k6.http_req_connecting`. + +- **NAME**: The name of the dimension. You can use the dictionary to expand this to something more human-readable. + +- **TYPE**: + + - For all charts: + - `events`: The number of events (data points) received by the StatsD server + - `last`: The last value that the server received + + - For histograms and timers: + - `min`, `max`, `sum`, `average`, `percentile`, `median`, `stddev`: This is helpful if you want to see + different representations of the same value. You can find an example at the `[iteration_duration]` + above. Note that the baseline `metric` is the same, but the `name` of the dimension is different, + since we use the baseline, but we perform a computation on it, creating a different final metric for + visualization(dimension). + +- **MULTIPLIER DIVIDER**: Handy if you want to convert Kilobytes to Megabytes or you want to give negative value. + The second is handy for better visualization of send/receive. You can find an example at the **packets** submenu of the **IPv4 Networking Section**. + +If you define a chart, run Netdata to visualize metrics, and then add or remove a dimension from that chart, +this will result in a new chart with the same name, confusing Netdata. If you change the dimensions of the chart, +make sure to also change the `name` of that chart, since it serves as the `id` of that chart in Netdata's storage. +(e.g http_req --> http_req_1). + +#### Finalize your StatsD configuration file + +It's time to assemble all the pieces together and create the synthetic charts that will consist our application +dashboard in Netdata. We can do it in a few simple steps: + +- Decide which metrics we want to use (we have viewed all of them as private charts). For example, we want to use + `k6.http_requests`, `k6.vus`, etc. + +- Decide how we want organize them in different synthetic charts. For example, we want `k6.http_requests`, `k6.vus` + on their own, but `k6.http_req_blocked` and `k6.http_req_connecting` on the same chart. + +- For each synthetic chart, we define a **unique** name and a human readable title. + +- We decide at which `family` (submenu section) we want each synthetic chart to belong to. For example, here we + have defined 2 families: `http requests`, `k6_metrics`. + +- If we have multiple instances of the same metric, we can define different contexts, (Optional). + +- We define a dimension according to the syntax we highlighted above. + +- We define a type for each synthetic chart (line, area, stacked) + +- We define the units for each synthetic chart. + +Following the above steps, we append to the `k6.conf` that we defined above, the following configuration: + +``` +[http_req_total] + name = http_req_total + title = Total HTTP Requests + family = http requests + context = k6.http_requests + dimension = k6.http_reqs http_reqs last 1 1 sum + type = line + units = requests/s + +[vus] + name = vus + title = Virtual Active Users + family = k6_metrics + dimension = k6.vus vus last 1 1 + dimension = k6.vus_max vus_max last 1 1 + type = line + unit = vus + +[iteration_duration] + name = iteration_duration_2 + title = Iteration duration + family = k6_metrics + dimension = k6.iteration_duration iteration_duration last 1 1 + dimension = k6.iteration_duration iteration_duration_max max 1 1 + dimension = k6.iteration_duration iteration_duration_min min 1 1 + dimension = k6.iteration_duration iteration_duration_avg avg 1 1 + type = line + unit = s + +[dropped_iterations] + name = dropped_iterations + title = Dropped Iterations + family = k6_metrics + dimension = k6.dropped_iterations dropped_iterations last 1 1 + units = iterations + type = line + +[data] + name = data + title = K6 Data + family = k6_metrics + dimension = k6.data_received data_received last 1 1 + dimension = k6.data_sent data_sent last -1 1 + units = kb/s + type = area + +[http_req_status] + name = http_req_status + title = HTTP Requests Status + family = http requests + dimension = k6.http_req_blocked http_req_blocked last 1 1 + dimension = k6.http_req_connecting http_req_connecting last 1 1 + units = ms + type = line + +[http_req_duration] + name = http_req_duration + title = HTTP requests duration + family = http requests + dimension = k6.http_req_sending http_req_sending last 1 1 + dimension = k6.http_req_waiting http_req_waiting last 1 1 + dimension = k6.http_req_receiving http_req_receiving last 1 1 + units = ms + type = stacked +``` + +Note that Netdata will report the rate for metrics and counters, even if k6 or another application +sends an _absolute_ number. For example, k6 sends absolute HTTP requests with `http_reqs`, +but Netdata visualizes that in `requests/second`. + +To enable this StatsD configuration, [restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md). + +### Final touches + +At this point, you have used StatsD to gather metrics for k6, creating a whole new section in your +Netdata dashboard in the process. Moreover, you can further customize the icon of the particular section, +as well as the description for each chart. + +To edit the section, please follow the Netdata [documentation](https://github.com/netdata/netdata/blob/master/web/gui/README.md#customizing-the-local-dashboard). + +While the following configuration will be placed in a new file, as the documentation suggests, it is +instructing to use `dashboard_info.js` as a template. Open the file and see how the rest of sections and collectors have been defined. + +```javascript= +netdataDashboard.menu = { + 'k6': { + title: 'K6 Load Testing', + icon: '<i class="fas fa-cogs"></i>', + info: 'k6 is an open-source load testing tool and cloud service providing the best developer experience for API performance testing.' + }, + . + . + . +``` + +We can then add a description for each chart. Simply find the following section in `dashboard_info.js` to understand how a chart definitions are used: + +```javascript= +netdataDashboard.context = { + 'system.cpu': { + info: function (os) { + void (os); + return 'Total CPU utilization (all cores). 100% here means there is no CPU idle time at all. You can get per core usage at the <a href="#menu_cpu">CPUs</a> section and per application usage at the <a href="#menu_apps">Applications Monitoring</a> section.' + + netdataDashboard.sparkline('<br/>Keep an eye on <b>iowait</b> ', 'system.cpu', 'iowait', '%', '. If it is constantly high, your disks are a bottleneck and they slow your system down.') + + netdataDashboard.sparkline('<br/>An important metric worth monitoring, is <b>softirq</b> ', 'system.cpu', 'softirq', '%', '. A constantly high percentage of softirq may indicate network driver issues.'); + }, + valueRange: "[0, 100]" + }, +``` + +Afterwards, you can open your `custom_dashboard_info.js`, as suggested in the documentation linked above, +and add something like the following example: + +```javascript= +netdataDashboard.context = { + 'k6.http_req_duration': { + info: "Total time for the request. It's equal to http_req_sending + http_req_waiting + http_req_receiving (i.e. how long did the remote server take to process the request and respond, without the initial DNS lookup/connection times)" + }, + +``` +The chart is identified as ``<section_name>.<chart_name>``. + +These descriptions can greatly help the Netdata user who is monitoring your application in the midst of an incident. + +The `info` field supports `html`, embedding useful links and instructions in the description. + +### Vendoring a new collector + +While we learned how to visualize any data source in Netdata using the StatsD protocol, we have also created a new collector. + +As long as you use the same underlying collector, every new `myapp.conf` file will create a new data +source and dashboard section for Netdata. Netdata loads all the configuration files by default, but it will +**not** create dashboard sections or charts, unless it starts receiving data for that particular data source. +This means that we can now share our collector with the rest of the Netdata community. + +- Make sure you follow the [contributing guide](https://github.com/netdata/.github/edit/main/CONTRIBUTING.md) +- Fork the netdata/netdata repository +- Place the configuration file inside `netdata/collectors/statsd.plugin` +- Add a reference in `netdata/collectors/statsd.plugin/Makefile.am`. For example, if we contribute the `k6.conf` file: +```Makefile +dist_statsdconfig_DATA = \ + example.conf \ + k6.conf \ + $(NULL) +``` + + diff --git a/collectors/statsd.plugin/asterisk.md b/collectors/statsd.plugin/asterisk.md index 9d7948111..e7a7b63ce 100644 --- a/collectors/statsd.plugin/asterisk.md +++ b/collectors/statsd.plugin/asterisk.md @@ -3,11 +3,10 @@ title: "Asterisk monitoring with Netdata" custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/statsd.plugin/asterisk.md" sidebar_label: "Asterisk" learn_status: "Published" -learn_topic_type: "References" -learn_rel_path: "References/Collectors references/Apm/Statsd" +learn_rel_path: "Integrations/Monitor/VoIP" --> -# Asterisk monitoring with Netdata +# Asterisk collector Monitors [Asterisk](https://www.asterisk.org/) dialplan application's statistics. diff --git a/collectors/statsd.plugin/k6.md b/collectors/statsd.plugin/k6.md index 7a1e36773..13608a8a8 100644 --- a/collectors/statsd.plugin/k6.md +++ b/collectors/statsd.plugin/k6.md @@ -3,11 +3,10 @@ title: "K6 load test monitoring with Netdata" custom_edit_url: "https://github.com/netdata/netdata/edit/master/collectors/statsd.plugin/k6.md" sidebar_label: "K6 Load Testing" learn_status: "Published" -learn_topic_type: "References" -learn_rel_path: "References/Collectors references/Apm/Statsd" +learn_rel_path: "Integrations/Monitor/apps" --> -# K6 Load Testing monitoring with Netdata +# K6 load test collector Monitors the impact of load testing experiments performed with [K6](https://k6.io/). diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c index d15129b9c..1425d0a97 100644 --- a/collectors/statsd.plugin/statsd.c +++ b/collectors/statsd.plugin/statsd.c @@ -1418,7 +1418,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA } else if (!strcmp(name, "metrics")) { simple_pattern_free(app->metrics); - app->metrics = simple_pattern_create(value, NULL, SIMPLE_PATTERN_EXACT); + app->metrics = simple_pattern_create(value, NULL, SIMPLE_PATTERN_EXACT, true); } else if (!strcmp(name, "private charts")) { if (!strcmp(value, "yes") || !strcmp(value, "on")) @@ -1480,7 +1480,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA else if (!strcmp(name, "dimension")) { // metric [name [type [multiplier [divisor]]]] char *words[10] = { NULL }; - size_t num_words = pluginsd_split_words(value, words, 10, NULL, NULL, 0); + size_t num_words = pluginsd_split_words(value, words, 10); int pattern = 0; size_t i = 0; @@ -1533,7 +1533,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA ); if(pattern) - dim->metric_pattern = simple_pattern_create(dim->metric, NULL, SIMPLE_PATTERN_EXACT); + dim->metric_pattern = simple_pattern_create(dim->metric, NULL, SIMPLE_PATTERN_EXACT, true); } else { error("STATSD: ignoring line %zu ('%s') of file '%s'. Unknown keyword for the [%s] section.", line, name, filename, chart->id); @@ -2129,7 +2129,7 @@ static inline void check_if_metric_is_for_app(STATSD_INDEX *index, STATSD_METRIC strcpy(wildcarded, dim->name); char *ws = &wildcarded[dim_name_len]; - if(simple_pattern_matches_extract(dim->metric_pattern, m->name, ws, wildcarded_len - dim_name_len)) { + if(simple_pattern_matches_extract(dim->metric_pattern, m->name, ws, wildcarded_len - dim_name_len) == SP_MATCHED_POSITIVE) { char *final_name = NULL; @@ -2462,7 +2462,9 @@ void *statsd_main(void *ptr) { statsd.recvmmsg_size = (size_t)config_get_number(CONFIG_SECTION_STATSD, "udp messages to process at once", (long long)statsd.recvmmsg_size); #endif - statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT); + statsd.charts_for = simple_pattern_create( + config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, + SIMPLE_PATTERN_EXACT, true); statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard); statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries); statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail); |