diff options
Diffstat (limited to 'src/collectors/python.d.plugin/traefik')
-rw-r--r-- | src/collectors/python.d.plugin/traefik/README.md | 98 | ||||
-rw-r--r-- | src/collectors/python.d.plugin/traefik/metadata.yaml | 125 | ||||
-rw-r--r-- | src/collectors/python.d.plugin/traefik/traefik.chart.py | 198 | ||||
-rw-r--r-- | src/collectors/python.d.plugin/traefik/traefik.conf | 77 |
4 files changed, 498 insertions, 0 deletions
diff --git a/src/collectors/python.d.plugin/traefik/README.md b/src/collectors/python.d.plugin/traefik/README.md new file mode 100644 index 000000000..ac025c760 --- /dev/null +++ b/src/collectors/python.d.plugin/traefik/README.md @@ -0,0 +1,98 @@ +<!-- +title: "Traefik monitoring with Netdata" +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/traefik/README.md" +sidebar_label: "traefik-python.d.plugin" +learn_status: "Published" +learn_topic_type: "References" +learn_rel_path: "Integrations/Monitor/Webapps" +--> + +# Traefik collector + +Uses the `health` API to provide statistics. + +It produces: + +1. **Responses** by statuses + + - success (1xx, 2xx, 304) + - error (5xx) + - redirect (3xx except 304) + - bad (4xx) + - other (all other responses) + +2. **Responses** by codes + + - 2xx (successful) + - 5xx (internal server errors) + - 3xx (redirect) + - 4xx (bad) + - 1xx (informational) + - other (non-standard responses) + +3. **Detailed Response Codes** requests/s (number of responses for each individual response code) + +4. **Requests**/s + + - request statistics + +5. **Total response time** + + - sum of all response times + +6. **Average response time** + +7. **Average response time per iteration** + +8. **Uptime** + + - Traefik server uptime + +## Configuration + +Edit the `python.d/traefik.conf` configuration file using `edit-config` from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically +at `/etc/netdata`. + +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/traefik.conf +``` + +The module needs only the `url` of the server's `health` endpoint. + +Here is an example for a local server: + +```yaml +update_every: 1 +priority: 60000 + +local: + url: 'http://localhost:8080/health' +``` + +Without configuration, the module attempts to connect to `http://localhost:8080/health`. 
+ + + + +### Troubleshooting + +To troubleshoot issues with the `traefik` module, run the `python.d.plugin` with the debug option enabled. The +output will give you the output of the data collection job or error messages on why the collector isn't working. + +First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's +not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the +plugin's directory, switch to the `netdata` user. + +```bash +cd /usr/libexec/netdata/plugins.d/ +sudo su -s /bin/bash netdata +``` + +Now you can manually run the `traefik` module in debug mode: + +```bash +./python.d.plugin traefik debug trace +``` + diff --git a/src/collectors/python.d.plugin/traefik/metadata.yaml b/src/collectors/python.d.plugin/traefik/metadata.yaml new file mode 100644 index 000000000..0594f8fb2 --- /dev/null +++ b/src/collectors/python.d.plugin/traefik/metadata.yaml @@ -0,0 +1,125 @@ +# This collector will not appear in documentation, as the go version is preferred, +# https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/traefik/README.md +# +# meta: +# plugin_name: python.d.plugin +# module_name: traefik +# monitored_instance: +# name: python.d traefik +# link: '' +# categories: [] +# icon_filename: '' +# related_resources: +# integrations: +# list: [] +# info_provided_to_referring_integrations: +# description: '' +# keywords: [] +# most_popular: false +# overview: +# data_collection: +# metrics_description: '' +# method_description: '' +# supported_platforms: +# include: [] +# exclude: [] +# multi_instance: true +# additional_permissions: +# description: '' +# default_behavior: +# auto_detection: +# description: '' +# limits: +# description: '' +# performance_impact: +# description: '' +# setup: +# prerequisites: +# list: [] +# configuration: +# file: +# name: '' +# description: '' +# options: +# description: '' +# 
folding: +# title: '' +# enabled: true +# list: [] +# examples: +# folding: +# enabled: true +# title: '' +# list: [] +# troubleshooting: +# problems: +# list: [] +# alerts: [] +# metrics: +# folding: +# title: Metrics +# enabled: false +# description: "" +# availability: [] +# scopes: +# - name: global +# description: "" +# labels: [] +# metrics: +# - name: traefik.response_statuses +# description: Response statuses +# unit: "requests/s" +# chart_type: stacked +# dimensions: +# - name: success +# - name: error +# - name: redirect +# - name: bad +# - name: other +# - name: traefik.response_codes +# description: Responses by codes +# unit: "requests/s" +# chart_type: stacked +# dimensions: +# - name: 2xx +# - name: 5xx +# - name: 3xx +# - name: 4xx +# - name: 1xx +# - name: other +# - name: traefik.detailed_response_codes +# description: Detailed response codes +# unit: "requests/s" +# chart_type: stacked +# dimensions: +# - name: a dimension for each response code family +# - name: traefik.requests +# description: Requests +# unit: "requests/s" +# chart_type: line +# dimensions: +# - name: requests +# - name: traefik.total_response_time +# description: Total response time +# unit: "seconds" +# chart_type: line +# dimensions: +# - name: response +# - name: traefik.average_response_time +# description: Average response time +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: response +# - name: traefik.average_response_time_per_iteration +# description: Average response time per iteration +# unit: "milliseconds" +# chart_type: line +# dimensions: +# - name: response +# - name: traefik.uptime +# description: Uptime +# unit: "seconds" +# chart_type: line +# dimensions: +# - name: uptime diff --git a/src/collectors/python.d.plugin/traefik/traefik.chart.py b/src/collectors/python.d.plugin/traefik/traefik.chart.py new file mode 100644 index 000000000..5a498467f --- /dev/null +++ b/src/collectors/python.d.plugin/traefik/traefik.chart.py @@ -0,0 +1,198 @@ 
+# -*- coding: utf-8 -*-
+# Description: traefik netdata python.d module
+# Author: Alexandre Menezes (@ale_menezes)
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+from collections import defaultdict
+from json import loads
+
+from bases.FrameworkServices.UrlService import UrlService
+
+# Chart identifiers (keys of CHARTS); assigned to `Service.order`.
+ORDER = [
+    'response_statuses',
+    'response_codes',
+    'detailed_response_codes',
+    'requests',
+    'total_response_time',
+    'average_response_time',
+    'average_response_time_per_iteration',
+    'uptime'
+]
+
+# Chart definitions; assigned to `Service.definitions`.
+# Each 'lines' entry starts with the key looked up in the dict returned by
+# `Service._get_data`, followed by the dimension name and the algorithm.
+# The two trailing numbers are presumably multiplier and divisor (python.d
+# convention) - TODO confirm against the plugin documentation.
+CHARTS = {
+    'response_statuses': {
+        'options': [None, 'Response statuses', 'requests/s', 'responses', 'traefik.response_statuses', 'stacked'],
+        'lines': [
+            ['successful_requests', 'success', 'incremental'],
+            ['server_errors', 'error', 'incremental'],
+            ['redirects', 'redirect', 'incremental'],
+            ['bad_requests', 'bad', 'incremental'],
+            ['other_requests', 'other', 'incremental']
+        ]
+    },
+    'response_codes': {
+        'options': [None, 'Responses by codes', 'requests/s', 'responses', 'traefik.response_codes', 'stacked'],
+        'lines': [
+            ['2xx', None, 'incremental'],
+            ['5xx', None, 'incremental'],
+            ['3xx', None, 'incremental'],
+            ['4xx', None, 'incremental'],
+            ['1xx', None, 'incremental'],
+            ['other', None, 'incremental']
+        ]
+    },
+    'detailed_response_codes': {
+        'options': [None, 'Detailed response codes', 'requests/s', 'responses', 'traefik.detailed_response_codes',
+                    'stacked'],
+        # Dimensions are added at runtime by `Service.get_data_per_code`, one per status code seen.
+        'lines': []
+    },
+    'requests': {
+        'options': [None, 'Requests', 'requests/s', 'requests', 'traefik.requests', 'line'],
+        'lines': [
+            ['total_count', 'requests', 'incremental']
+        ]
+    },
+    'total_response_time': {
+        'options': [None, 'Total response time', 'seconds', 'timings', 'traefik.total_response_time', 'line'],
+        'lines': [
+            # `_get_data` multiplies the value by 10000; the divisor undoes that scaling.
+            ['total_response_time_sec', 'response', 'absolute', 1, 10000]
+        ]
+    },
+    'average_response_time': {
+        'options': [None, 'Average response time', 'milliseconds', 'timings', 'traefik.average_response_time', 'line'],
+        'lines': [
+            # `_get_data` multiplies the value by 1000000; dividing by 1000 leaves milliseconds.
+            ['average_response_time_sec', 'response', 'absolute', 1, 1000]
+        ]
+    },
+    'average_response_time_per_iteration': {
+        'options': [None, 'Average response time per iteration', 'milliseconds', 'timings',
+                    'traefik.average_response_time_per_iteration', 'line'],
+        'lines': [
+            # NOTE(review): `_get_data` stores a per-iteration average scaled by 1000000 here, yet the
+            # dimension is 'incremental' with divisor 10000. 'absolute' with divisor 1000 would match the
+            # 'milliseconds' unit - confirm before changing, since it alters the published chart.
+            ['average_response_time_per_iteration_sec', 'response', 'incremental', 1, 10000]
+        ]
+    },
+    'uptime': {
+        'options': [None, 'Uptime', 'seconds', 'uptime', 'traefik.uptime', 'line'],
+        'lines': [
+            ['uptime_sec', 'uptime', 'absolute']
+        ]
+    }
+}
+
+# Top-level keys copied verbatim from the health document into the collected data.
+HEALTH_STATS = [
+    'uptime_sec',
+    'average_response_time_sec',
+    'total_response_time_sec',
+    'total_count',
+    'total_status_code_count'
+]
+
+
+class Service(UrlService):
+    """Collects request and response statistics from a Traefik `health` endpoint."""
+
+    def __init__(self, configuration=None, name=None):
+        UrlService.__init__(self, configuration=configuration, name=name)
+        self.url = self.configuration.get('url', 'http://localhost:8080/health')
+        self.order = ORDER
+        self.definitions = CHARTS
+        # Totals seen on the previous iteration; `_get_data` uses them to derive
+        # the average response time of the requests served in between.
+        self.last_total_response_time = 0
+        self.last_total_count = 0
+        # Collected values; the dict is kept between iterations and updated in place.
+        self.data = {
+            'successful_requests': 0,
+            'redirects': 0,
+            'bad_requests': 0,
+            'server_errors': 0,
+            'other_requests': 0,
+            '1xx': 0,
+            '2xx': 0,
+            '3xx': 0,
+            '4xx': 0,
+            '5xx': 0,
+            'other': 0,
+            'average_response_time_per_iteration_sec': 0,
+        }
+
+    def _get_data(self):
+        """
+        Fetch the health document, parse it and refresh `self.data`.
+
+        :return: dict of collected values, or None when no usable response was received
+        """
+        raw = self._get_raw_data()
+
+        if not raw:
+            return None
+
+        try:
+            data = loads(raw)
+        except ValueError as error:
+            # Not JSON (for example `url` points at a different page): report a
+            # failed collection instead of raising.
+            self.error('cannot parse health API response: {0}'.format(error))
+            return None
+
+        if not isinstance(data, dict):
+            self.error('unexpected health API response: a JSON object is required')
+            return None
+
+        self.get_data_per_code_status(raw_data=data)
+
+        self.get_data_per_code_family(raw_data=data)
+
+        self.get_data_per_code(raw_data=data)
+
+        self.data.update(fetch_data_(raw_data=data, metrics=HEALTH_STATS))
+
+        # Scale only values present in this response: `self.data` persists between
+        # iterations, so scaling a stale entry in place would compound the factor.
+        if 'average_response_time_sec' in data:
+            self.data['average_response_time_sec'] = data['average_response_time_sec'] * 1000000
+        if 'total_response_time_sec' in data:
+            self.data['total_response_time_sec'] = data['total_response_time_sec'] * 10000
+
+        # A missing total is treated as "unchanged since the previous iteration".
+        total_count = data.get('total_count', self.last_total_count)
+        total_response_time = data.get('total_response_time_sec', self.last_total_response_time)
+
+        served = total_count - self.last_total_count
+        if served:
+            self.data['average_response_time_per_iteration_sec'] = \
+                (total_response_time - self.last_total_response_time) * 1000000 / served
+        else:
+            # No new requests: avoid dividing by zero.
+            self.data['average_response_time_per_iteration_sec'] = 0
+        self.last_total_response_time = total_response_time
+        self.last_total_count = total_count
+
+        return self.data or None
+
+    def get_data_per_code_status(self, raw_data):
+        """
+        Sum the per-code counters into success / redirect / bad / error / other.
+
+        1xx, 2xx and 304 count as success, the remaining 3xx as redirects,
+        4xx as bad requests, 5xx as server errors, anything else as other.
+
+        :param raw_data: parsed health document
+        """
+        data = defaultdict(int)
+        for code, value in status_code_counts_(raw_data).items():
+            # Slicing keeps an empty key from raising; it falls through to 'other'.
+            code_prefix = code[:1]
+            if code_prefix in ('1', '2') or code == '304':
+                data['successful_requests'] += value
+            elif code_prefix == '3':
+                data['redirects'] += value
+            elif code_prefix == '4':
+                data['bad_requests'] += value
+            elif code_prefix == '5':
+                data['server_errors'] += value
+            else:
+                data['other_requests'] += value
+        self.data.update(data)
+
+    def get_data_per_code_family(self, raw_data):
+        """
+        Sum the per-code counters by the first digit of the code (1xx .. 5xx, other).
+
+        :param raw_data: parsed health document
+        """
+        data = defaultdict(int)
+        for code, value in status_code_counts_(raw_data).items():
+            code_prefix = code[:1]
+            if code_prefix in ('1', '2', '3', '4', '5'):
+                data[code_prefix + 'xx'] += value
+            else:
+                data['other'] += value
+        self.data.update(data)
+
+    def get_data_per_code(self, raw_data):
+        """
+        Store the counter of every individual status code, adding a dimension to
+        the 'detailed_response_codes' chart the first time a code is seen.
+
+        Does nothing while `self.charts` is not set.
+
+        :param raw_data: parsed health document
+        """
+        if not self.charts:
+            return
+        for code, value in status_code_counts_(raw_data).items():
+            if code not in self.data:
+                self.charts['detailed_response_codes'].add_dimension([code, code, 'incremental'])
+            self.data[code] = value
+
+
+def status_code_counts_(raw_data):
+    """
+    Return the 'total_status_code_count' mapping of the health document.
+
+    :param raw_data: parsed health document
+    :return: mapping of status code to counter; empty when the key is missing or null
+    """
+    return raw_data.get('total_status_code_count') or dict()
+
+
+def fetch_data_(raw_data, metrics):
+    """
+    Pick the requested metrics out of a (possibly nested) mapping.
+
+    :param raw_data: parsed health document
+    :param metrics: metric names; a dot separates nested keys
+    :return: dict keyed by the metric name with dots replaced by underscores;
+             metrics that cannot be resolved are skipped
+    """
+    data = dict()
+
+    for metric in metrics:
+        value = raw_data
+        metrics_list = metric.split('.')
+        try:
+            for m in metrics_list:
+                value = value[m]
+        except (KeyError, TypeError):
+            # TypeError: an intermediate value is not a mapping.
+            continue
+        data['_'.join(metrics_list)] = value
+
+    return data
diff --git a/src/collectors/python.d.plugin/traefik/traefik.conf b/src/collectors/python.d.plugin/traefik/traefik.conf
new file mode 100644
index 000000000..e3f182d32
--- /dev/null
+++ b/src/collectors/python.d.plugin/traefik/traefik.conf
@@ -0,0 +1,77 @@
+# netdata python.d.plugin configuration for traefik health data API
+#
+# This file is in YaML format.
Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. 
These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# penalty: yes # the JOB's penalty +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# In addition to the above, the traefik plugin also supports the following: +# +# url: '<scheme>://<host>:<port>/<health_page_api>' +# # http://localhost:8080/health +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +local: + url: 'http://localhost:8080/health' |