diff options
Diffstat (limited to 'collectors/python.d.plugin/httpcheck')
-rw-r--r-- | collectors/python.d.plugin/httpcheck/Makefile.inc | 13 | ||||
-rw-r--r-- | collectors/python.d.plugin/httpcheck/README.md | 59 | ||||
-rw-r--r-- | collectors/python.d.plugin/httpcheck/httpcheck.chart.py | 125 | ||||
-rw-r--r-- | collectors/python.d.plugin/httpcheck/httpcheck.conf | 107 |
4 files changed, 304 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/httpcheck/Makefile.inc b/collectors/python.d.plugin/httpcheck/Makefile.inc new file mode 100644 index 0000000..4a5bd85 --- /dev/null +++ b/collectors/python.d.plugin/httpcheck/Makefile.inc @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_python_DATA += httpcheck/httpcheck.chart.py +dist_pythonconfig_DATA += httpcheck/httpcheck.conf + +# do not install these files, but include them in the distribution +dist_noinst_DATA += httpcheck/README.md httpcheck/Makefile.inc + diff --git a/collectors/python.d.plugin/httpcheck/README.md b/collectors/python.d.plugin/httpcheck/README.md new file mode 100644 index 0000000..55aad52 --- /dev/null +++ b/collectors/python.d.plugin/httpcheck/README.md @@ -0,0 +1,59 @@ +<!-- +title: "HTTP endpoint monitoring with Netdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/python.d.plugin/httpcheck/README.md +sidebar_label: "HTTP endpoints" +--> + +# HTTP endpoint monitoring with Netdata + +Monitors a remote HTTP server for availability and response time. + +The following charts are drawn per job: + +1. **Response time** ms + + - Time the server takes to respond, in 0.1 ms resolution. + If the connection failed, the value is missing. + +2. **Status** boolean + + - Connection successful + - Unexpected content: No Regex match found in the response + - Unexpected status code: the response status is not in `status_accepted` (e.g. a 500 error) + - Connection failed: port not listening or blocked + - Connection timed out: host or port unreachable + +## Configuration + +Edit the `python.d/httpcheck.conf` configuration file using `edit-config` from the Netdata [config +directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`. 
+ +```bash +cd /etc/netdata # Replace this path with your Netdata config directory, if different +sudo ./edit-config python.d/httpcheck.conf +``` + +Sample configuration and their default values. + +```yaml +server: + url: 'http://host:port/path' # required + status_accepted: # optional + - 200 + timeout: 1 # optional, supports decimals (e.g. 0.2) + update_every: 3 # optional + regex: 'REGULAR_EXPRESSION' # optional, see https://docs.python.org/3/howto/regex.html + redirect: yes # optional +``` + +### Notes + +- The status chart is primarily intended for alarms, badges or for access via API. +- A system/service/firewall might block Netdata's access if a portscan or + similar is detected. +- This plugin is meant for simple use cases. Currently, the accuracy of the + response time is low and should be used as reference only. + +--- + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Fpython.d.plugin%2Fhttpcheck%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) diff --git a/collectors/python.d.plugin/httpcheck/httpcheck.chart.py b/collectors/python.d.plugin/httpcheck/httpcheck.chart.py new file mode 100644 index 0000000..75718bb --- /dev/null +++ b/collectors/python.d.plugin/httpcheck/httpcheck.chart.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# Description: http check netdata python.d module +# Original Author: ccremer (github.com/ccremer) +# SPDX-License-Identifier: GPL-3.0-or-later + +import re + +import urllib3 + +try: + from time import monotonic as time +except ImportError: + from time import time + +from bases.FrameworkServices.UrlService import UrlService + +# default module values (can be overridden per job in `config`) +update_every = 3 +priority = 60000 + +# Response +HTTP_RESPONSE_TIME = 'time' +HTTP_RESPONSE_LENGTH = 'length' + +# Status dimensions +HTTP_SUCCESS = 'success' 
+HTTP_BAD_CONTENT = 'bad_content' +HTTP_BAD_STATUS = 'bad_status' +HTTP_TIMEOUT = 'timeout' +HTTP_NO_CONNECTION = 'no_connection' + +ORDER = [ + 'response_time', + 'response_length', + 'status', +] + +CHARTS = { + 'response_time': { + 'options': [None, 'HTTP response time', 'milliseconds', 'response', 'httpcheck.responsetime', 'line'], + 'lines': [ + [HTTP_RESPONSE_TIME, 'time', 'absolute', 100, 1000] + ] + }, + 'response_length': { + 'options': [None, 'HTTP response body length', 'characters', 'response', 'httpcheck.responselength', 'line'], + 'lines': [ + [HTTP_RESPONSE_LENGTH, 'length', 'absolute'] + ] + }, + 'status': { + 'options': [None, 'HTTP status', 'boolean', 'status', 'httpcheck.status', 'line'], + 'lines': [ + [HTTP_SUCCESS, 'success', 'absolute'], + [HTTP_BAD_CONTENT, 'bad content', 'absolute'], + [HTTP_BAD_STATUS, 'bad status', 'absolute'], + [HTTP_TIMEOUT, 'timeout', 'absolute'], + [HTTP_NO_CONNECTION, 'no connection', 'absolute'] + ] + } +} + + +class Service(UrlService): + def __init__(self, configuration=None, name=None): + UrlService.__init__(self, configuration=configuration, name=name) + self.order = ORDER + self.definitions = CHARTS + pattern = self.configuration.get('regex') + self.regex = re.compile(pattern) if pattern else None + self.status_codes_accepted = self.configuration.get('status_accepted', [200]) + self.follow_redirect = self.configuration.get('redirect', True) + + def _get_data(self): + """ + Format data received from http request + :return: dict + """ + data = dict() + data[HTTP_SUCCESS] = 0 + data[HTTP_BAD_CONTENT] = 0 + data[HTTP_BAD_STATUS] = 0 + data[HTTP_TIMEOUT] = 0 + data[HTTP_NO_CONNECTION] = 0 + url = self.url + try: + start = time() + status, content = self._get_raw_data_with_status(retries=1 if self.follow_redirect else False, + redirect=self.follow_redirect) + diff = time() - start + data[HTTP_RESPONSE_TIME] = max(round(diff * 10000), 0) + self.debug('Url: {url}. 
Host responded with status code {code} in {diff} s'.format( + url=url, code=status, diff=diff + )) + self.process_response(content, data, status) + + except urllib3.exceptions.NewConnectionError as error: + self.debug('Connection failed: {url}. Error: {error}'.format(url=url, error=error)) + data[HTTP_NO_CONNECTION] = 1 + + except (urllib3.exceptions.TimeoutError, urllib3.exceptions.PoolError) as error: + self.debug('Connection timed out: {url}. Error: {error}'.format(url=url, error=error)) + data[HTTP_TIMEOUT] = 1 + + except urllib3.exceptions.HTTPError as error: + self.debug('Connection failed: {url}. Error: {error}'.format(url=url, error=error)) + data[HTTP_NO_CONNECTION] = 1 + + except (TypeError, AttributeError) as error: + self.error('Url: {url}. Error: {error}'.format(url=url, error=error)) + return None + + return data + + def process_response(self, content, data, status): + data[HTTP_RESPONSE_LENGTH] = len(content) + self.debug('Content: \n\n{content}\n'.format(content=content)) + if status in self.status_codes_accepted: + if self.regex and self.regex.search(content) is None: + self.debug('No match for regex "{regex}" found'.format(regex=self.regex.pattern)) + data[HTTP_BAD_CONTENT] = 1 + else: + data[HTTP_SUCCESS] = 1 + else: + data[HTTP_BAD_STATUS] = 1 diff --git a/collectors/python.d.plugin/httpcheck/httpcheck.conf b/collectors/python.d.plugin/httpcheck/httpcheck.conf new file mode 100644 index 0000000..95adba2 --- /dev/null +++ b/collectors/python.d.plugin/httpcheck/httpcheck.conf @@ -0,0 +1,107 @@ +# netdata python.d.plugin configuration for httpcheck +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). 
+ +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the httpcheck default is used, which is 3 seconds. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# penalty indicates whether to apply a penalty to update_every in case of failures. +# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. +# penalty: yes + +# chart_cleanup sets the default chart cleanup interval in iterations. +# A chart is marked as obsolete if it has not been updated +# 'chart_cleanup' iterations in a row. +# Obsolete charts are hidden immediately (not offered to dashboard viewer, +# streamed upstream and archived to backends) and deleted one hour +# later (configurable from netdata.conf). +# -- For this plugin, cleanup MUST be disabled, otherwise we lose response +# time charts +chart_cleanup: 0 + +# Autodetection and retries do not work for this plugin + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed to run at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# ------------------------------- +# ATTENTION: Any valid configuration will be accepted, even if the initial connection fails! +# ------------------------------- +# +# There is intentionally no default config, e.g.
for 'localhost' + +# job_name: +# name: myname # [optional] the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 3 # [optional] the JOB's data collection frequency +# priority: 60000 # [optional] the JOB's order on the dashboard +# penalty: yes # [optional] the JOB's penalty +# timeout: 1 # [optional] the timeout when connecting, supports decimals (e.g. 0.5) +# url: 'http[s]://host-ip-or-dns[:port][path]' +# # [required] the remote host url to connect to. If [:port] is missing, it defaults to 80 +# # for HTTP and 443 for HTTPS. [path] is optional too, defaults to / +# header: {'Content-Type': 'application/json'} +# # [optional] the HTTP header sent with the request. +# method: GET # [optional] the HTTP request method (POST, PUT, DELETE, HEAD etc.) +# redirect: yes # [optional] If the remote host returns 3xx status codes, the redirection url will be +# # followed (default). +# body: {'key': 'value'} # [optional] the body sent with the request (e.g. POST, PUT, PATCH). +# status_accepted: # [optional] By default, 200 is accepted. Anything else will result in 'bad status' in the +# # status chart; however, the response time will still be > 0, since the +# # host responded with something. +# # If redirect is enabled, the accepted status will be checked against the redirected page. +# - 200 # Multiple status codes are possible. If you specify 'status_accepted', you would still +# # need to add '200'. E.g. 'status_accepted: [301]' will trigger an error in 'bad status' +# # if code is 200. Do specify numerical entries such as 200, not 'OK'. +# regex: None # [optional] If the status code is accepted, the content of the response will be searched for this +# # regex (if defined). Be aware that you may need to escape the regex string. If redirect is enabled, +# # the regex will be matched to the redirected page, not the initial 3xx response.
+ +# Simple example: +# +# jira: +# url: 'https://jira.localdomain/' + + +# Complex example: +# +# cool_website: +# url: 'http://cool.website:8080/home' +# status_accepted: +# - 200 +# - 204 +# regex: <title>My cool website!<\/title> +# timeout: 2 + +# This plugin is intended for simple cases. Currently, the measured response time has low accuracy and should be used as a reference only. + |