summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/monit
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/monit')
-rw-r--r--collectors/python.d.plugin/monit/monit.chart.py316
1 files changed, 241 insertions, 75 deletions
diff --git a/collectors/python.d.plugin/monit/monit.chart.py b/collectors/python.d.plugin/monit/monit.chart.py
index 3ac0032c..9f327057 100644
--- a/collectors/python.d.plugin/monit/monit.chart.py
+++ b/collectors/python.d.plugin/monit/monit.chart.py
@@ -4,23 +4,49 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import xml.etree.ElementTree as ET
+
+from collections import namedtuple
+
from bases.FrameworkServices.UrlService import UrlService
-# see enum State_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h)
-MONIT_SERVICE_NAMES = [
- 'Filesystem',
- 'Directory',
- 'File',
- 'Process',
- 'Host',
- 'System',
- 'Fifo',
- 'Program',
- 'Net',
-]
+MonitType = namedtuple('MonitType', ('index', 'name'))
+
+# see enum Service_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h)
+# typedef enum {
+# Service_Filesystem = 0,
+# Service_Directory,
+# Service_File,
+# Service_Process,
+# Service_Host,
+# Service_System,
+# Service_Fifo,
+# Service_Program,
+# Service_Net,
+# Service_Last = Service_Net
+# } __attribute__((__packed__)) Service_Type;
-DEFAULT_SERVICES_IDS = [0, 1, 2, 3, 4, 6, 7, 8]
+TYPE_FILESYSTEM = MonitType(0, 'filesystem')
+TYPE_DIRECTORY = MonitType(1, 'directory')
+TYPE_FILE = MonitType(2, 'file')
+TYPE_PROCESS = MonitType(3, 'process')
+TYPE_HOST = MonitType(4, 'host')
+TYPE_SYSTEM = MonitType(5, 'system')
+TYPE_FIFO = MonitType(6, 'fifo')
+TYPE_PROGRAM = MonitType(7, 'program')
+TYPE_NET = MonitType(8, 'net')
+
+TYPES = (
+ TYPE_FILESYSTEM,
+ TYPE_DIRECTORY,
+ TYPE_FILE,
+ TYPE_PROCESS,
+ TYPE_HOST,
+ TYPE_SYSTEM,
+ TYPE_FIFO,
+ TYPE_PROGRAM,
+ TYPE_NET,
+)
# charts order (can be overridden if you want less charts, or different order)
ORDER = [
@@ -38,6 +64,7 @@ ORDER = [
'program',
'net'
]
+
CHARTS = {
'filesystem': {
'options': ['filesystems', 'Filesystems', 'filesystems', 'filesystem', 'monit.filesystems', 'line'],
@@ -83,7 +110,7 @@ CHARTS = {
'lines': []
},
'host_latency': {
- 'options': ['hosts latency', 'Hosts latency', 'milliseconds/s', 'network', 'monit.host_latency', 'line'],
+ 'options': ['hosts latency', 'Hosts latency', 'milliseconds', 'network', 'monit.host_latency', 'line'],
'lines': []
},
'net': {
@@ -94,85 +121,224 @@ CHARTS = {
}
+class BaseMonitService(object):
+ def __init__(self, typ, name, status, monitor):
+ self.type = typ
+ self.name = name
+ self.status = status
+ self.monitor = monitor
+
+ def __repr__(self):
+ return 'MonitService({0}:{1})'.format(self.type.name, self.name)
+
+ def __eq__(self, other):
+ if not isinstance(other, BaseMonitService):
+ return False
+ return self.type == other.type and self.name == other.name
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(repr(self))
+
+ def is_running(self):
+ return self.status == '0' and self.monitor == '1'
+
+ def key(self):
+ return '{0}_{1}'.format(self.type.name, self.name)
+
+ def data(self):
+ return {self.key(): int(self.is_running())}
+
+
+class ProcessMonitService(BaseMonitService):
+ def __init__(self, typ, name, status, monitor):
+ super(ProcessMonitService, self).__init__(typ, name, status, monitor)
+ self.uptime = None
+ self.threads = None
+ self.children = None
+
+ def uptime_key(self):
+ return 'process_uptime_{0}'.format(self.name)
+
+ def threads_key(self):
+ return 'process_threads_{0}'.format(self.name)
+
+ def children_key(self):
+ return 'process_children_{0}'.format(self.name)
+
+ def data(self):
+ base_data = super(ProcessMonitService, self).data()
+ # skipping bugged metrics with negative uptime (monit before v5.16)
+ uptime = self.uptime if self.uptime and int(self.uptime) >= 0 else None
+ data = {
+ self.uptime_key(): uptime,
+ self.threads_key(): self.threads,
+ self.children_key(): self.children,
+ }
+ data.update(base_data)
+
+ return data
+
+
+class HostMonitService(BaseMonitService):
+ def __init__(self, typ, name, status, monitor):
+ super(HostMonitService, self).__init__(typ, name, status, monitor)
+ self.latency = None
+
+ def latency_key(self):
+ return 'host_latency_{0}'.format(self.name)
+
+ def data(self):
+ base_data = super(HostMonitService, self).data()
+ latency = float(self.latency) * 1000000 if self.latency else None
+ data = {self.latency_key(): latency}
+ data.update(base_data)
+
+ return data
+
+
class Service(UrlService):
def __init__(self, configuration=None, name=None):
UrlService.__init__(self, configuration=configuration, name=name)
self.order = ORDER
self.definitions = CHARTS
- base_url = self.configuration.get('url', 'http://localhost:2812')
+ base_url = self.configuration.get('url', "http://localhost:2812")
self.url = '{0}/_status?format=xml&level=full'.format(base_url)
+ self.active_services = list()
- def parse(self, data):
+ def parse(self, raw):
try:
- xml = ET.fromstring(data)
+ root = ET.fromstring(raw)
except ET.ParseError:
- self.error("URL {0} didn't return a vaild XML page. Please check your settings.".format(self.url))
+ self.error("URL {0} didn't return a valid XML page. Please check your settings.".format(self.url))
+ return None
+ return root
+
+ def _get_data(self):
+ raw = self._get_raw_data()
+ if not raw:
return None
- return xml
- def check(self):
- self._manager = self._build_manager()
+ root = self.parse(raw)
+ if root is None:
+ return None
- raw_data = self._get_raw_data()
- if not raw_data:
+ services = self.get_services(root)
+ if not services:
return None
- return bool(self.parse(raw_data))
+ if len(self.charts) > 0:
+ self.update_charts(services)
- def _get_data(self):
- raw_data = self._get_raw_data()
+ data = dict()
- if not raw_data:
- return None
+ for svc in services:
+ data.update(svc.data())
- xml = self.parse(raw_data)
- if not xml:
- return None
+ return data
- data = {}
- for service_id in DEFAULT_SERVICES_IDS:
- service_category = MONIT_SERVICE_NAMES[service_id].lower()
+ def get_services(self, root):
+ services = list()
- if service_category == 'system':
- self.debug("Skipping service from 'System' category, because it's useless in graphs")
+ for typ in TYPES:
+ if typ == TYPE_SYSTEM:
+ self.debug("skipping service from '{0}' category, it's useless in graphs".format(TYPE_SYSTEM.name))
continue
- xpath_query = "./service[@type='{0}']".format(service_id)
- self.debug('Searching for {0} as {1}'.format(service_category, xpath_query))
- for service_node in xml.findall(xpath_query):
-
- service_name = service_node.find('name').text
- service_status = service_node.find('status').text
- service_monitoring = service_node.find('monitor').text
- self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format(service_name,
- service_id, service_status, service_monitoring))
-
- dimension_key = service_category + '_' + service_name
- if dimension_key not in self.charts[service_category]:
- self.charts[service_category].add_dimension([dimension_key, service_name, 'absolute'])
- data[dimension_key] = 1 if service_status == '0' and service_monitoring == '1' else 0
-
- if service_category == 'process':
- for subnode in ('uptime', 'threads', 'children'):
- subnode_value = service_node.find(subnode)
- if subnode_value is None:
- continue
- if subnode == 'uptime' and int(subnode_value.text) < 0:
- self.debug('Skipping bugged metrics with negative uptime (monit before v5.16')
- continue
- dimension_key = 'process_{0}_{1}'.format(subnode, service_name)
- if dimension_key not in self.charts['process_' + subnode]:
- self.charts['process_' + subnode].add_dimension([dimension_key, service_name, 'absolute'])
- data[dimension_key] = int(subnode_value.text)
-
- if service_category == 'host':
- subnode_value = service_node.find('./icmp/responsetime')
- if subnode_value is None:
- continue
- dimension_key = 'host_latency_{0}'.format(service_name)
- if dimension_key not in self.charts['host_latency']:
- self.charts['host_latency'].add_dimension([dimension_key, service_name,
- 'absolute', 1000, 1000000])
- data[dimension_key] = float(subnode_value.text) * 1000000
-
- return data or None
+ xpath_query = "./service[@type='{0}']".format(typ.index)
+ self.debug('Searching for {0} as {1}'.format(typ.name, xpath_query))
+
+ for svc_root in root.findall(xpath_query):
+ svc = create_service(svc_root, typ)
+ self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format(
+ svc.name, svc.type.name, svc.status, svc.monitor))
+
+ services.append(svc)
+
+ return services
+
+ def update_charts(self, services):
+ remove = [svc for svc in self.active_services if svc not in services]
+ add = [svc for svc in services if svc not in self.active_services]
+
+ self.remove_services_from_charts(remove)
+ self.add_services_to_charts(add)
+
+ self.active_services = services
+
+ def add_services_to_charts(self, services):
+ for svc in services:
+ if svc.type == TYPE_HOST:
+ self.charts['host_latency'].add_dimension([svc.latency_key(), svc.name, 'absolute', 1000, 1000000])
+ if svc.type == TYPE_PROCESS:
+ self.charts['process_uptime'].add_dimension([svc.uptime_key(), svc.name])
+ self.charts['process_threads'].add_dimension([svc.threads_key(), svc.name])
+ self.charts['process_children'].add_dimension([svc.children_key(), svc.name])
+ self.charts[svc.type.name].add_dimension([svc.key(), svc.name])
+
+ def remove_services_from_charts(self, services):
+ for svc in services:
+ if svc.type == TYPE_HOST:
+ self.charts['host_latency'].del_dimension(svc.latency_key(), False)
+ if svc.type == TYPE_PROCESS:
+ self.charts['process_uptime'].del_dimension(svc.uptime_key(), False)
+ self.charts['process_threads'].del_dimension(svc.threads_key(), False)
+ self.charts['process_children'].del_dimension(svc.children_key(), False)
+ self.charts[svc.type.name].del_dimension(svc.key(), False)
+
+
+def create_service(root, typ):
+ if typ == TYPE_HOST:
+ return create_host_service(root)
+ elif typ == TYPE_PROCESS:
+ return create_process_service(root)
+ return create_base_service(root, typ)
+
+
+def create_host_service(root):
+ svc = HostMonitService(
+ TYPE_HOST,
+ root.find('name').text,
+ root.find('status').text,
+ root.find('monitor').text,
+ )
+
+ latency = root.find('./icmp/responsetime')
+ if latency is not None:
+ svc.latency = latency.text
+
+ return svc
+
+
+def create_process_service(root):
+ svc = ProcessMonitService(
+ TYPE_PROCESS,
+ root.find('name').text,
+ root.find('status').text,
+ root.find('monitor').text,
+ )
+
+ uptime = root.find('uptime')
+ if uptime is not None:
+ svc.uptime = uptime.text
+
+ threads = root.find('threads')
+ if threads is not None:
+ svc.threads = threads.text
+
+ children = root.find('children')
+ if children is not None:
+ svc.children = children.text
+
+ return svc
+
+
+def create_base_service(root, typ):
+ return BaseMonitService(
+ typ,
+ root.find('name').text,
+ root.find('status').text,
+ root.find('monitor').text,
+ )