diff options
Diffstat (limited to 'collectors/python.d.plugin/monit')
-rw-r--r-- | collectors/python.d.plugin/monit/monit.chart.py | 316 |
1 files changed, 241 insertions, 75 deletions
diff --git a/collectors/python.d.plugin/monit/monit.chart.py b/collectors/python.d.plugin/monit/monit.chart.py index 3ac0032c5..9f3270572 100644 --- a/collectors/python.d.plugin/monit/monit.chart.py +++ b/collectors/python.d.plugin/monit/monit.chart.py @@ -4,23 +4,49 @@ # SPDX-License-Identifier: GPL-3.0-or-later import xml.etree.ElementTree as ET + +from collections import namedtuple + from bases.FrameworkServices.UrlService import UrlService -# see enum State_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h) -MONIT_SERVICE_NAMES = [ - 'Filesystem', - 'Directory', - 'File', - 'Process', - 'Host', - 'System', - 'Fifo', - 'Program', - 'Net', -] +MonitType = namedtuple('MonitType', ('index', 'name')) + +# see enum Service_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h) +# typedef enum { +# Service_Filesystem = 0, +# Service_Directory, +# Service_File, +# Service_Process, +# Service_Host, +# Service_System, +# Service_Fifo, +# Service_Program, +# Service_Net, +# Service_Last = Service_Net +# } __attribute__((__packed__)) Service_Type; -DEFAULT_SERVICES_IDS = [0, 1, 2, 3, 4, 6, 7, 8] +TYPE_FILESYSTEM = MonitType(0, 'filesystem') +TYPE_DIRECTORY = MonitType(1, 'directory') +TYPE_FILE = MonitType(2, 'file') +TYPE_PROCESS = MonitType(3, 'process') +TYPE_HOST = MonitType(4, 'host') +TYPE_SYSTEM = MonitType(5, 'system') +TYPE_FIFO = MonitType(6, 'fifo') +TYPE_PROGRAM = MonitType(7, 'program') +TYPE_NET = MonitType(8, 'net') + +TYPES = ( + TYPE_FILESYSTEM, + TYPE_DIRECTORY, + TYPE_FILE, + TYPE_PROCESS, + TYPE_HOST, + TYPE_SYSTEM, + TYPE_FIFO, + TYPE_PROGRAM, + TYPE_NET, +) # charts order (can be overridden if you want less charts, or different order) ORDER = [ @@ -38,6 +64,7 @@ ORDER = [ 'program', 'net' ] + CHARTS = { 'filesystem': { 'options': ['filesystems', 'Filesystems', 'filesystems', 'filesystem', 'monit.filesystems', 'line'], @@ -83,7 +110,7 @@ CHARTS = { 'lines': [] }, 'host_latency': { - 'options': ['hosts latency', 'Hosts latency', 'milliseconds/s', 'network', 'monit.host_latency', 'line'], + 'options': ['hosts latency', 'Hosts latency', 'milliseconds', 'network', 'monit.host_latency', 'line'], 'lines': [] }, 'net': { @@ -94,85 +121,224 @@ CHARTS = { } +class BaseMonitService(object): + def __init__(self, typ, name, status, monitor): + self.type = typ + self.name = name + self.status = status + self.monitor = monitor + + def __repr__(self): + return 'MonitService({0}:{1})'.format(self.type.name, self.name) + + def __eq__(self, other): + if not isinstance(other, BaseMonitService): + return False + return self.type == other.type and self.name == other.name + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def is_running(self): + return self.status == '0' and self.monitor == '1' + + def key(self): + return '{0}_{1}'.format(self.type.name, self.name) + + def data(self): + return {self.key(): int(self.is_running())} + + +class ProcessMonitService(BaseMonitService): + def __init__(self, typ, name, status, monitor): + super(ProcessMonitService, self).__init__(typ, name, status, monitor) + self.uptime = None + self.threads = None + self.children = None + + def uptime_key(self): + return 'process_uptime_{0}'.format(self.name) + + def threads_key(self): + return 'process_threads_{0}'.format(self.name) + + def children_key(self): + return 'process_children_{0}'.format(self.name) + + def data(self): + base_data = super(ProcessMonitService, self).data() + # skipping bugged metrics with negative uptime (monit before v5.16) + uptime = self.uptime if self.uptime and int(self.uptime) >= 0 else None + data = { + self.uptime_key(): uptime, + self.threads_key(): self.threads, + self.children_key(): self.children, + } + data.update(base_data) + + return data + + +class HostMonitService(BaseMonitService): + def __init__(self, typ, name, status, monitor): + super(HostMonitService, self).__init__(typ, name, status, monitor) + self.latency = None + + def latency_key(self): + return 'host_latency_{0}'.format(self.name) + + def data(self): + base_data = super(HostMonitService, self).data() + latency = float(self.latency) * 1000000 if self.latency else None + data = {self.latency_key(): latency} + data.update(base_data) + + return data + + class Service(UrlService): def __init__(self, configuration=None, name=None): UrlService.__init__(self, configuration=configuration, name=name) self.order = ORDER self.definitions = CHARTS - base_url = self.configuration.get('url', 'http://localhost:2812') + base_url = self.configuration.get('url', "http://localhost:2812") self.url = '{0}/_status?format=xml&level=full'.format(base_url) + self.active_services = list() - def parse(self, data): + def parse(self, raw): try: - xml = ET.fromstring(data) + root = ET.fromstring(raw) except ET.ParseError: - self.error("URL {0} didn't return a vaild XML page. Please check your settings.".format(self.url)) + self.error("URL {0} didn't return a valid XML page. Please check your settings.".format(self.url)) + return None + return root + + def _get_data(self): + raw = self._get_raw_data() + if not raw: return None - return xml - def check(self): - self._manager = self._build_manager() + root = self.parse(raw) + if root is None: + return None - raw_data = self._get_raw_data() - if not raw_data: + services = self.get_services(root) + if not services: return None - return bool(self.parse(raw_data)) + if len(self.charts) > 0: + self.update_charts(services) - def _get_data(self): - raw_data = self._get_raw_data() + data = dict() - if not raw_data: - return None + for svc in services: + data.update(svc.data()) - xml = self.parse(raw_data) - if not xml: - return None + return data - data = {} - for service_id in DEFAULT_SERVICES_IDS: - service_category = MONIT_SERVICE_NAMES[service_id].lower() + def get_services(self, root): + services = list() - if service_category == 'system': - self.debug("Skipping service from 'System' category, because it's useless in graphs") + for typ in TYPES: + if typ == TYPE_SYSTEM: + self.debug("skipping service from '{0}' category, it's useless in graphs".format(TYPE_SYSTEM.name)) continue - xpath_query = "./service[@type='{0}']".format(service_id) - self.debug('Searching for {0} as {1}'.format(service_category, xpath_query)) - for service_node in xml.findall(xpath_query): - - service_name = service_node.find('name').text - service_status = service_node.find('status').text - service_monitoring = service_node.find('monitor').text - self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format(service_name, - service_id, service_status, service_monitoring)) - - dimension_key = service_category + '_' + service_name - if dimension_key not in self.charts[service_category]: - self.charts[service_category].add_dimension([dimension_key, service_name, 'absolute']) - data[dimension_key] = 1 if service_status == '0' and service_monitoring == '1' else 0 - - if service_category == 'process': - for subnode in ('uptime', 'threads', 'children'): - subnode_value = service_node.find(subnode) - if subnode_value is None: - continue - if subnode == 'uptime' and int(subnode_value.text) < 0: - self.debug('Skipping bugged metrics with negative uptime (monit before v5.16') - continue - dimension_key = 'process_{0}_{1}'.format(subnode, service_name) - if dimension_key not in self.charts['process_' + subnode]: - self.charts['process_' + subnode].add_dimension([dimension_key, service_name, 'absolute']) - data[dimension_key] = int(subnode_value.text) - - if service_category == 'host': - subnode_value = service_node.find('./icmp/responsetime') - if subnode_value is None: - continue - dimension_key = 'host_latency_{0}'.format(service_name) - if dimension_key not in self.charts['host_latency']: - self.charts['host_latency'].add_dimension([dimension_key, service_name, - 'absolute', 1000, 1000000]) - data[dimension_key] = float(subnode_value.text) * 1000000 - - return data or None + xpath_query = "./service[@type='{0}']".format(typ.index) + self.debug('Searching for {0} as {1}'.format(typ.name, xpath_query)) + + for svc_root in root.findall(xpath_query): + svc = create_service(svc_root, typ) + self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format( + svc.name, svc.type.name, svc.status, svc.monitor)) + + services.append(svc) + + return services + + def update_charts(self, services): + remove = [svc for svc in self.active_services if svc not in services] + add = [svc for svc in services if svc not in self.active_services] + + self.remove_services_from_charts(remove) + self.add_services_to_charts(add) + + self.active_services = services + + def add_services_to_charts(self, services): + for svc in services: + if svc.type == TYPE_HOST: + self.charts['host_latency'].add_dimension([svc.latency_key(), svc.name, 'absolute', 1000, 1000000]) + if svc.type == TYPE_PROCESS: + self.charts['process_uptime'].add_dimension([svc.uptime_key(), svc.name]) + self.charts['process_threads'].add_dimension([svc.threads_key(), svc.name]) + self.charts['process_children'].add_dimension([svc.children_key(), svc.name]) + self.charts[svc.type.name].add_dimension([svc.key(), svc.name]) + + def remove_services_from_charts(self, services): + for svc in services: + if svc.type == TYPE_HOST: + self.charts['host_latency'].del_dimension(svc.latency_key(), False) + if svc.type == TYPE_PROCESS: + self.charts['process_uptime'].del_dimension(svc.uptime_key(), False) + self.charts['process_threads'].del_dimension(svc.threads_key(), False) + self.charts['process_children'].del_dimension(svc.children_key(), False) + self.charts[svc.type.name].del_dimension(svc.key(), False) + + +def create_service(root, typ): + if typ == TYPE_HOST: + return create_host_service(root) + elif typ == TYPE_PROCESS: + return create_process_service(root) + return create_base_service(root, typ) + + +def create_host_service(root): + svc = HostMonitService( + TYPE_HOST, + root.find('name').text, + root.find('status').text, + root.find('monitor').text, + ) + + latency = root.find('./icmp/responsetime') + if latency is not None: + svc.latency = latency.text + + return svc + + +def create_process_service(root): + svc = ProcessMonitService( + TYPE_PROCESS, + root.find('name').text, + root.find('status').text, + root.find('monitor').text, + ) + + uptime = root.find('uptime') + if uptime is not None: + svc.uptime = uptime.text + + threads = root.find('threads') + if threads is not None: + svc.threads = threads.text + + children = root.find('children') + if children is not None: + svc.children = children.text + + return svc + + +def create_base_service(root, typ): + return BaseMonitService( + typ, + root.find('name').text, + root.find('status').text, + root.find('monitor').text, + ) |