summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2019-07-08 20:14:42 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2019-07-08 20:14:42 +0000
commit4f88e1a9be89a257fd6ed3045703db6e900027ee (patch)
tree518eb3c3aa1dce9ea281d02e0fd3cc01a9e7913f /collectors/python.d.plugin
parentAdding upstream version 1.15.0. (diff)
downloadnetdata-fddc31ea5fb4218e2e5d9daba0780f0acfcf7e01.tar.xz
netdata-fddc31ea5fb4218e2e5d9daba0780f0acfcf7e01.zip
Adding upstream version 1.16.0.upstream/1.16.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin')
-rw-r--r--collectors/python.d.plugin/Makefile.am1
-rw-r--r--collectors/python.d.plugin/README.md2
-rw-r--r--collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py4
-rw-r--r--collectors/python.d.plugin/dns_query_time/dns_query_time.chart.py19
-rw-r--r--collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py24
-rw-r--r--collectors/python.d.plugin/monit/monit.chart.py316
-rw-r--r--collectors/python.d.plugin/mysql/README.md22
-rw-r--r--collectors/python.d.plugin/mysql/mysql.chart.py134
-rw-r--r--collectors/python.d.plugin/python.d.conf5
-rw-r--r--collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py23
-rw-r--r--collectors/python.d.plugin/riakkv/Makefile.inc13
-rw-r--r--collectors/python.d.plugin/riakkv/README.md110
-rw-r--r--collectors/python.d.plugin/riakkv/riakkv.chart.py315
-rw-r--r--collectors/python.d.plugin/riakkv/riakkv.conf68
-rw-r--r--collectors/python.d.plugin/smartd_log/README.md6
-rw-r--r--collectors/python.d.plugin/tomcat/tomcat.chart.py41
-rw-r--r--collectors/python.d.plugin/varnish/varnish.chart.py58
-rw-r--r--collectors/python.d.plugin/web_log/web_log.chart.py5
18 files changed, 1046 insertions, 120 deletions
diff --git a/collectors/python.d.plugin/Makefile.am b/collectors/python.d.plugin/Makefile.am
index 652a35da4..ad72cfaef 100644
--- a/collectors/python.d.plugin/Makefile.am
+++ b/collectors/python.d.plugin/Makefile.am
@@ -87,6 +87,7 @@ include rabbitmq/Makefile.inc
include redis/Makefile.inc
include rethinkdbs/Makefile.inc
include retroshare/Makefile.inc
+include riakkv/Makefile.inc
include samba/Makefile.inc
include sensors/Makefile.inc
include smartd_log/Makefile.inc
diff --git a/collectors/python.d.plugin/README.md b/collectors/python.d.plugin/README.md
index 8955197a7..32437c6db 100644
--- a/collectors/python.d.plugin/README.md
+++ b/collectors/python.d.plugin/README.md
@@ -150,7 +150,7 @@ Classes implement `_get_raw_data` which should be used to grab raw data. This me
_This is last resort class, if a new module cannot be written by using other framework class this one can be used._
-_Example: `mysql`, `sensors`_
+_Example: `ceph`, `sensors`_
It is the lowest-level class which implements most of module logic, like:
- threading
diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
index 052c93144..3fcb5fda8 100644
--- a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
+++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
@@ -56,8 +56,8 @@ GOOD_PD_STATUS = (
)
RE_LD = re.compile(
- r'Logical device number\s+([0-9]+).*?'
- r'Status of logical device\s+: ([a-zA-Z]+)'
+ r'Logical [dD]evice number\s+([0-9]+).*?'
+ r'Status of [lL]ogical [dD]evice\s+: ([a-zA-Z]+)'
)
diff --git a/collectors/python.d.plugin/dns_query_time/dns_query_time.chart.py b/collectors/python.d.plugin/dns_query_time/dns_query_time.chart.py
index 47a7d23f6..7fe860314 100644
--- a/collectors/python.d.plugin/dns_query_time/dns_query_time.chart.py
+++ b/collectors/python.d.plugin/dns_query_time/dns_query_time.chart.py
@@ -8,11 +8,6 @@ from socket import getaddrinfo, gaierror
from threading import Thread
try:
- from time import monotonic as time
-except ImportError:
- from time import time
-
-try:
import dns.message
import dns.query
import dns.name
@@ -89,13 +84,15 @@ def dns_request(server_list, timeout, domains):
request = dns.message.make_query(domain, dns.rdatatype.A)
try:
- dns_start = time()
- dns.query.udp(request, ns, timeout=t)
- dns_end = time()
- query_time = round((dns_end - dns_start) * 1000)
- q.put({'_'.join(['ns', ns.replace('.', '_')]): query_time})
+ resp = dns.query.udp(request, ns, timeout=t)
+ if (resp.rcode() == dns.rcode.NOERROR and resp.answer):
+ query_time = resp.time * 1000
+ else:
+ query_time = -100
except dns.exception.Timeout:
- q.put({'_'.join(['ns', ns.replace('.', '_')]): -100})
+ query_time = -100
+ finally:
+ q.put({'_'.join(['ns', ns.replace('.', '_')]): query_time})
for server in server_list:
th = Thread(target=dns_req, args=(server, timeout, que))
diff --git a/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py b/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py
index 9b3c1284d..20109c64f 100644
--- a/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py
+++ b/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py
@@ -10,9 +10,9 @@ from collections import namedtuple
from socket import gethostbyname, gaierror
try:
- from queue import Queue
+ from queue import Queue
except ImportError:
- from Queue import Queue
+ from Queue import Queue
from bases.FrameworkServices.UrlService import UrlService
@@ -83,11 +83,11 @@ NODE_STATS = [
]
CLUSTER_STATS = [
- 'nodes.count.data_only',
- 'nodes.count.master_data',
+ 'nodes.count.data',
+ 'nodes.count.master',
'nodes.count.total',
- 'nodes.count.master_only',
- 'nodes.count.client',
+ 'nodes.count.coordinating_only',
+ 'nodes.count.ingest',
'indices.docs.count',
'indices.query_cache.hit_count',
'indices.query_cache.miss_count',
@@ -371,7 +371,7 @@ CHARTS = {
},
'cluster_health_nodes': {
'options': [None, 'Nodes Statistics', 'nodes', 'cluster health API',
- 'elastic.cluster_health_nodes', 'stacked'],
+ 'elastic.cluster_health_nodes', 'area'],
'lines': [
['number_of_nodes', 'nodes', 'absolute'],
['number_of_data_nodes', 'data_nodes', 'absolute'],
@@ -417,13 +417,13 @@ CHARTS = {
},
'cluster_stats_nodes': {
'options': [None, 'Nodes Statistics', 'nodes', 'cluster stats API',
- 'elastic.cluster_nodes', 'stacked'],
+ 'elastic.cluster_nodes', 'area'],
'lines': [
- ['nodes_count_data_only', 'data_only', 'absolute'],
- ['nodes_count_master_data', 'master_data', 'absolute'],
+ ['nodes_count_data', 'data', 'absolute'],
+ ['nodes_count_master', 'master', 'absolute'],
['nodes_count_total', 'total', 'absolute'],
- ['nodes_count_master_only', 'master_only', 'absolute'],
- ['nodes_count_client', 'client', 'absolute']
+ ['nodes_count_ingest', 'ingest', 'absolute'],
+ ['nodes_count_coordinating_only', 'coordinating_only', 'absolute']
]
},
'cluster_stats_query_cache': {
diff --git a/collectors/python.d.plugin/monit/monit.chart.py b/collectors/python.d.plugin/monit/monit.chart.py
index 3ac0032c5..9f3270572 100644
--- a/collectors/python.d.plugin/monit/monit.chart.py
+++ b/collectors/python.d.plugin/monit/monit.chart.py
@@ -4,23 +4,49 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import xml.etree.ElementTree as ET
+
+from collections import namedtuple
+
from bases.FrameworkServices.UrlService import UrlService
-# see enum State_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h)
-MONIT_SERVICE_NAMES = [
- 'Filesystem',
- 'Directory',
- 'File',
- 'Process',
- 'Host',
- 'System',
- 'Fifo',
- 'Program',
- 'Net',
-]
+MonitType = namedtuple('MonitType', ('index', 'name'))
+
+# see enum Service_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h)
+# typedef enum {
+# Service_Filesystem = 0,
+# Service_Directory,
+# Service_File,
+# Service_Process,
+# Service_Host,
+# Service_System,
+# Service_Fifo,
+# Service_Program,
+# Service_Net,
+# Service_Last = Service_Net
+# } __attribute__((__packed__)) Service_Type;
-DEFAULT_SERVICES_IDS = [0, 1, 2, 3, 4, 6, 7, 8]
+TYPE_FILESYSTEM = MonitType(0, 'filesystem')
+TYPE_DIRECTORY = MonitType(1, 'directory')
+TYPE_FILE = MonitType(2, 'file')
+TYPE_PROCESS = MonitType(3, 'process')
+TYPE_HOST = MonitType(4, 'host')
+TYPE_SYSTEM = MonitType(5, 'system')
+TYPE_FIFO = MonitType(6, 'fifo')
+TYPE_PROGRAM = MonitType(7, 'program')
+TYPE_NET = MonitType(8, 'net')
+
+TYPES = (
+ TYPE_FILESYSTEM,
+ TYPE_DIRECTORY,
+ TYPE_FILE,
+ TYPE_PROCESS,
+ TYPE_HOST,
+ TYPE_SYSTEM,
+ TYPE_FIFO,
+ TYPE_PROGRAM,
+ TYPE_NET,
+)
# charts order (can be overridden if you want less charts, or different order)
ORDER = [
@@ -38,6 +64,7 @@ ORDER = [
'program',
'net'
]
+
CHARTS = {
'filesystem': {
'options': ['filesystems', 'Filesystems', 'filesystems', 'filesystem', 'monit.filesystems', 'line'],
@@ -83,7 +110,7 @@ CHARTS = {
'lines': []
},
'host_latency': {
- 'options': ['hosts latency', 'Hosts latency', 'milliseconds/s', 'network', 'monit.host_latency', 'line'],
+ 'options': ['hosts latency', 'Hosts latency', 'milliseconds', 'network', 'monit.host_latency', 'line'],
'lines': []
},
'net': {
@@ -94,85 +121,224 @@ CHARTS = {
}
+class BaseMonitService(object):
+ def __init__(self, typ, name, status, monitor):
+ self.type = typ
+ self.name = name
+ self.status = status
+ self.monitor = monitor
+
+ def __repr__(self):
+ return 'MonitService({0}:{1})'.format(self.type.name, self.name)
+
+ def __eq__(self, other):
+ if not isinstance(other, BaseMonitService):
+ return False
+ return self.type == other.type and self.name == other.name
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(repr(self))
+
+ def is_running(self):
+ return self.status == '0' and self.monitor == '1'
+
+ def key(self):
+ return '{0}_{1}'.format(self.type.name, self.name)
+
+ def data(self):
+ return {self.key(): int(self.is_running())}
+
+
+class ProcessMonitService(BaseMonitService):
+ def __init__(self, typ, name, status, monitor):
+ super(ProcessMonitService, self).__init__(typ, name, status, monitor)
+ self.uptime = None
+ self.threads = None
+ self.children = None
+
+ def uptime_key(self):
+ return 'process_uptime_{0}'.format(self.name)
+
+ def threads_key(self):
+ return 'process_threads_{0}'.format(self.name)
+
+ def children_key(self):
+ return 'process_children_{0}'.format(self.name)
+
+ def data(self):
+ base_data = super(ProcessMonitService, self).data()
+ # skipping bugged metrics with negative uptime (monit before v5.16)
+ uptime = self.uptime if self.uptime and int(self.uptime) >= 0 else None
+ data = {
+ self.uptime_key(): uptime,
+ self.threads_key(): self.threads,
+ self.children_key(): self.children,
+ }
+ data.update(base_data)
+
+ return data
+
+
+class HostMonitService(BaseMonitService):
+ def __init__(self, typ, name, status, monitor):
+ super(HostMonitService, self).__init__(typ, name, status, monitor)
+ self.latency = None
+
+ def latency_key(self):
+ return 'host_latency_{0}'.format(self.name)
+
+ def data(self):
+ base_data = super(HostMonitService, self).data()
+ latency = float(self.latency) * 1000000 if self.latency else None
+ data = {self.latency_key(): latency}
+ data.update(base_data)
+
+ return data
+
+
class Service(UrlService):
def __init__(self, configuration=None, name=None):
UrlService.__init__(self, configuration=configuration, name=name)
self.order = ORDER
self.definitions = CHARTS
- base_url = self.configuration.get('url', 'http://localhost:2812')
+ base_url = self.configuration.get('url', "http://localhost:2812")
self.url = '{0}/_status?format=xml&level=full'.format(base_url)
+ self.active_services = list()
- def parse(self, data):
+ def parse(self, raw):
try:
- xml = ET.fromstring(data)
+ root = ET.fromstring(raw)
except ET.ParseError:
- self.error("URL {0} didn't return a vaild XML page. Please check your settings.".format(self.url))
+ self.error("URL {0} didn't return a valid XML page. Please check your settings.".format(self.url))
+ return None
+ return root
+
+ def _get_data(self):
+ raw = self._get_raw_data()
+ if not raw:
return None
- return xml
- def check(self):
- self._manager = self._build_manager()
+ root = self.parse(raw)
+ if root is None:
+ return None
- raw_data = self._get_raw_data()
- if not raw_data:
+ services = self.get_services(root)
+ if not services:
return None
- return bool(self.parse(raw_data))
+ if len(self.charts) > 0:
+ self.update_charts(services)
- def _get_data(self):
- raw_data = self._get_raw_data()
+ data = dict()
- if not raw_data:
- return None
+ for svc in services:
+ data.update(svc.data())
- xml = self.parse(raw_data)
- if not xml:
- return None
+ return data
- data = {}
- for service_id in DEFAULT_SERVICES_IDS:
- service_category = MONIT_SERVICE_NAMES[service_id].lower()
+ def get_services(self, root):
+ services = list()
- if service_category == 'system':
- self.debug("Skipping service from 'System' category, because it's useless in graphs")
+ for typ in TYPES:
+ if typ == TYPE_SYSTEM:
+ self.debug("skipping service from '{0}' category, it's useless in graphs".format(TYPE_SYSTEM.name))
continue
- xpath_query = "./service[@type='{0}']".format(service_id)
- self.debug('Searching for {0} as {1}'.format(service_category, xpath_query))
- for service_node in xml.findall(xpath_query):
-
- service_name = service_node.find('name').text
- service_status = service_node.find('status').text
- service_monitoring = service_node.find('monitor').text
- self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format(service_name,
- service_id, service_status, service_monitoring))
-
- dimension_key = service_category + '_' + service_name
- if dimension_key not in self.charts[service_category]:
- self.charts[service_category].add_dimension([dimension_key, service_name, 'absolute'])
- data[dimension_key] = 1 if service_status == '0' and service_monitoring == '1' else 0
-
- if service_category == 'process':
- for subnode in ('uptime', 'threads', 'children'):
- subnode_value = service_node.find(subnode)
- if subnode_value is None:
- continue
- if subnode == 'uptime' and int(subnode_value.text) < 0:
- self.debug('Skipping bugged metrics with negative uptime (monit before v5.16')
- continue
- dimension_key = 'process_{0}_{1}'.format(subnode, service_name)
- if dimension_key not in self.charts['process_' + subnode]:
- self.charts['process_' + subnode].add_dimension([dimension_key, service_name, 'absolute'])
- data[dimension_key] = int(subnode_value.text)
-
- if service_category == 'host':
- subnode_value = service_node.find('./icmp/responsetime')
- if subnode_value is None:
- continue
- dimension_key = 'host_latency_{0}'.format(service_name)
- if dimension_key not in self.charts['host_latency']:
- self.charts['host_latency'].add_dimension([dimension_key, service_name,
- 'absolute', 1000, 1000000])
- data[dimension_key] = float(subnode_value.text) * 1000000
-
- return data or None
+ xpath_query = "./service[@type='{0}']".format(typ.index)
+ self.debug('Searching for {0} as {1}'.format(typ.name, xpath_query))
+
+ for svc_root in root.findall(xpath_query):
+ svc = create_service(svc_root, typ)
+ self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format(
+ svc.name, svc.type.name, svc.status, svc.monitor))
+
+ services.append(svc)
+
+ return services
+
+ def update_charts(self, services):
+ remove = [svc for svc in self.active_services if svc not in services]
+ add = [svc for svc in services if svc not in self.active_services]
+
+ self.remove_services_from_charts(remove)
+ self.add_services_to_charts(add)
+
+ self.active_services = services
+
+ def add_services_to_charts(self, services):
+ for svc in services:
+ if svc.type == TYPE_HOST:
+ self.charts['host_latency'].add_dimension([svc.latency_key(), svc.name, 'absolute', 1000, 1000000])
+ if svc.type == TYPE_PROCESS:
+ self.charts['process_uptime'].add_dimension([svc.uptime_key(), svc.name])
+ self.charts['process_threads'].add_dimension([svc.threads_key(), svc.name])
+ self.charts['process_children'].add_dimension([svc.children_key(), svc.name])
+ self.charts[svc.type.name].add_dimension([svc.key(), svc.name])
+
+ def remove_services_from_charts(self, services):
+ for svc in services:
+ if svc.type == TYPE_HOST:
+ self.charts['host_latency'].del_dimension(svc.latency_key(), False)
+ if svc.type == TYPE_PROCESS:
+ self.charts['process_uptime'].del_dimension(svc.uptime_key(), False)
+ self.charts['process_threads'].del_dimension(svc.threads_key(), False)
+ self.charts['process_children'].del_dimension(svc.children_key(), False)
+ self.charts[svc.type.name].del_dimension(svc.key(), False)
+
+
+def create_service(root, typ):
+ if typ == TYPE_HOST:
+ return create_host_service(root)
+ elif typ == TYPE_PROCESS:
+ return create_process_service(root)
+ return create_base_service(root, typ)
+
+
+def create_host_service(root):
+ svc = HostMonitService(
+ TYPE_HOST,
+ root.find('name').text,
+ root.find('status').text,
+ root.find('monitor').text,
+ )
+
+ latency = root.find('./icmp/responsetime')
+ if latency is not None:
+ svc.latency = latency.text
+
+ return svc
+
+
+def create_process_service(root):
+ svc = ProcessMonitService(
+ TYPE_PROCESS,
+ root.find('name').text,
+ root.find('status').text,
+ root.find('monitor').text,
+ )
+
+ uptime = root.find('uptime')
+ if uptime is not None:
+ svc.uptime = uptime.text
+
+ threads = root.find('threads')
+ if threads is not None:
+ svc.threads = threads.text
+
+ children = root.find('children')
+ if children is not None:
+ svc.children = children.text
+
+ return svc
+
+
+def create_base_service(root, typ):
+ return BaseMonitService(
+ typ,
+ root.find('name').text,
+ root.find('status').text,
+ root.find('monitor').text,
+ )
diff --git a/collectors/python.d.plugin/mysql/README.md b/collectors/python.d.plugin/mysql/README.md
index eba9d7a2e..f7028ab68 100644
--- a/collectors/python.d.plugin/mysql/README.md
+++ b/collectors/python.d.plugin/mysql/README.md
@@ -218,6 +218,24 @@ It will produce following charts (if data is available):
45. **Flow Control** in ms
* paused
+46. **Users CPU time** in percentage
+ * users
+
+**Per user statistics:**
+
+1. **Rows Operations** in operations/s
+ * read
+ * send
+ * updated
+ * inserted
+ * deleted
+
+2. **Commands** in commands/s
+ * select
+ * update
+ * other
+
+
### configuration
You can provide, per server, the following:
@@ -234,7 +252,7 @@ You can provide, per server, the following:
- ca: the path name of the Certificate Authority (CA) certificate file. This option, if used, must specify the same certificate used by the server.
- capath: the path name of the directory that contains trusted SSL CA certificate files.
- cipher: the list of permitted ciphers for SSL encryption.
-
+
Here is an example for 3 servers:
```yaml
@@ -260,6 +278,8 @@ remote:
If no configuration is given, module will attempt to connect to mysql server via unix socket at `/var/run/mysqld/mysqld.sock` without password and with username `root`
+The `userstats` charts work only if the User Statistics plugin is enabled in the MariaDB server and the proper MySQL privileges (SUPER or PROCESS) are set. For more details, see the [MariaDB User Statistics page](https://mariadb.com/kb/en/library/user-statistics/).
+
---
[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fcollectors%2Fpython.d.plugin%2Fmysql%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]()
diff --git a/collectors/python.d.plugin/mysql/mysql.chart.py b/collectors/python.d.plugin/mysql/mysql.chart.py
index 139fac158..82bd90794 100644
--- a/collectors/python.d.plugin/mysql/mysql.chart.py
+++ b/collectors/python.d.plugin/mysql/mysql.chart.py
@@ -11,6 +11,7 @@ from bases.FrameworkServices.MySQLService import MySQLService
QUERY_GLOBAL = 'SHOW GLOBAL STATUS;'
QUERY_SLAVE = 'SHOW SLAVE STATUS;'
QUERY_VARIABLES = 'SHOW GLOBAL VARIABLES LIKE \'max_connections\';'
+QUERY_USER_STATISTICS = 'SHOW USER_STATISTICS;'
GLOBAL_STATS = [
'Bytes_received',
@@ -90,6 +91,7 @@ GLOBAL_STATS = [
'Innodb_buffer_pool_write_requests',
'Innodb_buffer_pool_reads',
'Innodb_buffer_pool_wait_free',
+ 'Innodb_deadlocks',
'Qcache_hits',
'Qcache_lowmem_prunes',
'Qcache_inserts',
@@ -149,6 +151,18 @@ SLAVE_STATS = [
('Slave_IO_Running', slave_running)
]
+USER_STATISTICS = [
+ 'Select_commands',
+ 'Update_commands',
+ 'Other_commands',
+ 'Cpu_time',
+ 'Rows_read',
+ 'Rows_sent',
+ 'Rows_deleted',
+ 'Rows_inserted',
+ 'Rows_updated'
+]
+
VARIABLES = [
'max_connections'
]
@@ -178,6 +192,7 @@ ORDER = [
'innodb_os_log_fsync_writes',
'innodb_os_log_io',
'innodb_cur_row_lock',
+ 'innodb_deadlocks',
'innodb_rows',
'innodb_buffer_pool_pages',
'innodb_buffer_pool_flush_pages_requests',
@@ -200,7 +215,8 @@ ORDER = [
'galera_bytes',
'galera_queue',
'galera_conflicts',
- 'galera_flow_control'
+ 'galera_flow_control',
+ 'userstats_cpu'
]
CHARTS = {
@@ -382,6 +398,13 @@ CHARTS = {
['Innodb_row_lock_current_waits', 'current_waits', 'absolute']
]
},
+ 'innodb_deadlocks': {
+ 'options': [None, 'InnoDB Deadlocks', 'operations/s', 'innodb',
+ 'mysql.innodb_deadlocks', 'area'],
+ 'lines': [
+ ['Innodb_deadlocks', 'deadlocks', 'incremental']
+ ]
+ },
'innodb_rows': {
'options': [None, 'InnoDB Row Operations', 'operations/s', 'innodb', 'mysql.innodb_rows', 'area'],
'lines': [
@@ -570,10 +593,45 @@ CHARTS = {
'lines': [
['wsrep_flow_control_paused_ns', 'paused', 'incremental', 1, 1000000],
]
+ },
+ 'userstats_cpu': {
+ 'options': [None, 'Users CPU time', 'percentage', 'userstats', 'mysql.userstats_cpu', 'stacked'],
+ 'lines': []
}
}
+def userstats_chart_template(name):
+ order = [
+ 'userstats_rows_{0}'.format(name),
+ 'userstats_commands_{0}'.format(name)
+ ]
+ family = 'userstats {0}'.format(name)
+
+ charts = {
+ order[0]: {
+ 'options': [None, 'Rows Operations', 'operations/s', family, 'mysql.userstats_rows', 'stacked'],
+ 'lines': [
+ ['userstats_{0}_Rows_read'.format(name), 'read', 'incremental'],
+ ['userstats_{0}_Rows_send'.format(name), 'send', 'incremental'],
+ ['userstats_{0}_Rows_updated'.format(name), 'updated', 'incremental'],
+ ['userstats_{0}_Rows_inserted'.format(name), 'inserted', 'incremental'],
+ ['userstats_{0}_Rows_deleted'.format(name), 'deleted', 'incremental']
+ ]
+ },
+ order[1]: {
+ 'options': [None, 'Commands', 'commands/s', family, 'mysql.userstats_commands', 'stacked'],
+ 'lines': [
+ ['userstats_{0}_Select_commands'.format(name), 'select', 'incremental'],
+ ['userstats_{0}_Update_commands'.format(name), 'update', 'incremental'],
+ ['userstats_{0}_Other_commands'.format(name), 'other', 'incremental']
+ ]
+ }
+ }
+
+ return order, charts
+
+
class Service(MySQLService):
def __init__(self, configuration=None, name=None):
MySQLService.__init__(self, configuration=configuration, name=name)
@@ -583,6 +641,7 @@ class Service(MySQLService):
global_status=QUERY_GLOBAL,
slave_status=QUERY_SLAVE,
variables=QUERY_VARIABLES,
+ user_statistics=QUERY_USER_STATISTICS,
)
def _get_data(self):
@@ -612,6 +671,12 @@ class Service(MySQLService):
else:
self.queries.pop('slave_status')
+ if 'user_statistics' in raw_data:
+ if raw_data['user_statistics'][0]:
+ to_netdata.update(self.get_userstats(raw_data))
+ else:
+ self.queries.pop('user_statistics')
+
if 'variables' in raw_data:
variables = dict(raw_data['variables'][0])
for key in VARIABLES:
@@ -619,3 +684,70 @@ class Service(MySQLService):
to_netdata[key] = variables[key]
return to_netdata or None
+
+ # raw_data['user_statistics'] contains the following data structure:
+ # (
+ # (
+ # ('netdata', 42L, 0L, 1264L, 3.111252999999968, 2.968510299999994, 110267L, 19741424L, 0L, 0L, 1265L, 0L,
+ # 0L, 0L, 3L, 0L, 1301L, 0L, 0L, 7633L, 0L, 83L, 44L, 0L, 0L),
+ # ('root', 60L, 0L, 184L, 0.22856499999999966, 0.1601419999999998, 11605L, 1516513L, 0L, 9L, 220L, 0L, 2L, 1L,
+ # 6L, 4L,127L, 0L, 0L, 45L, 0L, 45L, 0L, 0L, 0L)
+ # ),
+ # (
+ # ('User', 253, 9, 128, 128, 0, 0),
+ # ('Total_connections', 3, 2, 11, 11, 0, 0),
+ # ('Concurrent_connections', 3, 1, 11, 11, 0, 0),
+ # ('Connected_time', 3, 4, 11, 11, 0, 0),
+ # ('Busy_time', 5, 21, 21, 21, 31, 0),
+ # ('Cpu_time', 5, 18, 21, 21, 31, 0),
+ # ('Bytes_received', 8, 6, 21, 21, 0, 0),
+ # ('Bytes_sent', 8, 8, 21, 21, 0, 0),
+ # ('Binlog_bytes_written', 8, 1, 21, 21, 0, 0),
+ # ('Rows_read', 8, 1, 21, 21, 0, 0),
+ # ('Rows_sent', 8, 4, 21, 21, 0, 0),
+ # ('Rows_deleted', 8, 1, 21, 21, 0, 0),
+ # ('Rows_inserted', 8, 1, 21, 21, 0, 0),
+ # ('Rows_updated', 8, 1, 21, 21, 0, 0),
+ # ('Select_commands', 8, 1, 21, 21, 0, 0),
+ # ('Update_commands', 8, 1, 21, 21, 0, 0),
+ # ('Other_commands', 8, 4, 21, 21, 0, 0),
+ # ('Commit_transactions', 8, 1, 21, 21, 0, 0),
+ # ('Rollback_transactions', 8, 1, 21, 21, 0, 0),
+ # ('Denied_connections', 8, 4, 21, 21, 0, 0),
+ # ('Lost_connections', 8, 1, 21, 21, 0, 0),
+ # ('Access_denied', 8, 2, 21, 21, 0, 0),
+ # ('Empty_queries', 8, 2, 21, 21, 0, 0),
+ # ('Total_ssl_connections', 8, 1, 21, 21, 0, 0),
+ # ('Max_statement_time_exceeded', 8, 1, 21, 21, 0, 0)),
+ # )
+ def get_userstats(self, raw_data):
+ data = dict()
+ userstats_vars = [e[0] for e in raw_data['user_statistics'][1]]
+ for i, _ in enumerate(raw_data['user_statistics'][0]):
+ user_name = raw_data['user_statistics'][0][i][0]
+ userstats = dict(zip(userstats_vars, raw_data['user_statistics'][0][i]))
+
+ if len(self.charts) > 0:
+ if ('userstats_{0}_Cpu_time'.format(user_name)) not in self.charts['userstats_cpu']:
+ self.add_userstats_dimensions(user_name)
+ self.create_new_userstats_charts(user_name)
+
+ for key in USER_STATISTICS:
+ if key in userstats:
+ data['userstats_{0}_{1}'.format(user_name, key)] = userstats[key]
+
+ return data
+
+ def add_userstats_dimensions(self, name):
+ self.charts['userstats_cpu'].add_dimension(['userstats_{0}_Cpu_time'.format(name), name, 'incremental', 100, 1])
+
+ def create_new_userstats_charts(self, tube):
+ order, charts = userstats_chart_template(tube)
+
+ for chart_name in order:
+ params = [chart_name] + charts[chart_name]['options']
+ dimensions = charts[chart_name]['lines']
+
+ new_chart = self.charts.add_chart(params)
+ for dimension in dimensions:
+ new_chart.add_dimension(dimension)
diff --git a/collectors/python.d.plugin/python.d.conf b/collectors/python.d.plugin/python.d.conf
index 63eecbba8..e2ee8eeec 100644
--- a/collectors/python.d.plugin/python.d.conf
+++ b/collectors/python.d.plugin/python.d.conf
@@ -41,7 +41,7 @@ chrony: no
# dockerd: yes
# dovecot: yes
# elasticsearch: yes
-# energi: yes
+# energid: yes
# this is just an example
example: no
@@ -88,6 +88,7 @@ nginx_log: no
# redis: yes
# rethinkdbs: yes
# retroshare: yes
+# riakkv: yes
# samba: yes
# sensors: yes
# smartd_log: yes
@@ -101,4 +102,4 @@ unbound: no
# uwsgi: yes
# varnish: yes
# w1sensor: yes
-# web_log: yes \ No newline at end of file
+# web_log: yes
diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py
index 439456655..b6f75bd5c 100644
--- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py
+++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py
@@ -6,6 +6,8 @@
import urllib3
+from distutils.version import StrictVersion as version
+
from bases.FrameworkServices.SimpleService import SimpleService
try:
@@ -14,9 +16,30 @@ except AttributeError:
pass
+# https://github.com/urllib3/urllib3/blob/master/CHANGES.rst#19-2014-07-04
+# New retry logic and urllib3.util.retry.Retry configuration object. (Issue https://github.com/urllib3/urllib3/pull/326)
+URLLIB3_MIN_REQUIRED_VERSION = '1.9'
+URLLIB3_VERSION = urllib3.__version__
+URLLIB3 = 'urllib3'
+
+
+def version_check():
+ if version(URLLIB3_VERSION) >= version(URLLIB3_MIN_REQUIRED_VERSION):
+ return
+
+ err = '{0} version: {1}, minimum required version: {2}, please upgrade'.format(
+ URLLIB3,
+ URLLIB3_VERSION,
+ URLLIB3_MIN_REQUIRED_VERSION,
+ )
+ raise Exception(err)
+
+
class UrlService(SimpleService):
def __init__(self, configuration=None, name=None):
+ version_check()
SimpleService.__init__(self, configuration=configuration, name=name)
+ self.debug("{0} version: {1}".format(URLLIB3, URLLIB3_VERSION))
self.url = self.configuration.get('url')
self.user = self.configuration.get('user')
self.password = self.configuration.get('pass')
diff --git a/collectors/python.d.plugin/riakkv/Makefile.inc b/collectors/python.d.plugin/riakkv/Makefile.inc
new file mode 100644
index 000000000..87d29f82f
--- /dev/null
+++ b/collectors/python.d.plugin/riakkv/Makefile.inc
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# THIS IS NOT A COMPLETE Makefile
+# IT IS INCLUDED BY ITS PARENT'S Makefile.am
+# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
+
+# install these files
+dist_python_DATA += riakkv/riakkv.chart.py
+dist_pythonconfig_DATA += riakkv/riakkv.conf
+
+# do not install these files, but include them in the distribution
+dist_noinst_DATA += riakkv/README.md riakkv/Makefile.inc
+
diff --git a/collectors/python.d.plugin/riakkv/README.md b/collectors/python.d.plugin/riakkv/README.md
new file mode 100644
index 000000000..0bcf22c5b
--- /dev/null
+++ b/collectors/python.d.plugin/riakkv/README.md
@@ -0,0 +1,110 @@
+# riakkv
+
+Monitors one or more Riak KV servers.
+
+**Requirements:**
+
+* An accessible `/stats` endpoint. See [the Riak KV configuration reference
+ documentation](https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces)
+ for how to enable this.
+
+The following charts are included, which are mostly derived from the metrics
+listed
+[here](https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#riak-metrics-to-graph).
+
+1. **Throughput** in operations/s
+ * **KV operations**
+ * gets
+ * puts
+
+ * **Data type updates**
+ * counters
+ * sets
+ * maps
+
+ * **Search queries**
+ * queries
+
+ * **Search documents**
+ * indexed
+
+ * **Strong consistency operations**
+ * gets
+ * puts
+
+2. **Latency** in milliseconds
+ * **KV latency** of the past minute
+ * get (mean, median, 95th / 99th / 100th percentile)
+ * put (mean, median, 95th / 99th / 100th percentile)
+
+ * **Data type latency** of the past minute
+ * counter_merge (mean, median, 95th / 99th / 100th percentile)
+ * set_merge (mean, median, 95th / 99th / 100th percentile)
+ * map_merge (mean, median, 95th / 99th / 100th percentile)
+
+ * **Search latency** of the past minute
+ * query (median, min, max, 95th / 99th percentile)
+ * index (median, min, max, 95th / 99th percentile)
+
+ * **Strong consistency latency** of the past minute
+ * get (mean, median, 95th / 99th / 100th percentile)
+ * put (mean, median, 95th / 99th / 100th percentile)
+
+3. **Erlang VM metrics**
+ * **System counters**
+ * processes
+
+ * **Memory allocation** in MB
+ * processes.allocated
+ * processes.used
+
+4. **General load / health metrics**
+ * **Siblings encountered in KV operations** during the past minute
+ * get (mean, median, 95th / 99th / 100th percentile)
+
+ * **Object size in KV operations** during the past minute in KB
+ * get (mean, median, 95th / 99th / 100th percentile)
+
+ * **Message queue length** in unprocessed messages
+ * vnodeq_size (mean, median, 95th / 99th / 100th percentile)
+
+ * **Index operations** encountered by Search
+ * errors
+
+ * **Protocol buffer connections**
+ * active
+
+ * **Repair operations coordinated by this node**
+ * read
+
+ * **Active finite state machines by kind**
+ * get
+ * put
+ * secondary_index
+ * list_keys
+
+ * **Rejected finite state machines**
+ * get
+ * put
+
+ * **Number of writes to Search failed due to bad data format by reason**
+ * bad_entry
+ * extract_fail
+
+
+### configuration
+
+The module needs to be passed the full URL to Riak's stats endpoint.
+For example:
+
+```yaml
+myriak:
+ url: http://myriak.example.com:8098/stats
+```
+
+With no explicit configuration given, the module will attempt to connect to
+`http://localhost:8098/stats`.
+
+The default update frequency for the plugin is set to 2 seconds as Riak
+internally updates the metrics every second. If we were to update the metrics
+every second, the resulting graph would contain odd jitter.
diff --git a/collectors/python.d.plugin/riakkv/riakkv.chart.py b/collectors/python.d.plugin/riakkv/riakkv.chart.py
new file mode 100644
index 000000000..f81e177a5
--- /dev/null
+++ b/collectors/python.d.plugin/riakkv/riakkv.chart.py
@@ -0,0 +1,315 @@
+# -*- coding: utf-8 -*-
+# Description: riak netdata python.d module
+#
+# See also:
+# https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html
+
+from json import loads
+
+from bases.FrameworkServices.UrlService import UrlService
+
+# Riak updates the metrics at the /stats endpoint every 1 second.
+# If we use `update_every = 1` here, that means we might get weird jitter in the graph,
+# so the default is set to 2 seconds to prevent it.
+update_every = 2
+
+# charts order (can be overridden if you want less charts, or different order)
+ORDER = [
+ # Throughput metrics
+ # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#throughput-metrics
+ # Collected in totals.
+ "kv.node_operations", # K/V node operations.
+ "dt.vnode_updates", # Data type vnode updates.
+ "search.queries", # Search queries on the node.
+ "search.documents", # Documents indexed by Search.
+ "consistent.operations", # Consistent node operations.
+
+ # Latency metrics
+ # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#latency-metrics
+ # Collected for the past minute in milliseconds,
+ # returned from riak in microseconds.
+ "kv.latency.get", # K/V GET FSM traversal latency.
+ "kv.latency.put", # K/V PUT FSM traversal latency.
+ "dt.latency.counter", # Update Counter Data type latency.
+ "dt.latency.set", # Update Set Data type latency.
+ "dt.latency.map", # Update Map Data type latency.
+ "search.latency.query", # Search query latency.
+ "search.latency.index", # Time it takes for search to index a new document.
+ "consistent.latency.get", # Strong consistent read latency.
+ "consistent.latency.put", # Strong consistent write latency.
+
+ # Erlang resource usage metrics
+ # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#erlang-resource-usage-metrics
+ # Processes collected as a gauge,
+ # memory collected as Megabytes, returned as bytes from Riak.
+ "vm.processes", # Number of processes currently running in the Erlang VM.
+ "vm.memory.processes", # Total amount of memory allocated & used for Erlang processes.
+
+ # General Riak Load / Health metrics
+ # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#general-riak-load-health-metrics
+ # The following are collected by Riak over the past minute:
+ "kv.siblings_encountered.get", # Siblings encountered during GET operations by this node.
+ "kv.objsize.get", # Object size encountered by this node.
+ "search.vnodeq_size", # Number of unprocessed messages in the vnode message queues (Search).
+ # The following are calculated in total, or as gauges:
+ "search.index_errors", # Errors of the search subsystem while indexing documents.
+ "core.pbc", # Number of currently active protocol buffer connections.
+ "core.repairs", # Total read repair operations coordinated by this node.
+ "core.fsm_active", # Active finite state machines by kind.
+ "core.fsm_rejected", # Rejected finite state machines by kind.
+
+ # General Riak Search Load / Health metrics
+ # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#general-riak-search-load-health-metrics
+ # Reported as counters.
+ "search.errors", # Write and read errors of the Search subsystem.
+]
+
+CHARTS = {
+ # Throughput metrics
+ "kv.node_operations": {
+ "options": [None, "Reads & writes coordinated by this node", "operations/s", "throughput", "riak.kv.throughput", "line"],
+ "lines": [
+ ["node_gets_total", "gets", "incremental"],
+ ["node_puts_total", "puts", "incremental"]
+ ]
+ },
+ "dt.vnode_updates": {
+ "options": [None, "Update operations coordinated by local vnodes by data type", "operations/s", "throughput", "riak.dt.vnode_updates", "line"],
+ "lines": [
+ ["vnode_counter_update_total", "counters", "incremental"],
+ ["vnode_set_update_total", "sets", "incremental"],
+ ["vnode_map_update_total", "maps", "incremental"],
+ ]
+ },
+ "search.queries": {
+ "options": [None, "Search queries on the node", "queries/s", "throughput", "riak.search", "line"],
+ "lines": [
+ ["search_query_throughput_count", "queries", "incremental"]
+ ]
+ },
+ "search.documents": {
+ "options": [None, "Documents indexed by search", "documents/s", "throughput", "riak.search.documents", "line"],
+ "lines": [
+ ["search_index_throughput_count", "indexed", "incremental"]
+ ]
+ },
+ "consistent.operations": {
+ "options": [None, "Consistent node operations", "operations/s", "throughput", "riak.consistent.operations", "line"],
+ "lines": [
+ ["consistent_gets_total", "gets", "incremental"],
+ ["consistent_puts_total", "puts", "incremental"],
+ ]
+ },
+
+ # Latency metrics
+ "kv.latency.get": {
+ "options": [None, "Time between reception of a client GET request and subsequent response to client", "ms", "latency", "riak.kv.latency.get", "line"],
+ "lines": [
+ ["node_get_fsm_time_mean", "mean", "absolute", 1, 1000],
+ ["node_get_fsm_time_median", "median", "absolute", 1, 1000],
+ ["node_get_fsm_time_95", "95", "absolute", 1, 1000],
+ ["node_get_fsm_time_99", "99", "absolute", 1, 1000],
+ ["node_get_fsm_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+ "kv.latency.put": {
+ "options": [None, "Time between reception of a client PUT request and subsequent response to client", "ms", "latency", "riak.kv.latency.put", "line"],
+ "lines": [
+ ["node_put_fsm_time_mean", "mean", "absolute", 1, 1000],
+ ["node_put_fsm_time_median", "median", "absolute", 1, 1000],
+ ["node_put_fsm_time_95", "95", "absolute", 1, 1000],
+ ["node_put_fsm_time_99", "99", "absolute", 1, 1000],
+ ["node_put_fsm_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+ "dt.latency.counter": {
+ "options": [None, "Time it takes to perform an Update Counter operation", "ms", "latency", "riak.dt.latency.counter_merge", "line"],
+ "lines": [
+ ["object_counter_merge_time_mean", "mean", "absolute", 1, 1000],
+ ["object_counter_merge_time_median", "median", "absolute", 1, 1000],
+ ["object_counter_merge_time_95", "95", "absolute", 1, 1000],
+ ["object_counter_merge_time_99", "99", "absolute", 1, 1000],
+ ["object_counter_merge_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+ "dt.latency.set": {
+ "options": [None, "Time it takes to perform an Update Set operation", "ms", "latency", "riak.dt.latency.set_merge", "line"],
+ "lines": [
+ ["object_set_merge_time_mean", "mean", "absolute", 1, 1000],
+ ["object_set_merge_time_median", "median", "absolute", 1, 1000],
+ ["object_set_merge_time_95", "95", "absolute", 1, 1000],
+ ["object_set_merge_time_99", "99", "absolute", 1, 1000],
+ ["object_set_merge_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+ "dt.latency.map": {
+ "options": [None, "Time it takes to perform an Update Map operation", "ms", "latency", "riak.dt.latency.map_merge", "line"],
+ "lines": [
+ ["object_map_merge_time_mean", "mean", "absolute", 1, 1000],
+ ["object_map_merge_time_median", "median", "absolute", 1, 1000],
+ ["object_map_merge_time_95", "95", "absolute", 1, 1000],
+ ["object_map_merge_time_99", "99", "absolute", 1, 1000],
+ ["object_map_merge_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+ "search.latency.query": {
+ "options": [None, "Search query latency", "ms", "latency", "riak.search.latency.query", "line"],
+ "lines": [
+ ["search_query_latency_median", "median", "absolute", 1, 1000],
+ ["search_query_latency_min", "min", "absolute", 1, 1000],
+ ["search_query_latency_95", "95", "absolute", 1, 1000],
+ ["search_query_latency_99", "99", "absolute", 1, 1000],
+ ["search_query_latency_999", "999", "absolute", 1, 1000],
+ ["search_query_latency_max", "max", "absolute", 1, 1000],
+ ]
+ },
+ "search.latency.index": {
+ "options": [None, "Time it takes Search to index a new document", "ms", "latency", "riak.search.latency.index", "line"],
+ "lines": [
+ ["search_index_latency_median", "median", "absolute", 1, 1000],
+ ["search_index_latency_min", "min", "absolute", 1, 1000],
+ ["search_index_latency_95", "95", "absolute", 1, 1000],
+ ["search_index_latency_99", "99", "absolute", 1, 1000],
+ ["search_index_latency_999", "999", "absolute", 1, 1000],
+ ["search_index_latency_max", "max", "absolute", 1, 1000],
+ ]
+ },
+
+ # Riak Strong Consistency metrics
+ "consistent.latency.get": {
+ "options": [None, "Strongly consistent read latency", "ms", "latency", "riak.consistent.latency.get", "line"],
+ "lines": [
+ ["consistent_get_time_mean", "mean", "absolute", 1, 1000],
+ ["consistent_get_time_median", "median", "absolute", 1, 1000],
+ ["consistent_get_time_95", "95", "absolute", 1, 1000],
+ ["consistent_get_time_99", "99", "absolute", 1, 1000],
+ ["consistent_get_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+ "consistent.latency.put": {
+ "options": [None, "Strongly consistent write latency", "ms", "latency", "riak.consistent.latency.put", "line"],
+ "lines": [
+ ["consistent_put_time_mean", "mean", "absolute", 1, 1000],
+ ["consistent_put_time_median", "median", "absolute", 1, 1000],
+ ["consistent_put_time_95", "95", "absolute", 1, 1000],
+ ["consistent_put_time_99", "99", "absolute", 1, 1000],
+ ["consistent_put_time_100", "100", "absolute", 1, 1000],
+ ]
+ },
+
+ # BEAM metrics
+ "vm.processes": {
+ "options": [None, "Total processes running in the Erlang VM", "total", "vm", "riak.vm", "line"],
+ "lines": [
+ ["sys_process_count", "processes", "absolute"],
+ ]
+ },
+ "vm.memory.processes": {
+ "options": [None, "Memory allocated & used by Erlang processes", "MB", "vm", "riak.vm.memory.processes", "line"],
+ "lines": [
+ ["memory_processes", "allocated", "absolute", 1, 1024 * 1024],
+ ["memory_processes_used", "used", "absolute", 1, 1024 * 1024]
+ ]
+ },
+
+ # General Riak Load/Health metrics
+ "kv.siblings_encountered.get": {
+ "options": [None, "Number of siblings encountered during GET operations by this node during the past minute", "siblings", "load", "riak.kv.siblings_encountered.get", "line"],
+ "lines": [
+ ["node_get_fsm_siblings_mean", "mean", "absolute"],
+ ["node_get_fsm_siblings_median", "median", "absolute"],
+ ["node_get_fsm_siblings_95", "95", "absolute"],
+ ["node_get_fsm_siblings_99", "99", "absolute"],
+ ["node_get_fsm_siblings_100", "100", "absolute"],
+ ]
+ },
+ "kv.objsize.get": {
+ "options": [None, "Object size encountered by this node during the past minute", "KB", "load", "riak.kv.objsize.get", "line"],
+ "lines": [
+ ["node_get_fsm_objsize_mean", "mean", "absolute", 1, 1024],
+ ["node_get_fsm_objsize_median", "median", "absolute", 1, 1024],
+ ["node_get_fsm_objsize_95", "95", "absolute", 1, 1024],
+ ["node_get_fsm_objsize_99", "99", "absolute", 1, 1024],
+ ["node_get_fsm_objsize_100", "100", "absolute", 1, 1024],
+ ]
+ },
+ "search.vnodeq_size": {
+ "options": [None, "Number of unprocessed messages in the vnode message queues of Search on this node in the past minute", "messages", "load", "riak.search.vnodeq_size", "line"],
+ "lines": [
+ ["riak_search_vnodeq_mean", "mean", "absolute"],
+ ["riak_search_vnodeq_median", "median", "absolute"],
+ ["riak_search_vnodeq_95", "95", "absolute"],
+ ["riak_search_vnodeq_99", "99", "absolute"],
+ ["riak_search_vnodeq_100", "100", "absolute"],
+ ]
+ },
+ "search.index_errors": {
+ "options": [None, "Number of document index errors encountered by Search", "errors", "load", "riak.search.index", "line"],
+ "lines": [
+ ["search_index_fail_count", "errors", "absolute"]
+ ]
+ },
+ "core.pbc": {
+ "options": [None, "Protocol buffer connections by status", "connections", "load", "riak.core.protobuf_connections", "line"],
+ "lines": [
+ ["pbc_active", "active", "absolute"],
+ # ["pbc_connects", "established_pastmin", "absolute"]
+ ]
+ },
+ "core.repairs": {
+ "options": [None, "Number of repair operations this node has coordinated", "repairs", "load", "riak.core.repairs", "line"],
+ "lines": [
+ ["read_repairs", "read", "absolute"]
+ ]
+ },
+ "core.fsm_active": {
+ "options": [None, "Active finite state machines by kind", "fsms", "load", "riak.core.fsm_active", "line"],
+ "lines": [
+ ["node_get_fsm_active", "get", "absolute"],
+ ["node_put_fsm_active", "put", "absolute"],
+ ["index_fsm_active", "secondary index", "absolute"],
+ ["list_fsm_active", "list keys", "absolute"]
+ ]
+ },
+ "core.fsm_rejected": {
+ # Writing "Sidejob's" here seems to cause some weird issues: it results in this chart being rendered in
+ # its own context and additionally, moves the entire Riak graph all the way up to the top of the Netdata
+ # dashboard for some reason.
+ "options": [None, "Finite state machines being rejected by Sidejobs overload protection", "fsms", "load", "riak.core.fsm_rejected", "line"],
+ "lines": [
+ ["node_get_fsm_rejected", "get", "absolute"],
+ ["node_put_fsm_rejected", "put", "absolute"]
+ ]
+ },
+
+ # General Riak Search Load / Health metrics
+ "search.errors": {
+ "options": [None, "Number of writes to Search failed due to bad data format by reason", "writes", "load", "riak.search.index", "line"],
+ "lines": [
+ ["search_index_bad_entry_count", "bad_entry", "absolute"],
+ ["search_index_extract_fail_count", "extract_fail", "absolute"],
+ ]
+ }
+}
+
+
+class Service(UrlService):
+ """
+ Riak KV collector job: charts values taken from the JSON document fetched through UrlService.
+ """
+ def __init__(self, configuration=None, name=None):
+ UrlService.__init__(self, configuration=configuration, name=name)
+ # Chart order and chart definitions come from the module-level ORDER and CHARTS tables.
+ self.order = ORDER
+ self.definitions = CHARTS
+
+ def _get_data(self):
+ """
+ Fetch the raw response and decode it as JSON.
+ :return: the decoded JSON value (expected to be a dict of Riak stats),
+ or None when the response is empty or cannot be decoded
+ """
+ raw = self._get_raw_data()
+ if not raw:
+ return None
+
+ try:
+ return loads(raw)
+ except (TypeError, ValueError) as err:
+ # Undecodable payload: report the error through self.error and return no data.
+ self.error(err)
+ return None
diff --git a/collectors/python.d.plugin/riakkv/riakkv.conf b/collectors/python.d.plugin/riakkv/riakkv.conf
new file mode 100644
index 000000000..be01c48ac
--- /dev/null
+++ b/collectors/python.d.plugin/riakkv/riakkv.conf
@@ -0,0 +1,68 @@
+# netdata python.d.plugin configuration for riak
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# penalty indicates whether to apply penalty to update_every in case of failures.
+# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes.
+# penalty: yes
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# penalty: yes # the JOB's penalty
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+#
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+local:
+ url : 'http://localhost:8098/stats'
diff --git a/collectors/python.d.plugin/smartd_log/README.md b/collectors/python.d.plugin/smartd_log/README.md
index 3b0816fb8..f6584be70 100644
--- a/collectors/python.d.plugin/smartd_log/README.md
+++ b/collectors/python.d.plugin/smartd_log/README.md
@@ -85,7 +85,11 @@ For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_
# dump smartd attrs info every 600 seconds
smartd_opts="-A /var/log/smartd/ -i 600"
```
-
+You may need to create the smartd directory before smartd will write to it:
+```
+mkdir -p /var/log/smartd
+```
+Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (the default location). See also [https://linux.die.net/man/8/smartd](https://linux.die.net/man/8/smartd) for more info on the `-A` / `--attributelog=PREFIX` option.
`smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files.
diff --git a/collectors/python.d.plugin/tomcat/tomcat.chart.py b/collectors/python.d.plugin/tomcat/tomcat.chart.py
index 01578c56e..ab3003304 100644
--- a/collectors/python.d.plugin/tomcat/tomcat.chart.py
+++ b/collectors/python.d.plugin/tomcat/tomcat.chart.py
@@ -5,11 +5,17 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import xml.etree.ElementTree as ET
+import re
from bases.FrameworkServices.UrlService import UrlService
MiB = 1 << 20
+# Regex fix for Tomcat single quote XML attributes
+# affecting Tomcat < 8.5.24 & 9.0.2 running with Java > 9
+# cf. https://bz.apache.org/bugzilla/show_bug.cgi?id=61603
+single_quote_regex = re.compile(r"='([^']+)'([^']+)''")
+
ORDER = [
'accesses',
'bandwidth',
@@ -95,6 +101,32 @@ class Service(UrlService):
self.definitions = CHARTS
self.url = self.configuration.get('url', 'http://127.0.0.1:8080/manager/status?XML=true')
self.connector_name = self.configuration.get('connector_name', None)
+ self.parse = self.xml_parse
+
+ def xml_parse(self, data):
+ """
+ Parse the status page payload as XML.
+ :param data: text handed to ET.fromstring
+ :return: the parsed root element, or None when ET.ParseError is raised
+ """
+ try:
+ return ET.fromstring(data)
+ except ET.ParseError:
+ # Not parseable as XML; the debug message points at the missing "?XML=true" query as the likely cause.
+ self.debug('%s is not a valid XML page. Please add "?XML=true" to tomcat status page.' % self.url)
+ return None
+
+ def xml_single_quote_fix_parse(self, data):
+ """
+ Repair attribute values matched by single_quote_regex, then parse the result with xml_parse.
+ :param data: status page text
+ :return: whatever xml_parse returns for the repaired text
+ """
+ # ='first'second'' is rewritten to ='firstsecond', leaving a single pair of quotes around the value.
+ data = single_quote_regex.sub(r"='\g<1>\g<2>'", data)
+ return self.xml_parse(data)
+
+ def check(self):
+ """
+ Fetch the status page once and select the parser stored in self.parse.
+ :return: True when a non-empty page was fetched and parsed, False otherwise
+ """
+ self._manager = self._build_manager()
+
+ raw_data = self._get_raw_data()
+ if not raw_data:
+ return False
+
+ # Switch to the repairing parser only when the broken single-quote pattern is present in the page.
+ if single_quote_regex.search(raw_data):
+ self.warning('Tomcat status page is returning invalid single quote XML, please consider upgrading '
+ 'your Tomcat installation. See https://bz.apache.org/bugzilla/show_bug.cgi?id=61603')
+ self.parse = self.xml_single_quote_fix_parse
+
+ return self.parse(raw_data) is not None
def _get_data(self):
"""
@@ -104,11 +136,10 @@ class Service(UrlService):
data = None
raw_data = self._get_raw_data()
if raw_data:
- try:
- xml = ET.fromstring(raw_data)
- except ET.ParseError:
- self.debug('%s is not a vaild XML page. Please add "?XML=true" to tomcat status page.' % self.url)
+ xml = self.parse(raw_data)
+ if xml is None:
return None
+
data = {}
jvm = xml.find('jvm')
@@ -153,7 +184,7 @@ class Service(UrlService):
data['metaspace_committed'] = pool.get('usageCommitted')
data['metaspace_max'] = pool.get('usageMax')
- if connector:
+ if connector is not None:
thread_info = connector.find('threadInfo')
data['currentThreadsBusy'] = thread_info.get('currentThreadsBusy')
data['currentThreadCount'] = thread_info.get('currentThreadCount')
diff --git a/collectors/python.d.plugin/varnish/varnish.chart.py b/collectors/python.d.plugin/varnish/varnish.chart.py
index 70af50ccb..58745e24d 100644
--- a/collectors/python.d.plugin/varnish/varnish.chart.py
+++ b/collectors/python.d.plugin/varnish/varnish.chart.py
@@ -5,9 +5,8 @@
import re
-from bases.collection import find_binary
from bases.FrameworkServices.ExecutableService import ExecutableService
-
+from bases.collection import find_binary
ORDER = [
'session_connections',
@@ -138,6 +137,18 @@ CHARTS = {
VARNISHSTAT = 'varnishstat'
+re_version = re.compile(r'varnish-(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)')
+
+
+class VarnishVersion:
+ """
+ Holds the major, minor and patch components of a varnish version.
+ """
+ def __init__(self, major, minor, patch):
+ self.major = major
+ self.minor = minor
+ self.patch = patch
+
+ def __str__(self):
+ # Rendered as "major.minor.patch".
+ return '{0}.{1}.{2}'.format(self.major, self.minor, self.patch)
+
class Parser:
_backend_new = re.compile(r'VBE.([\d\w_.]+)\(.*?\).(beresp[\w_]+)\s+(\d+)')
@@ -185,10 +196,32 @@ class Service(ExecutableService):
self.error("can't locate '{0}' binary or binary is not executable by user netdata".format(VARNISHSTAT))
return False
+ command = [varnishstat, '-V']
+ reply = self._get_raw_data(stderr=True, command=command)
+ if not reply:
+ self.error(
+ "no output from '{0}'. Is varnish running? Not enough privileges?".format(' '.join(self.command)))
+ return False
+
+ ver = parse_varnish_version(reply)
+ if not ver:
+ self.error("failed to parse reply from '{0}', used regex :'{1}', reply : {2}".format(
+ ' '.join(command),
+ re_version.pattern,
+ reply,
+ ))
+ return False
+
if self.instance_name:
- self.command = [varnishstat, '-1', '-n', self.instance_name, '-t', '1']
+ self.command = [varnishstat, '-1', '-n', self.instance_name]
else:
- self.command = [varnishstat, '-1', '-t', '1']
+ self.command = [varnishstat, '-1']
+
+ if ver.major > 4:
+ self.command.extend(['-t', '1'])
+
+ self.info("varnish version: {0}, will use command: '{1}'".format(ver, ' '.join(self.command)))
+
return True
def check(self):
@@ -198,14 +231,14 @@ class Service(ExecutableService):
# STDOUT is not empty
reply = self._get_raw_data()
if not reply:
- self.error("No output from 'varnishstat'. Is it running? Not enough privileges?")
+ self.error("no output from '{0}'. Is it running? Not enough privileges?".format(' '.join(self.command)))
return False
self.parser.init(reply)
# Output is parsable
if not self.parser.re_default:
- self.error('Cant parse the output...')
+ self.error('cant parse the output...')
return False
if self.parser.re_backend:
@@ -260,3 +293,16 @@ class Service(ExecutableService):
self.order.insert(0, chart_name)
self.definitions.update(chart)
+
+
+def parse_varnish_version(lines):
+ """
+ Extract the varnish version from the first element of `lines`.
+ :param lines: indexable sequence of strings; only lines[0] is searched with re_version
+ :return: VarnishVersion with integer components, or None when lines[0] does not match
+ """
+ m = re_version.search(lines[0])
+ if not m:
+ return None
+
+ # Named groups (major, minor, patch) are captured as digit strings; convert them to int.
+ m = m.groupdict()
+ return VarnishVersion(
+ int(m['major']),
+ int(m['minor']),
+ int(m['patch']),
+ )
diff --git a/collectors/python.d.plugin/web_log/web_log.chart.py b/collectors/python.d.plugin/web_log/web_log.chart.py
index 6d6a261c4..fa5a8bc3e 100644
--- a/collectors/python.d.plugin/web_log/web_log.chart.py
+++ b/collectors/python.d.plugin/web_log/web_log.chart.py
@@ -4,9 +4,8 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import bisect
-import re
import os
-
+import re
from collections import namedtuple, defaultdict
from copy import deepcopy
@@ -660,7 +659,7 @@ class Web:
r' (?P<bytes_sent>\d+)'
r' (?P<resp_length>\d+)'
r' (?P<resp_time>\d+\.\d+)'
- r' (?P<resp_time_upstream>[\d.-]+) ')
+ r' (?P<resp_time_upstream>[\d.-]+)')
nginx_ext_append = re.compile(r'(?P<address>[\da-f.:]+)'
r' -.*?"(?P<request>[^"]*)"'