summaryrefslogtreecommitdiffstats
path: root/python.d/python_modules/base.py
diff options
context:
space:
mode:
authorLennart Weller <lhw@ring0.de>2017-01-24 15:21:09 +0000
committerLennart Weller <lhw@ring0.de>2017-01-24 15:21:09 +0000
commit3ed3b02ed96ddab1c084811f3579b3a2aec83e04 (patch)
tree7a61ab288ae47800c4f11be5677d6ad8288dcd98 /python.d/python_modules/base.py
parentNew upstream version 1.4.0+dfsg (diff)
downloadnetdata-3ed3b02ed96ddab1c084811f3579b3a2aec83e04.tar.xz
netdata-3ed3b02ed96ddab1c084811f3579b3a2aec83e04.zip
New upstream version 1.5.0+dfsgupstream/1.5.0+dfsg
Diffstat (limited to 'python.d/python_modules/base.py')
-rw-r--r--python.d/python_modules/base.py297
1 files changed, 194 insertions, 103 deletions
diff --git a/python.d/python_modules/base.py b/python.d/python_modules/base.py
index 1508e0965..320c54bae 100644
--- a/python.d/python_modules/base.py
+++ b/python.d/python_modules/base.py
@@ -116,33 +116,21 @@ class SimpleService(threading.Thread):
Return value presents exit status of update()
:return: boolean
"""
- t_start = time.time()
- timetable = self.timetable
+ t_start = float(time.time())
chart_name = self.chart_name
- # check if it is time to execute job update() function
- if timetable['next'] > t_start:
- self.debug(chart_name, "will be run in", str(int((timetable['next'] - t_start) * 1000)), "ms")
- return True
- since_last = int((t_start - timetable['last']) * 1000000)
- self.debug(chart_name,
- "ready to run, after", str(int((t_start - timetable['last']) * 1000)),
- "ms (update_every:", str(timetable['freq'] * 1000),
- "ms, latency:", str(int((t_start - timetable['next']) * 1000)), "ms")
+ since_last = int((t_start - self.timetable['last']) * 1000000)
if self.__first_run:
since_last = 0
+
if not self.update(since_last):
self.error("update function failed.")
return False
- t_end = time.time()
- self.timetable['next'] = t_end - (t_end % timetable['freq']) + timetable['freq']
+
# draw performance graph
- run_time = str(int((t_end - t_start) * 1000))
- # noinspection SqlNoDataSourceInspection
+ run_time = int((time.time() - t_start) * 1000)
print("BEGIN netdata.plugin_pythond_%s %s\nSET run_time = %s\nEND\n" %
- (self.chart_name, str(since_last), run_time))
- # sys.stdout.write("BEGIN netdata.plugin_pythond_%s %s\nSET run_time = %s\nEND\n" %
- # (self.chart_name, str(since_last), run_time))
+ (self.chart_name, str(since_last), str(run_time)))
self.debug(chart_name, "updated in", str(run_time), "ms")
self.timetable['last'] = t_start
@@ -155,23 +143,48 @@ class SimpleService(threading.Thread):
Exits when job failed or timed out.
:return: None
"""
- self.timetable['last'] = time.time()
+ step = float(self.timetable['freq'])
+ penalty = 0
+ self.timetable['last'] = float(time.time() - step)
+ self.debug("starting data collection - update frequency:", str(step), " retries allowed:", str(self.retries))
while True: # run forever, unless something is wrong
+ now = float(time.time())
+ next = self.timetable['next'] = now - (now % step) + step + penalty
+
+ # it is important to do this in a loop
+ # sleep() is interruptable
+ while now < next:
+ self.debug("sleeping for", str(next - now), "secs to reach frequency of", str(step), "secs, now:", str(now), " next:", str(next), " penalty:", str(penalty))
+ time.sleep(next - now)
+ now = float(time.time())
+
+ # do the job
try:
status = self._run_once()
except Exception as e:
- self.error("Something wrong: ", str(e))
- return
- if status: # handle retries if update failed
- time.sleep(self.timetable['next'] - time.time())
+ status = False
+
+ if status:
+ # it is good
self.retries_left = self.retries
+ penalty = 0
else:
+ # it failed
self.retries_left -= 1
if self.retries_left <= 0:
- self.error("no more retries. Exiting")
- return
+ if penalty == 0:
+ penalty = float(self.retries * step) / 2
+ else:
+ penalty *= 1.5
+
+ if penalty > 600:
+ penalty = 600
+
+ self.retries_left = self.retries
+ self.alert("failed to collect data for " + str(self.retries) + " times - increasing penalty to " + str(penalty) + " sec and trying again")
+
else:
- time.sleep(self.timetable['freq'])
+ self.error("failed to collect data - " + str(self.retries_left) + " retries left - penalty: " + str(penalty) + " sec")
# --- CHART ---
@@ -307,7 +320,10 @@ class SimpleService(threading.Thread):
"""
Upload new data to netdata.
"""
- print(self._data_stream)
+ try:
+ print(self._data_stream)
+ except Exception as e:
+ msg.fatal('cannot send data to netdata:', str(e))
self._data_stream = ""
# --- ERROR HANDLING ---
@@ -318,6 +334,12 @@ class SimpleService(threading.Thread):
"""
msg.error(self.chart_name, *params)
+ def alert(self, *params):
+ """
+ Show error message on stderr
+ """
+ msg.alert(self.chart_name, *params)
+
def debug(self, *params):
"""
Show debug message on stderr
@@ -345,10 +367,18 @@ class SimpleService(threading.Thread):
:return: boolean
"""
self.debug("Module", str(self.__module__), "doesn't implement check() function. Using default.")
- if self._get_data() is None or len(self._get_data()) == 0:
+ data = self._get_data()
+
+ if data is None:
+ self.debug("failed to receive data during check().")
return False
- else:
- return True
+
+ if len(data) == 0:
+ self.debug("empty data during check().")
+ return False
+
+ self.debug("successfully received data during check(): '" + str(data) + "'")
+ return True
def create(self):
"""
@@ -357,6 +387,7 @@ class SimpleService(threading.Thread):
"""
data = self._get_data()
if data is None:
+ self.debug("failed to receive data during create().")
return False
idx = 0
@@ -380,7 +411,7 @@ class SimpleService(threading.Thread):
"""
data = self._get_data()
if data is None:
- self.debug("_get_data() returned no data")
+ self.debug("failed to receive data during update().")
return False
updated = False
@@ -458,7 +489,7 @@ class UrlService(SimpleService):
return None
try:
- raw = f.read().decode('utf-8')
+ raw = f.read().decode('utf-8', 'ignore')
except Exception as e:
self.error(str(e))
finally:
@@ -506,8 +537,83 @@ class SocketService(SimpleService):
self.unix_socket = None
self.request = ""
self.__socket_config = None
+ self.__empty_request = "".encode()
SimpleService.__init__(self, configuration=configuration, name=name)
+ def _socketerror(self, message=None):
+ if self.unix_socket is not None:
+ self.error("unix socket '" + self.unix_socket + "':", message)
+ else:
+ if self.__socket_config is not None:
+ af, socktype, proto, canonname, sa = self.__socket_config
+ self.error("socket to '" + str(sa[0]) + "' port " + str(sa[1]) + ":", message)
+ else:
+ self.error("unknown socket:", message)
+
+ def _connect2socket(self, res=None):
+ """
+ Connect to a socket, passing the result of getaddrinfo()
+ :return: boolean
+ """
+ if res is None:
+ res = self.__socket_config
+ if res is None:
+ self.error("Cannot create socket to 'None':")
+ return False
+
+ af, socktype, proto, canonname, sa = res
+ try:
+ self.debug("creating socket to '" + str(sa[0]) + "', port " + str(sa[1]))
+ self._sock = socket.socket(af, socktype, proto)
+ except socket.error as e:
+ self.error("Failed to create socket to '" + str(sa[0]) + "', port " + str(sa[1]) + ":", str(e))
+ self._sock = None
+ self.__socket_config = None
+ return False
+
+ try:
+ self.debug("connecting socket to '" + str(sa[0]) + "', port " + str(sa[1]))
+ self._sock.connect(sa)
+ except socket.error as e:
+ self.error("Failed to connect to '" + str(sa[0]) + "', port " + str(sa[1]) + ":", str(e))
+ self._disconnect()
+ self.__socket_config = None
+ return False
+
+ self.debug("connected to '" + str(sa[0]) + "', port " + str(sa[1]))
+ self.__socket_config = res
+ return True
+
+ def _connect2unixsocket(self):
+ """
+ Connect to a unix socket, given its filename
+ :return: boolean
+ """
+ if self.unix_socket is None:
+ self.error("cannot connect to unix socket 'None'")
+ return False
+
+ try:
+ self.debug("attempting DGRAM unix socket '" + str(self.unix_socket) + "'")
+ self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
+ self._sock.connect(self.unix_socket)
+ self.debug("connected DGRAM unix socket '" + str(self.unix_socket) + "'")
+ return True
+ except socket.error as e:
+ self.debug("Failed to connect DGRAM unix socket '" + str(self.unix_socket) + "':", str(e))
+
+ try:
+ self.debug("attempting STREAM unix socket '" + str(self.unix_socket) + "'")
+ self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ self._sock.connect(self.unix_socket)
+ self.debug("connected STREAM unix socket '" + str(self.unix_socket) + "'")
+ return True
+ except socket.error as e:
+ self.debug("Failed to connect STREAM unix socket '" + str(self.unix_socket) + "':", str(e))
+ self.error("Failed to connect to unix socket '" + str(self.unix_socket) + "':", str(e))
+ self._sock = None
+ return False
+
def _connect(self):
"""
Recreate socket and connect to it since sockets cannot be reused after closing
@@ -515,63 +621,38 @@ class SocketService(SimpleService):
:return:
"""
try:
- if self.unix_socket is None:
- if self.__socket_config is None:
- # establish ipv6 or ipv4 connection.
- for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, socket.SOCK_STREAM):
- try:
- # noinspection SpellCheckingInspection
- af, socktype, proto, canonname, sa = res
- self._sock = socket.socket(af, socktype, proto)
- except socket.error as e:
- self.debug("Cannot create socket:", str(e))
- self._sock = None
- continue
- try:
- self._sock.connect(sa)
- except socket.error as e:
- self.debug("Cannot connect to socket:", str(e))
- self._disconnect()
- continue
- self.__socket_config = res
- break
- else:
- # connect to socket with previously established configuration
- try:
- af, socktype, proto, canonname, sa = self.__socket_config
- self._sock = socket.socket(af, socktype, proto)
- self._sock.connect(sa)
- except socket.error as e:
- self.debug("Cannot create or connect to socket:", str(e))
- self._disconnect()
+ if self.unix_socket is not None:
+ self._connect2unixsocket()
+
else:
- # connect to unix socket
- try:
- self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
- self._sock.connect(self.unix_socket)
- except socket.error:
- self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- self._sock.connect(self.unix_socket)
+ if self.__socket_config is not None:
+ self._connect2socket()
+ else:
+ for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, socket.SOCK_STREAM):
+ if self._connect2socket(res): break
except Exception as e:
- self.error(str(e),
- "Cannot create socket with following configuration: host:", str(self.host),
- "port:", str(self.port),
- "socket:", str(self.unix_socket))
self._sock = None
- self._sock.setblocking(0)
+ self.__socket_config = None
+
+ if self._sock is not None:
+ self._sock.setblocking(0)
+ self._sock.settimeout(5)
+ self.debug("set socket timeout to: " + str(self._sock.gettimeout()))
def _disconnect(self):
"""
Close socket connection
:return:
"""
- try:
- self._sock.shutdown(2) # 0 - read, 1 - write, 2 - all
- self._sock.close()
- except Exception:
- pass
- self._sock = None
+ if self._sock is not None:
+ try:
+ self.debug("closing socket")
+ self._sock.shutdown(2) # 0 - read, 1 - write, 2 - all
+ self._sock.close()
+ except Exception:
+ pass
+ self._sock = None
def _send(self):
"""
@@ -579,15 +660,13 @@ class SocketService(SimpleService):
:return: boolean
"""
# Send request if it is needed
- if self.request != "".encode():
+ if self.request != self.__empty_request:
try:
+ self.debug("sending request:", str(self.request))
self._sock.send(self.request)
except Exception as e:
+ self._socketerror("error sending request:" + str(e))
self._disconnect()
- self.error(str(e),
- "used configuration: host:", str(self.host),
- "port:", str(self.port),
- "socket:", str(self.unix_socket))
return False
return True
@@ -598,24 +677,28 @@ class SocketService(SimpleService):
"""
data = ""
while True:
+ self.debug("receiving response")
try:
- ready_to_read, _, in_error = select.select([self._sock], [], [], 5)
+ buf = self._sock.recv(4096)
except Exception as e:
- self.debug("SELECT", str(e))
+ self._socketerror("failed to receive response:" + str(e))
self._disconnect()
break
- if len(ready_to_read) > 0:
- buf = self._sock.recv(4096)
- if len(buf) == 0 or buf is None: # handle server disconnect
- break
- data += buf.decode()
- if self._check_raw_data(data):
- break
- else:
- self.error("Socket timed out.")
+
+ if buf is None or len(buf) == 0: # handle server disconnect
+ if data == "":
+ self._socketerror("unexpectedly disconnected")
+ else:
+ self.debug("server closed the connection")
self._disconnect()
break
+ self.debug("received data:", str(buf))
+ data += buf.decode('utf-8', 'ignore')
+ if self._check_raw_data(data):
+ break
+
+ self.debug("final response:", str(data))
return data
def _get_raw_data(self):
@@ -625,6 +708,8 @@ class SocketService(SimpleService):
"""
if self._sock is None:
self._connect()
+ if self._sock is None:
+ return None
# Send request if it is needed
if not self._send():
@@ -654,6 +739,7 @@ class SocketService(SimpleService):
self.name = ""
else:
self.name = str(self.name)
+
try:
self.unix_socket = str(self.configuration['socket'])
except (KeyError, TypeError):
@@ -667,10 +753,12 @@ class SocketService(SimpleService):
self.port = int(self.configuration['port'])
except (KeyError, TypeError):
self.debug("No port specified. Using: '" + str(self.port) + "'")
+
try:
self.request = str(self.configuration['request'])
except (KeyError, TypeError):
self.debug("No request specified. Using: '" + str(self.request) + "'")
+
self.request = self.request.encode()
def check(self):
@@ -724,7 +812,7 @@ class LogService(SimpleService):
try:
self.log_path = str(self.configuration['path'])
except (KeyError, TypeError):
- self.error("No path to log specified. Using: '" + self.log_path + "'")
+ self.info("No path to log specified. Using: '" + self.log_path + "'")
if os.access(self.log_path, os.R_OK):
return True
@@ -779,22 +867,25 @@ class ExecutableService(SimpleService):
try:
self.command = str(self.configuration['command'])
except (KeyError, TypeError):
- self.error("No command specified. Using: '" + self.command + "'")
- command = self.command.split(' ')
+ self.info("No command specified. Using: '" + self.command + "'")
+ # Splitting self.command on every space so subprocess.Popen reads it properly
+ self.command = self.command.split(' ')
- for arg in command[1:]:
+ for arg in self.command[1:]:
if any(st in arg for st in self.bad_substrings):
self.error("Bad command argument:" + " ".join(self.command[1:]))
return False
+
# test command and search for it in /usr/sbin or /sbin when failed
- base = command[0].split('/')[-1]
+ base = self.command[0].split('/')[-1]
if self._get_raw_data() is None:
for prefix in ['/sbin/', '/usr/sbin/']:
- command[0] = prefix + base
- if os.path.isfile(command[0]):
+ self.command[0] = prefix + base
+ if os.path.isfile(self.command[0]):
break
- self.command = command
+
if self._get_data() is None or len(self._get_data()) == 0:
self.error("Command", self.command, "returned no data")
return False
+
return True