diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:57:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:57:58 +0000 |
commit | be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch) | |
tree | 9754ff1ca740f6346cf8483ec915d4054bc5da2d /collectors/python.d.plugin/python_modules | |
parent | Initial commit. (diff) | |
download | netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.tar.xz netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.zip |
Adding upstream version 1.44.3.upstream/1.44.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/python.d.plugin/python_modules')
90 files changed, 24966 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/python_modules/__init__.py b/collectors/python.d.plugin/python_modules/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/__init__.py diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py new file mode 100644 index 00000000..a74b4239 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/ExecutableService.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from subprocess import Popen, PIPE + +from bases.FrameworkServices.SimpleService import SimpleService +from bases.collection import find_binary + + +class ExecutableService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.command = None + + def _get_raw_data(self, stderr=False, command=None): + """ + Get raw data from executed command + :return: <list> + """ + command = command or self.command + self.debug("Executing command '{0}'".format(' '.join(command))) + try: + p = Popen(command, stdout=PIPE, stderr=PIPE) + except Exception as error: + self.error('Executing command {0} resulted in error: {1}'.format(command, error)) + return None + + data = list() + std = p.stderr if stderr else p.stdout + for line in std: + try: + data.append(line.decode('utf-8')) + except (TypeError, UnicodeDecodeError): + continue + + return data + + def check(self): + """ + Parse basic configuration, check if command is whitelisted and is returning values + :return: <boolean> + """ + # Preference: 1. "command" from configuration file 2. "command" from plugin (if specified) + if 'command' in self.configuration: + self.command = self.configuration['command'] + + # "command" must be: 1.not None 2. type <str> + if not (self.command and isinstance(self.command, str)): + self.error('Command is not defined or command type is not <str>') + return False + + # Split "command" into: 1. command <str> 2. options <list> + command, opts = self.command.split()[0], self.command.split()[1:] + + # Check for "bad" symbols in options. No pipes, redirects etc. + opts_list = ['&', '|', ';', '>', '<'] + bad_opts = set(''.join(opts)) & set(opts_list) + if bad_opts: + self.error("Bad command argument(s): {opts}".format(opts=bad_opts)) + return False + + # Find absolute path ('echo' => '/bin/echo') + if '/' not in command: + command = find_binary(command) + if not command: + self.error('Can\'t locate "{command}" binary'.format(command=self.command)) + return False + # Check if binary exist and executable + else: + if not os.access(command, os.X_OK): + self.error('"{binary}" is not executable'.format(binary=command)) + return False + + self.command = [command] + opts if opts else [command] + + try: + data = self._get_data() + except Exception as error: + self.error('_get_data() failed. Command: {command}. Error: {error}'.format(command=self.command, + error=error)) + return False + + if isinstance(data, dict) and data: + return True + self.error('Command "{command}" returned no data'.format(command=self.command)) + return False diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py new file mode 100644 index 00000000..a55e33f5 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/LogService.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +from glob import glob +import sys +import os + +from bases.FrameworkServices.SimpleService import SimpleService + + +class LogService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.log_path = self.configuration.get('path') + self.__glob_path = self.log_path + self._last_position = 0 + self.__re_find = dict(current=0, run=0, maximum=60) + self.__open_args = {'errors': 'replace'} if sys.version_info[0] > 2 else {} + + def _get_raw_data(self): + """ + Get log lines since last poll + :return: list + """ + lines = list() + try: + if self.__re_find['current'] == self.__re_find['run']: + self._find_recent_log_file() + size = os.path.getsize(self.log_path) + if size == self._last_position: + self.__re_find['current'] += 1 + return list() # return empty list if nothing has changed + elif size < self._last_position: + self._last_position = 0 # read from beginning if file has shrunk + + with open(self.log_path, **self.__open_args) as fp: + fp.seek(self._last_position) + for line in fp: + lines.append(line) + self._last_position = fp.tell() + self.__re_find['current'] = 0 + except (OSError, IOError) as error: + self.__re_find['current'] += 1 + self.error(str(error)) + + return lines or None + + def _find_recent_log_file(self): + """ + :return: + """ + self.__re_find['run'] = self.__re_find['maximum'] + self.__re_find['current'] = 0 + self.__glob_path = self.__glob_path or self.log_path # workaround for modules w/o config files + path_list = glob(self.__glob_path) + if path_list: + self.log_path = max(path_list) + return True + return False + + def check(self): + """ + Parse basic configuration and check if log file exists + :return: boolean + """ + if not self.log_path: + self.error('No path to log specified') + return None + + if self._find_recent_log_file() and os.access(self.log_path, os.R_OK) and os.path.isfile(self.log_path): + return True + self.error('Cannot access {0}'.format(self.log_path)) + return False + + def create(self): + # set cursor at last byte of log file + self._last_position = os.path.getsize(self.log_path) + status = SimpleService.create(self) + return status diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py new file mode 100644 index 00000000..7f5c7d22 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/MySQLService.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +from sys import exc_info + +try: + import MySQLdb + + PY_MYSQL = True +except ImportError: + try: + import pymysql as MySQLdb + + PY_MYSQL = True + except ImportError: + PY_MYSQL = False + +from bases.FrameworkServices.SimpleService import SimpleService + + +class MySQLService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.__connection = None + self.__conn_properties = dict() + self.extra_conn_properties = dict() + self.__queries = self.configuration.get('queries', dict()) + self.queries = dict() + + def __connect(self): + try: + connection = MySQLdb.connect(connect_timeout=self.update_every, **self.__conn_properties) + except (MySQLdb.MySQLError, TypeError, AttributeError) as error: + return None, str(error) + else: + return connection, None + + def check(self): + def get_connection_properties(conf, extra_conf): + properties = dict() + if conf.get('user'): + properties['user'] = conf['user'] + if conf.get('pass'): + properties['passwd'] = conf['pass'] + + if conf.get('socket'): + properties['unix_socket'] = conf['socket'] + elif conf.get('host'): + properties['host'] = conf['host'] + properties['port'] = int(conf.get('port', 3306)) + elif conf.get('my.cnf'): + properties['read_default_file'] = conf['my.cnf'] + + if conf.get('ssl'): + properties['ssl'] = conf['ssl'] + + if isinstance(extra_conf, dict) and extra_conf: + properties.update(extra_conf) + + return properties or None + + def is_valid_queries_dict(raw_queries, log_error): + """ + :param raw_queries: dict: + :param log_error: function: + :return: dict or None + + raw_queries is valid when: type <dict> and not empty after is_valid_query(for all queries) + """ + + def is_valid_query(query): + return all([isinstance(query, str), + query.startswith(('SELECT', 'select', 'SHOW', 'show'))]) + + if hasattr(raw_queries, 'keys') and raw_queries: + valid_queries = dict([(n, q) for n, q in raw_queries.items() if is_valid_query(q)]) + bad_queries = set(raw_queries) - set(valid_queries) + + if bad_queries: + log_error('Removed query(s): {queries}'.format(queries=bad_queries)) + return valid_queries + else: + log_error('Unsupported "queries" format. Must be not empty <dict>') + return None + + if not PY_MYSQL: + self.error('MySQLdb or PyMySQL module is needed to use mysql.chart.py plugin') + return False + + # Preference: 1. "queries" from the configuration file 2. "queries" from the module + self.queries = self.__queries or self.queries + # Check if "self.queries" exist, not empty and all queries are in valid format + self.queries = is_valid_queries_dict(self.queries, self.error) + if not self.queries: + return None + + # Get connection properties + self.__conn_properties = get_connection_properties(self.configuration, self.extra_conn_properties) + if not self.__conn_properties: + self.error('Connection properties are missing') + return False + + # Create connection to the database + self.__connection, error = self.__connect() + if error: + self.error('Can\'t establish connection to MySQL: {error}'.format(error=error)) + return False + + try: + data = self._get_data() + except Exception as error: + self.error('_get_data() failed. Error: {error}'.format(error=error)) + return False + + if isinstance(data, dict) and data: + return True + self.error("_get_data() returned no data or type is not <dict>") + return False + + def _get_raw_data(self, description=None): + """ + Get raw data from MySQL server + :return: dict: fetchall() or (fetchall(), description) + """ + + if not self.__connection: + self.__connection, error = self.__connect() + if error: + return None + + raw_data = dict() + queries = dict(self.queries) + try: + cursor = self.__connection.cursor() + for name, query in queries.items(): + try: + cursor.execute(query) + except (MySQLdb.ProgrammingError, MySQLdb.OperationalError) as error: + if self.__is_error_critical(err_class=exc_info()[0], err_text=str(error)): + cursor.close() + raise RuntimeError + self.error('Removed query: {name}[{query}]. Error: error'.format(name=name, + query=query, + error=error)) + self.queries.pop(name) + continue + else: + raw_data[name] = (cursor.fetchall(), cursor.description) if description else cursor.fetchall() + cursor.close() + self.__connection.commit() + except (MySQLdb.MySQLError, RuntimeError, TypeError, AttributeError): + self.__connection.close() + self.__connection = None + return None + else: + return raw_data or None + + @staticmethod + def __is_error_critical(err_class, err_text): + return err_class == MySQLdb.OperationalError and all(['denied' not in err_text, + 'Unknown column' not in err_text]) diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py new file mode 100644 index 00000000..3f122e1d --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from bases.charts import Charts, ChartError, create_runtime_chart +from bases.collection import safe_print +from bases.loggers import PythonDLogger +from third_party.monotonic import monotonic +from time import sleep, time + +RUNTIME_CHART_UPDATE = 'BEGIN netdata.runtime_{job_name} {since_last}\n' \ + 'SET run_time = {elapsed}\n' \ + 'END\n' + +PENALTY_EVERY = 5 +MAX_PENALTY = 10 * 60 # 10 minutes + +ND_INTERNAL_MONITORING_DISABLED = os.getenv("NETDATA_INTERNALS_MONITORING") == "NO" + + +class RuntimeCounters: + def __init__(self, configuration): + """ + :param configuration: <dict> + """ + self.update_every = int(configuration.pop('update_every')) + self.do_penalty = configuration.pop('penalty') + + self.start_mono = 0 + self.start_real = 0 + self.retries = 0 + self.penalty = 0 + self.elapsed = 0 + self.prev_update = 0 + + self.runs = 1 + + def calc_next(self): + self.start_mono = monotonic() + return self.start_mono - (self.start_mono % self.update_every) + self.update_every + self.penalty + + def sleep_until_next(self): + next_time = self.calc_next() + while self.start_mono < next_time: + sleep(next_time - self.start_mono) + self.start_mono = monotonic() + self.start_real = time() + + def handle_retries(self): + self.retries += 1 + if self.do_penalty and self.retries % PENALTY_EVERY == 0: + self.penalty = round(min(self.retries * self.update_every / 2, MAX_PENALTY)) + + +def clean_module_name(name): + if name.startswith('pythond_'): + return name[8:] + return name + + +class SimpleService(PythonDLogger, object): + """ + Prototype of Service class. + Implemented basic functionality to run jobs by `python.d.plugin` + """ + + def __init__(self, configuration, name=''): + """ + :param configuration: <dict> + :param name: <str> + """ + PythonDLogger.__init__(self) + self.configuration = configuration + self.order = list() + self.definitions = dict() + + self.module_name = clean_module_name(self.__module__) + self.job_name = configuration.pop('job_name') + self.actual_job_name = self.job_name or self.module_name + self.override_name = configuration.pop('override_name') + self.fake_name = None + + self._runtime_counters = RuntimeCounters(configuration=configuration) + self.charts = Charts(job_name=self.actual_name, + actual_job_name=self.actual_job_name, + priority=configuration.pop('priority'), + cleanup=configuration.pop('chart_cleanup'), + get_update_every=self.get_update_every, + module_name=self.module_name) + + def __repr__(self): + return '<{cls_bases}: {name}>'.format(cls_bases=', '.join(c.__name__ for c in self.__class__.__bases__), + name=self.name) + + @property + def name(self): + name = self.override_name or self.job_name + if name and name != self.module_name: + return '_'.join([self.module_name, name]) + return self.module_name + + def actual_name(self): + return self.fake_name or self.name + + @property + def runs_counter(self): + return self._runtime_counters.runs + + @property + def update_every(self): + return self._runtime_counters.update_every + + @update_every.setter + def update_every(self, value): + """ + :param value: <int> + :return: + """ + self._runtime_counters.update_every = value + + def get_update_every(self): + return self.update_every + + def check(self): + """ + check() prototype + :return: boolean + """ + self.debug("job doesn't implement check() method. Using default which simply invokes get_data().") + data = self.get_data() + if data and isinstance(data, dict): + return True + self.debug('returned value is wrong: {0}'.format(data)) + return False + + @create_runtime_chart + def create(self): + for chart_name in self.order: + chart_config = self.definitions.get(chart_name) + + if not chart_config: + self.debug("create() => [NOT ADDED] chart '{chart_name}' not in definitions. " + "Skipping it.".format(chart_name=chart_name)) + continue + + # create chart + chart_params = [chart_name] + chart_config['options'] + try: + self.charts.add_chart(params=chart_params) + except ChartError as error: + self.error("create() => [NOT ADDED] (chart '{chart}': {error})".format(chart=chart_name, + error=error)) + continue + + # add dimensions to chart + for dimension in chart_config['lines']: + try: + self.charts[chart_name].add_dimension(dimension) + except ChartError as error: + self.error("create() => [NOT ADDED] (dimension '{dimension}': {error})".format(dimension=dimension, + error=error)) + continue + + # add variables to chart + if 'variables' in chart_config: + for variable in chart_config['variables']: + try: + self.charts[chart_name].add_variable(variable) + except ChartError as error: + self.error("create() => [NOT ADDED] (variable '{var}': {error})".format(var=variable, + error=error)) + continue + + del self.order + del self.definitions + + # True if job has at least 1 chart else False + return bool(self.charts) + + def run(self): + """ + Runs job in thread. Handles retries. + Exits when job failed or timed out. + :return: None + """ + job = self._runtime_counters + self.debug('started, update frequency: {freq}'.format(freq=job.update_every)) + + while True: + job.sleep_until_next() + + since = 0 + if job.prev_update: + since = int((job.start_real - job.prev_update) * 1e6) + + try: + updated = self.update(interval=since) + except Exception as error: + self.error('update() unhandled exception: {error}'.format(error=error)) + updated = False + + job.runs += 1 + + if not updated: + job.handle_retries() + else: + job.elapsed = int((monotonic() - job.start_mono) * 1e3) + job.prev_update = job.start_real + job.retries, job.penalty = 0, 0 + if not ND_INTERNAL_MONITORING_DISABLED: + safe_print(RUNTIME_CHART_UPDATE.format(job_name=self.name, + since_last=since, + elapsed=job.elapsed)) + self.debug('update => [{status}] (elapsed time: {elapsed}, failed retries in a row: {retries})'.format( + status='OK' if updated else 'FAILED', + elapsed=job.elapsed if updated else '-', + retries=job.retries)) + + def update(self, interval): + """ + :return: + """ + data = self.get_data() + if not data: + self.debug('get_data() returned no data') + return False + elif not isinstance(data, dict): + self.debug('get_data() returned incorrect type data') + return False + + updated = False + + for chart in self.charts: + if chart.flags.obsoleted: + if chart.can_be_updated(data): + chart.refresh() + else: + continue + elif self.charts.cleanup and chart.penalty >= self.charts.cleanup: + chart.obsolete() + self.info("chart '{0}' was suppressed due to non updating".format(chart.name)) + continue + + ok = chart.update(data, interval) + if ok: + updated = True + + if not updated: + self.debug('none of the charts has been updated') + + return updated + + def get_data(self): + return self._get_data() + + def _get_data(self): + raise NotImplementedError diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py new file mode 100644 index 00000000..d6c75505 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SocketService.py @@ -0,0 +1,336 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import errno +import socket + +try: + import ssl +except ImportError: + _TLS_SUPPORT = False +else: + _TLS_SUPPORT = True + +if _TLS_SUPPORT: + try: + PROTOCOL_TLS = ssl.PROTOCOL_TLS + except AttributeError: + PROTOCOL_TLS = ssl.PROTOCOL_SSLv23 + +from bases.FrameworkServices.SimpleService import SimpleService + + +DEFAULT_CONNECT_TIMEOUT = 2.0 +DEFAULT_READ_TIMEOUT = 2.0 +DEFAULT_WRITE_TIMEOUT = 2.0 + + +class SocketService(SimpleService): + def __init__(self, configuration=None, name=None): + self._sock = None + self._keep_alive = False + self.host = 'localhost' + self.port = None + self.unix_socket = None + self.dgram_socket = False + self.request = '' + self.tls = False + self.cert = None + self.key = None + self.__socket_config = None + self.__empty_request = "".encode() + SimpleService.__init__(self, configuration=configuration, name=name) + self.connect_timeout = configuration.get('connect_timeout', DEFAULT_CONNECT_TIMEOUT) + self.read_timeout = configuration.get('read_timeout', DEFAULT_READ_TIMEOUT) + self.write_timeout = configuration.get('write_timeout', DEFAULT_WRITE_TIMEOUT) + + def _socket_error(self, message=None): + if self.unix_socket is not None: + self.error('unix socket "{socket}": {message}'.format(socket=self.unix_socket, + message=message)) + else: + if self.__socket_config is not None: + _, _, _, _, sa = self.__socket_config + self.error('socket to "{address}" port {port}: {message}'.format(address=sa[0], + port=sa[1], + message=message)) + else: + self.error('unknown socket: {0}'.format(message)) + + def _connect2socket(self, res=None): + """ + Connect to a socket, passing the result of getaddrinfo() + :return: boolean + """ + if res is None: + res = self.__socket_config + if res is None: + self.error("Cannot create socket to 'None':") + return False + + af, sock_type, proto, _, sa = res + try: + self.debug('Creating socket to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + self._sock = socket.socket(af, sock_type, proto) + except socket.error as error: + self.error('Failed to create socket "{address}", port {port}, error: {error}'.format(address=sa[0], + port=sa[1], + error=error)) + self._sock = None + self.__socket_config = None + return False + + if self.tls: + try: + self.debug('Encapsulating socket with TLS') + self.debug('Using keyfile: {0}, certfile: {1}, cert_reqs: {2}, ssl_version: {3}'.format( + self.key, self.cert, ssl.CERT_NONE, PROTOCOL_TLS + )) + self._sock = ssl.wrap_socket(self._sock, + keyfile=self.key, + certfile=self.cert, + server_side=False, + cert_reqs=ssl.CERT_NONE, + ssl_version=PROTOCOL_TLS, + ) + except (socket.error, ssl.SSLError, IOError, OSError) as error: + self.error('failed to wrap socket : {0}'.format(repr(error))) + self._disconnect() + self.__socket_config = None + return False + + try: + self.debug('connecting socket to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + self._sock.settimeout(self.connect_timeout) + self.debug('set socket connect timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.connect(sa) + except (socket.error, ssl.SSLError) as error: + self.error('Failed to connect to "{address}", port {port}, error: {error}'.format(address=sa[0], + port=sa[1], + error=error)) + self._disconnect() + self.__socket_config = None + return False + + self.debug('connected to "{address}", port {port}'.format(address=sa[0], port=sa[1])) + self.__socket_config = res + return True + + def _connect2unixsocket(self): + """ + Connect to a unix socket, given its filename + :return: boolean + """ + if self.unix_socket is None: + self.error("cannot connect to unix socket 'None'") + return False + + try: + self.debug('attempting DGRAM unix socket "{0}"'.format(self.unix_socket)) + self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) + self._sock.settimeout(self.connect_timeout) + self.debug('set socket connect timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.connect(self.unix_socket) + self.debug('connected DGRAM unix socket "{0}"'.format(self.unix_socket)) + return True + except socket.error as error: + self.debug('Failed to connect DGRAM unix socket "{socket}": {error}'.format(socket=self.unix_socket, + error=error)) + + try: + self.debug('attempting STREAM unix socket "{0}"'.format(self.unix_socket)) + self._sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self._sock.settimeout(self.connect_timeout) + self.debug('set socket connect timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.connect(self.unix_socket) + self.debug('connected STREAM unix socket "{0}"'.format(self.unix_socket)) + return True + except socket.error as error: + self.debug('Failed to connect STREAM unix socket "{socket}": {error}'.format(socket=self.unix_socket, + error=error)) + self._sock = None + return False + + def _connect(self): + """ + Recreate socket and connect to it since sockets cannot be reused after closing + Available configurations are IPv6, IPv4 or UNIX socket + :return: + """ + try: + if self.unix_socket is not None: + self._connect2unixsocket() + + else: + if self.__socket_config is not None: + self._connect2socket() + else: + if self.dgram_socket: + sock_type = socket.SOCK_DGRAM + else: + sock_type = socket.SOCK_STREAM + for res in socket.getaddrinfo(self.host, self.port, socket.AF_UNSPEC, sock_type): + if self._connect2socket(res): + break + + except Exception as error: + self.error('unhandled exception during connect : {0}'.format(repr(error))) + self._sock = None + self.__socket_config = None + + def _disconnect(self): + """ + Close socket connection + :return: + """ + if self._sock is not None: + try: + self.debug('closing socket') + self._sock.shutdown(2) # 0 - read, 1 - write, 2 - all + self._sock.close() + except Exception as error: + if not (hasattr(error, 'errno') and error.errno == errno.ENOTCONN): + self.error(error) + self._sock = None + + def _send(self, request=None): + """ + Send request. + :return: boolean + """ + # Send request if it is needed + if self.request != self.__empty_request: + try: + self.debug('set socket write timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.settimeout(self.write_timeout) + self.debug('sending request: {0}'.format(request or self.request)) + self._sock.send(request or self.request) + except Exception as error: + self._socket_error('error sending request: {0}'.format(error)) + self._disconnect() + return False + return True + + def _receive(self, raw=False): + """ + Receive data from socket + :param raw: set `True` to return bytes + :type raw: bool + :return: decoded str or raw bytes + :rtype: str/bytes + """ + data = "" if not raw else b"" + while True: + self.debug('receiving response') + try: + self.debug('set socket read timeout to: {0}'.format(self._sock.gettimeout())) + self._sock.settimeout(self.read_timeout) + buf = self._sock.recv(4096) + except Exception as error: + self._socket_error('failed to receive response: {0}'.format(error)) + self._disconnect() + break + + if buf is None or len(buf) == 0: # handle server disconnect + if data == "" or data == b"": + self._socket_error('unexpectedly disconnected') + else: + self.debug('server closed the connection') + self._disconnect() + break + + self.debug('received data') + data += buf.decode('utf-8', 'ignore') if not raw else buf + if self._check_raw_data(data): + break + + self.debug(u'final response: {0}'.format(data if not raw else u'binary data')) + return data + + def _get_raw_data(self, raw=False, request=None): + """ + Get raw data with low-level "socket" module. + :param raw: set `True` to return bytes + :type raw: bool + :return: decoded data (str) or raw data (bytes) + :rtype: str/bytes + """ + if self._sock is None: + self._connect() + if self._sock is None: + return None + + # Send request if it is needed + if not self._send(request): + return None + + data = self._receive(raw) + + if not self._keep_alive: + self._disconnect() + + return data + + @staticmethod + def _check_raw_data(data): + """ + Check if all data has been gathered from socket + :param data: str + :return: boolean + """ + return bool(data) + + def _parse_config(self): + """ + Parse configuration data + :return: boolean + """ + try: + self.unix_socket = str(self.configuration['socket']) + except (KeyError, TypeError): + self.debug('No unix socket specified. Trying TCP/IP socket.') + self.unix_socket = None + try: + self.host = str(self.configuration['host']) + except (KeyError, TypeError): + self.debug('No host specified. Using: "{0}"'.format(self.host)) + try: + self.port = int(self.configuration['port']) + except (KeyError, TypeError): + self.debug('No port specified. Using: "{0}"'.format(self.port)) + + self.tls = bool(self.configuration.get('tls', self.tls)) + if self.tls and not _TLS_SUPPORT: + self.warning('TLS requested but no TLS module found, disabling TLS support.') + self.tls = False + if _TLS_SUPPORT and not self.tls: + self.debug('No TLS preference specified, not using TLS.') + + if self.tls and _TLS_SUPPORT: + self.key = self.configuration.get('tls_key_file') + self.cert = self.configuration.get('tls_cert_file') + if not self.cert: + # If there's not a valid certificate, clear the key too. + self.debug('No valid TLS client certificate configuration found.') + self.key = None + self.cert = None + elif not self.key: + # If a key isn't listed, the config may still be + # valid, because there may be a key attached to the + # certificate. + self.info('No TLS client key specified, assuming it\'s attached to the certificate.') + self.key = None + + try: + self.request = str(self.configuration['request']) + except (KeyError, TypeError): + self.debug('No request specified. Using: "{0}"'.format(self.request)) + + self.request = self.request.encode() + + def check(self): + self._parse_config() + return SimpleService.check(self) diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py new file mode 100644 index 00000000..76129d37 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Pawel Krupa (paulfantom) +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import urllib3 + +from bases.FrameworkServices.SimpleService import SimpleService + +try: + urllib3.disable_warnings() +except AttributeError: + pass + +URLLIB3_VERSION = urllib3.__version__ +URLLIB3 = 'urllib3' + +class UrlService(SimpleService): + def __init__(self, configuration=None, name=None): + SimpleService.__init__(self, configuration=configuration, name=name) + self.debug("{0} version: {1}".format(URLLIB3, URLLIB3_VERSION)) + self.url = self.configuration.get('url') + self.user = self.configuration.get('user') + self.password = self.configuration.get('pass') + self.proxy_user = self.configuration.get('proxy_user') + self.proxy_password = self.configuration.get('proxy_pass') + self.proxy_url = self.configuration.get('proxy_url') + self.method = self.configuration.get('method', 'GET') + self.header = self.configuration.get('header') + self.body = self.configuration.get('body') + self.request_timeout = self.configuration.get('timeout', 1) + self.respect_retry_after_header = self.configuration.get('respect_retry_after_header') + self.tls_verify = self.configuration.get('tls_verify') + self.tls_ca_file = self.configuration.get('tls_ca_file') + self.tls_key_file = self.configuration.get('tls_key_file') + self.tls_cert_file = self.configuration.get('tls_cert_file') + self._manager = None + + def __make_headers(self, **header_kw): + user = header_kw.get('user') or self.user + password = header_kw.get('pass') or self.password + proxy_user = header_kw.get('proxy_user') or self.proxy_user + proxy_password = header_kw.get('proxy_pass') or self.proxy_password + custom_header = header_kw.get('header') or self.header + header_params = dict(keep_alive=True) + proxy_header_params = dict() + if user and password: + header_params['basic_auth'] = '{user}:{password}'.format(user=user, + password=password) + if proxy_user and proxy_password: + proxy_header_params['proxy_basic_auth'] = '{user}:{password}'.format(user=proxy_user, + password=proxy_password) + try: + header, proxy_header = urllib3.make_headers(**header_params), urllib3.make_headers(**proxy_header_params) + except TypeError as error: + self.error('build_header() error: {error}'.format(error=error)) + return None, None + else: + header.update(custom_header or dict()) + return header, proxy_header + + def _build_manager(self, **header_kw): + header, proxy_header = self.__make_headers(**header_kw) + if header is None or proxy_header is None: + return None + proxy_url = header_kw.get('proxy_url') or self.proxy_url + if proxy_url: + manager = urllib3.ProxyManager + params = dict(proxy_url=proxy_url, headers=header, proxy_headers=proxy_header) + else: + manager = urllib3.PoolManager + params = dict(headers=header) + tls_cert_file = self.tls_cert_file + if tls_cert_file: + params['cert_file'] = tls_cert_file + # NOTE: key_file is useless without cert_file, but + # cert_file may include the key as well. + tls_key_file = self.tls_key_file + if tls_key_file: + params['key_file'] = tls_key_file + tls_ca_file = self.tls_ca_file + if tls_ca_file: + params['ca_certs'] = tls_ca_file + try: + url = header_kw.get('url') or self.url + is_https = url.startswith('https') + if skip_tls_verify(is_https, self.tls_verify, tls_ca_file): + params['ca_certs'] = None + params['cert_reqs'] = 'CERT_NONE' + if is_https: + params['assert_hostname'] = False + return manager(**params) + except (urllib3.exceptions.ProxySchemeUnknown, TypeError) as error: + self.error('build_manager() error:', str(error)) + return None + + def _get_raw_data(self, url=None, manager=None, **kwargs): + """ + Get raw data from http request + :return: str + """ + try: + response = self._do_request(url, manager, **kwargs) + except Exception as error: + self.error('Url: {url}. Error: {error}'.format(url=url or self.url, error=error)) + return None + + if response.status == 200: + if isinstance(response.data, str): + return response.data + return response.data.decode(errors='ignore') + else: + self.debug('Url: {url}. Http response status code: {code}'.format(url=url or self.url, code=response.status)) + return None + + def _get_raw_data_with_status(self, url=None, manager=None, retries=1, redirect=True, **kwargs): + """ + Get status and response body content from http request. Does not catch exceptions + :return: int, str + """ + response = self._do_request(url, manager, retries, redirect, **kwargs) + + if isinstance(response.data, str): + return response.status, response.data + return response.status, response.data.decode(errors='ignore') + + def _do_request(self, url=None, manager=None, retries=1, redirect=True, **kwargs): + """ + Get response from http request. Does not catch exceptions + :return: HTTPResponse + """ + url = url or self.url + manager = manager or self._manager + retry = urllib3.Retry(retries) + if hasattr(retry, 'respect_retry_after_header'): + retry.respect_retry_after_header = bool(self.respect_retry_after_header) + + if self.body: + kwargs['body'] = self.body + + response = manager.request( + method=self.method, + url=url, + timeout=self.request_timeout, + retries=retry, + headers=manager.headers, + redirect=redirect, + **kwargs + ) + return response + + def check(self): + """ + Format configuration data and try to connect to server + :return: boolean + """ + if not (self.url and isinstance(self.url, str)): + self.error('URL is not defined or type is not <str>') + return False + + self._manager = self._build_manager() + if not self._manager: + return False + + try: + data = self._get_data() + except Exception as error: + self.error('_get_data() failed. Url: {url}. Error: {error}'.format(url=self.url, error=error)) + return False + + if isinstance(data, dict) and data: + return True + self.error('_get_data() returned no data or type is not <dict>') + return False + + +def skip_tls_verify(is_https, tls_verify, tls_ca_file): + # default 'tls_verify' value is None + # logic is: + # - never skip if there is 'tls_ca_file' file + # - skip by default for https + # - do not skip by default for http + if tls_ca_file: + return False + if is_https and not tls_verify: + return True + return tls_verify is False diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/__init__.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/__init__.py diff --git a/collectors/python.d.plugin/python_modules/bases/__init__.py b/collectors/python.d.plugin/python_modules/bases/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/__init__.py diff --git a/collectors/python.d.plugin/python_modules/bases/charts.py b/collectors/python.d.plugin/python_modules/bases/charts.py new file mode 100644 index 00000000..203ad167 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/charts.py @@ -0,0 +1,431 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from bases.collection import safe_print + +CHART_PARAMS = ['type', 'id', 'name', 'title', 'units', 'family', 'context', 'chart_type', 'hidden'] +DIMENSION_PARAMS = ['id', 'name', 'algorithm', 'multiplier', 'divisor', 'hidden'] +VARIABLE_PARAMS = ['id', 'value'] + +CHART_TYPES = ['line', 'area', 'stacked'] +DIMENSION_ALGORITHMS = ['absolute', 'incremental', 'percentage-of-absolute-row', 'percentage-of-incremental-row'] + +CHART_BEGIN = 'BEGIN {type}.{id} {since_last}\n' +CHART_CREATE = "CHART {type}.{id} '{name}' '{title}' '{units}' '{family}' '{context}' " \ + "{chart_type} {priority} {update_every} '{hidden}' 'python.d.plugin' '{module_name}'\n" +CHART_OBSOLETE = "CHART {type}.{id} '{name}' '{title}' '{units}' '{family}' '{context}' " \ + "{chart_type} {priority} {update_every} '{hidden} obsolete'\n" + +CLABEL_COLLECT_JOB = "CLABEL '_collect_job' '{actual_job_name}' '0'\n" +CLABEL_COMMIT = "CLABEL_COMMIT\n" + +DIMENSION_CREATE = "DIMENSION '{id}' '{name}' {algorithm} {multiplier} {divisor} '{hidden} {obsolete}'\n" +DIMENSION_SET = "SET '{id}' = {value}\n" + +CHART_VARIABLE_SET = "VARIABLE CHART '{id}' = {value}\n" + +# 1 is label source auto +# https://github.com/netdata/netdata/blob/cc2586de697702f86a3c34e60e23652dd4ddcb42/database/rrd.h#L205 +RUNTIME_CHART_CREATE = "CHART netdata.runtime_{job_name} '' 'Execution time' 'ms' 'python.d' " \ + "netdata.pythond_runtime line 145000 {update_every} '' 'python.d.plugin' '{module_name}'\n" \ + "CLABEL '_collect_job' '{actual_job_name}' '1'\n" \ + "CLABEL_COMMIT\n" \ + "DIMENSION run_time 'run time' absolute 1 1\n" + +ND_INTERNAL_MONITORING_DISABLED = os.getenv("NETDATA_INTERNALS_MONITORING") == "NO" + + +def create_runtime_chart(func): + """ + Calls a wrapped function, then prints runtime chart to stdout. + + Used as a decorator for SimpleService.create() method. + The whole point of making 'create runtime chart' functionality as a decorator was + to help users who re-implements create() in theirs classes. + + :param func: class method + :return: + """ + + def wrapper(*args, **kwargs): + self = args[0] + if not ND_INTERNAL_MONITORING_DISABLED: + chart = RUNTIME_CHART_CREATE.format( + job_name=self.name, + actual_job_name=self.actual_job_name, + update_every=self._runtime_counters.update_every, + module_name=self.module_name, + ) + safe_print(chart) + ok = func(*args, **kwargs) + return ok + + return wrapper + + +class ChartError(Exception): + """Base-class for all exceptions raised by this module""" + + +class DuplicateItemError(ChartError): + """Occurs when user re-adds a chart or a dimension that has already been added""" + + +class ItemTypeError(ChartError): + """Occurs when user passes value of wrong type to Chart, Dimension or ChartVariable class""" + + +class ItemValueError(ChartError): + """Occurs when user passes inappropriate value to Chart, Dimension or ChartVariable class""" + + +class Charts: + """Represent a collection of charts + + All charts stored in a dict. + Chart is a instance of Chart class. + Charts adding must be done using Charts.add_chart() method only""" + + def __init__(self, job_name, actual_job_name, priority, cleanup, get_update_every, module_name): + """ + :param job_name: <bound method> + :param priority: <int> + :param get_update_every: <bound method> + """ + self.job_name = job_name + self.actual_job_name = actual_job_name + self.priority = priority + self.cleanup = cleanup + self.get_update_every = get_update_every + self.module_name = module_name + self.charts = dict() + + def __len__(self): + return len(self.charts) + + def __iter__(self): + return iter(self.charts.values()) + + def __repr__(self): + return 'Charts({0})'.format(self) + + def __str__(self): + return str([chart for chart in self.charts]) + + def __contains__(self, item): + return item in self.charts + + def __getitem__(self, item): + return self.charts[item] + + def __delitem__(self, key): + del self.charts[key] + + def __bool__(self): + return bool(self.charts) + + def __nonzero__(self): + return self.__bool__() + + def add_chart(self, params): + """ + Create Chart instance and add it to the dict + + Manually adds job name, priority and update_every to params. + :param params: <list> + :return: + """ + params = [self.job_name()] + params + new_chart = Chart(params) + + new_chart.params['update_every'] = self.get_update_every() + new_chart.params['priority'] = self.priority + new_chart.params['module_name'] = self.module_name + new_chart.params['actual_job_name'] = self.actual_job_name + + self.priority += 1 + self.charts[new_chart.id] = new_chart + + return new_chart + + def active_charts(self): + return [chart.id for chart in self if not chart.flags.obsoleted] + + +class Chart: + """Represent a chart""" + + def __init__(self, params): + """ + :param params: <list> + """ + if not isinstance(params, list): + raise ItemTypeError("'chart' must be a list type") + if not len(params) >= 8: + raise ItemValueError("invalid value for 'chart', must be {0}".format(CHART_PARAMS)) + + self.params = dict(zip(CHART_PARAMS, (p or str() for p in params))) + self.name = '{type}.{id}'.format(type=self.params['type'], + id=self.params['id']) + if self.params.get('chart_type') not in CHART_TYPES: + self.params['chart_type'] = 'absolute' + hidden = str(self.params.get('hidden', '')) + self.params['hidden'] = 'hidden' if hidden == 'hidden' else '' + + self.dimensions = list() + self.variables = set() + self.flags = ChartFlags() + self.penalty = 0 + + def __getattr__(self, item): + try: + return self.params[item] + except KeyError: + raise AttributeError("'{instance}' has no attribute '{attr}'".format(instance=repr(self), + attr=item)) + + def __repr__(self): + return 'Chart({0})'.format(self.id) + + def __str__(self): + return self.id + + def __iter__(self): + return iter(self.dimensions) + + def __contains__(self, item): + return item in [dimension.id for dimension in self.dimensions] + + def add_variable(self, variable): + """ + :param variable: <list> + :return: + """ + self.variables.add(ChartVariable(variable)) + + def add_dimension(self, dimension): + """ + :param dimension: <list> + :return: + """ + dim = Dimension(dimension) + + if dim.id in self: + raise DuplicateItemError("'{dimension}' already in '{chart}' dimensions".format(dimension=dim.id, + chart=self.name)) + self.refresh() + self.dimensions.append(dim) + return dim + + def del_dimension(self, dimension_id, hide=True): + if dimension_id not in self: + return + idx = self.dimensions.index(dimension_id) + dimension = self.dimensions[idx] + if hide: + dimension.params['hidden'] = 'hidden' + dimension.params['obsolete'] = 'obsolete' + self.create() + self.dimensions.remove(dimension) + + def hide_dimension(self, dimension_id, reverse=False): + if dimension_id not in self: + return + idx = self.dimensions.index(dimension_id) + dimension = self.dimensions[idx] + dimension.params['hidden'] = 'hidden' if not reverse else str() + self.refresh() + + def create(self): + """ + :return: + """ + chart = CHART_CREATE.format(**self.params) + labels = CLABEL_COLLECT_JOB.format(**self.params) + CLABEL_COMMIT + dimensions = ''.join([dimension.create() for dimension in self.dimensions]) + variables = ''.join([var.set(var.value) for var in self.variables if var]) + + self.flags.push = False + self.flags.created = True + + safe_print(chart + labels + dimensions + variables) + + def can_be_updated(self, data): + for dim in self.dimensions: + if dim.get_value(data) is not None: + return True + return False + + def update(self, data, interval): + updated_dimensions, updated_variables = str(), str() + + for dim in self.dimensions: + value = dim.get_value(data) + if value is not None: + updated_dimensions += dim.set(value) + + for var in self.variables: + value = var.get_value(data) + if value is not None: + updated_variables += var.set(value) + + if updated_dimensions: + since_last = interval if self.flags.updated else 0 + + if self.flags.push: + self.create() + + chart_begin = CHART_BEGIN.format(type=self.type, id=self.id, since_last=since_last) + safe_print(chart_begin, updated_dimensions, updated_variables, 'END\n') + + self.flags.updated = True + self.penalty = 0 + else: + self.penalty += 1 + self.flags.updated = False + + return bool(updated_dimensions) + + def obsolete(self): + self.flags.obsoleted = True + if self.flags.created: + safe_print(CHART_OBSOLETE.format(**self.params)) + + def refresh(self): + self.penalty = 0 + self.flags.push = True + self.flags.obsoleted = False + + +class Dimension: + """Represent a dimension""" + + def __init__(self, params): + """ + :param params: <list> + """ + if not isinstance(params, list): + raise ItemTypeError("'dimension' must be a list type") + if not params: + raise ItemValueError("invalid value for 'dimension', must be {0}".format(DIMENSION_PARAMS)) + + self.params = dict(zip(DIMENSION_PARAMS, (p or str() for p in params))) + self.params['name'] = self.params.get('name') or self.params['id'] + + if self.params.get('algorithm') not in DIMENSION_ALGORITHMS: + self.params['algorithm'] = 'absolute' + if not isinstance(self.params.get('multiplier'), int): + self.params['multiplier'] = 1 + if not isinstance(self.params.get('divisor'), int): + self.params['divisor'] = 1 + self.params.setdefault('hidden', '') + self.params.setdefault('obsolete', '') + + def __getattr__(self, item): + try: + return self.params[item] + except KeyError: + raise AttributeError("'{instance}' has no attribute '{attr}'".format(instance=repr(self), + attr=item)) + + def __repr__(self): + return 'Dimension({0})'.format(self.id) + + def __str__(self): + return self.id + + def __eq__(self, other): + if not isinstance(other, Dimension): + return self.id == other + return self.id == other.id + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def create(self): + return DIMENSION_CREATE.format(**self.params) + + def set(self, value): + """ + :param value: <str>: must be a digit + :return: + """ + return DIMENSION_SET.format(id=self.id, + value=value) + + def get_value(self, data): + try: + return int(data[self.id]) + except (KeyError, TypeError): + return None + + +class ChartVariable: + """Represent a chart variable""" + + def __init__(self, params): + """ + :param params: <list> + """ + if not isinstance(params, list): + raise ItemTypeError("'variable' must be a list type") + if not params: + raise ItemValueError("invalid value for 'variable' must be: {0}".format(VARIABLE_PARAMS)) + + self.params = dict(zip(VARIABLE_PARAMS, params)) + self.params.setdefault('value', None) + + def __getattr__(self, item): + try: + return self.params[item] + except KeyError: + raise AttributeError("'{instance}' has no attribute '{attr}'".format(instance=repr(self), + attr=item)) + + def __bool__(self): + return self.value is not None + + def __nonzero__(self): + return self.__bool__() + + def __repr__(self): + return 'ChartVariable({0})'.format(self.id) + + def __str__(self): + return self.id + + def __eq__(self, other): + if isinstance(other, ChartVariable): + return self.id == other.id + return False + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) + + def set(self, value): + return CHART_VARIABLE_SET.format(id=self.id, + value=value) + + def get_value(self, data): + try: + return int(data[self.id]) + except (KeyError, TypeError): + return None + + +class ChartFlags: + def __init__(self): + self.push = True + self.created = False + self.updated = False + self.obsoleted = False diff --git a/collectors/python.d.plugin/python_modules/bases/collection.py b/collectors/python.d.plugin/python_modules/bases/collection.py new file mode 100644 index 00000000..93bf8cf0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/collection.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import os + +from threading import Lock + +PATH = os.getenv('PATH', '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin').split(':') + +CHART_BEGIN = 'BEGIN {0} {1}\n' +CHART_CREATE = "CHART {0} '{1}' '{2}' '{3}' '{4}' '{5}' {6} {7} {8}\n" +DIMENSION_CREATE = "DIMENSION '{0}' '{1}' {2} {3} {4} '{5}'\n" +DIMENSION_SET = "SET '{0}' = {1}\n" + +print_lock = Lock() + + +def setdefault_values(config, base_dict): + for key, value in base_dict.items(): + config.setdefault(key, value) + return config + + +def run_and_exit(func): + def wrapper(*args, **kwargs): + func(*args, **kwargs) + exit(1) + + return wrapper + + +def on_try_except_finally(on_except=(None,), on_finally=(None,)): + except_func = on_except[0] + finally_func = on_finally[0] + + def decorator(func): + def wrapper(*args, **kwargs): + try: + func(*args, **kwargs) + except Exception: + if except_func: + except_func(*on_except[1:]) + finally: + if finally_func: + finally_func(*on_finally[1:]) + + return wrapper + + return decorator + + +def static_vars(**kwargs): + def decorate(func): + for k in kwargs: + setattr(func, k, kwargs[k]) + return func + + return decorate + + +@on_try_except_finally(on_except=(exit, 1)) +def safe_print(*msg): + """ + :param msg: + :return: + """ + print_lock.acquire() + print(''.join(msg)) + print_lock.release() + + +def find_binary(binary): + """ + :param binary: <str> + :return: + """ + for directory in PATH: + binary_name = os.path.join(directory, binary) + if os.path.isfile(binary_name) and os.access(binary_name, os.X_OK): + return binary_name + return None + + +def read_last_line(f): + with open(f, 'rb') as opened: + opened.seek(-2, 2) + while opened.read(1) != b'\n': + opened.seek(-2, 1) + if opened.tell() == 0: + break + result = opened.readline() + return result.decode() + + +def unicode_str(arg): + """Return the argument as a unicode string. + + The `unicode` function has been removed from Python3 and `str` takes its + place. This function is a helper which will try using Python 2's `unicode` + and if it doesn't exist, assume we're using Python 3 and use `str`. + + :param arg: + :return: <str> + """ + # TODO: fix + try: + # https://github.com/netdata/netdata/issues/7613 + if isinstance(arg, unicode): + return arg + return unicode(arg, errors='ignore') + # https://github.com/netdata/netdata/issues/7642 + except TypeError: + return unicode(arg) + except NameError: + return str(arg) diff --git a/collectors/python.d.plugin/python_modules/bases/loaders.py b/collectors/python.d.plugin/python_modules/bases/loaders.py new file mode 100644 index 00000000..095f3a3b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/loaders.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + + +from sys import version_info + +PY_VERSION = version_info[:2] + +try: + if PY_VERSION > (3, 1): + from pyyaml3 import SafeLoader as YamlSafeLoader + else: + from pyyaml2 import SafeLoader as YamlSafeLoader +except ImportError: + from yaml import SafeLoader as YamlSafeLoader + + +try: + from collections import OrderedDict +except ImportError: + from third_party.ordereddict import OrderedDict + + +DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' if PY_VERSION > (3, 1) else u'tag:yaml.org,2002:map' + + +def dict_constructor(loader, node): + return OrderedDict(loader.construct_pairs(node)) + + +YamlSafeLoader.add_constructor(DEFAULT_MAPPING_TAG, dict_constructor) + + +def load_yaml(stream): + loader = YamlSafeLoader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_config(file_name): + with open(file_name, 'r') as stream: + return load_yaml(stream) diff --git a/collectors/python.d.plugin/python_modules/bases/loggers.py b/collectors/python.d.plugin/python_modules/bases/loggers.py new file mode 100644 index 00000000..7ae8ab0c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/bases/loggers.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +# Description: +# Author: Ilya Mashchenko (ilyam8) +# SPDX-License-Identifier: GPL-3.0-or-later + +import logging +import os +import stat +import traceback + +from sys import exc_info + +try: + from time import monotonic as time +except ImportError: + from time import time + +from bases.collection import on_try_except_finally, unicode_str + +LOGGING_LEVELS = { + 'CRITICAL': 50, + 'ERROR': 40, + 'WARNING': 30, + 'INFO': 20, + 'DEBUG': 10, + 'NOTSET': 0, +} + + +def is_stderr_connected_to_journal(): + journal_stream = os.environ.get("JOURNAL_STREAM") + if not journal_stream: + return False + + colon_index = journal_stream.find(":") + if colon_index <= 0: + return False + + device, inode = journal_stream[:colon_index], journal_stream[colon_index + 1:] + + try: + device_number, inode_number = os.fstat(2)[stat.ST_DEV], os.fstat(2)[stat.ST_INO] + except OSError: + return False + + return str(device_number) == device and str(inode_number) == inode + + +is_journal = is_stderr_connected_to_journal() + +DEFAULT_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s : %(message)s' +PYTHON_D_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s' + +if is_journal: + DEFAULT_LOG_LINE_FORMAT = '%(name)s %(levelname)s : %(message)s' + PYTHON_D_LOG_LINE_FORMAT = '%(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s ' + +DEFAULT_LOG_TIME_FORMAT = '%Y-%m-%d %H:%M:%S' +PYTHON_D_LOG_NAME = 'python.d' + + +def add_traceback(func): + def on_call(*args): + self = args[0] + + if not self.log_traceback: + func(*args) + else: + if exc_info()[0]: + func(*args) + func(self, traceback.format_exc()) + else: + func(*args) + + return on_call + + +class BaseLogger(object): + def __init__( + self, + logger_name, + log_fmt=DEFAULT_LOG_LINE_FORMAT, + date_fmt=DEFAULT_LOG_TIME_FORMAT, + handler=logging.StreamHandler, + ): + self.logger = logging.getLogger(logger_name) + self._muted = False + if not self.has_handlers(): + self.severity = 'INFO' + self.logger.addHandler(handler()) + self.set_formatter(fmt=log_fmt, date_fmt=date_fmt) + + def __repr__(self): + return '<Logger: {name})>'.format(name=self.logger.name) + + def set_formatter(self, fmt, date_fmt=DEFAULT_LOG_TIME_FORMAT): + if self.has_handlers(): + self.logger.handlers[0].setFormatter(logging.Formatter(fmt=fmt, datefmt=date_fmt)) + + def has_handlers(self): + return self.logger.handlers + + @property + def severity(self): + return self.logger.getEffectiveLevel() + + @severity.setter + def severity(self, level): + if level in LOGGING_LEVELS: + self.logger.setLevel(LOGGING_LEVELS[level]) + + def _log(self, level, *msg, **kwargs): + if not self._muted: + self.logger.log(level, ' '.join(map(unicode_str, msg)), **kwargs) + + def debug(self, *msg, **kwargs): + self._log(logging.DEBUG, *msg, **kwargs) + + def info(self, *msg, **kwargs): + self._log(logging.INFO, *msg, **kwargs) + + def warning(self, *msg, **kwargs): + self._log(logging.WARN, *msg, **kwargs) + + def error(self, *msg, **kwargs): + self._log(logging.ERROR, *msg, **kwargs) + + def alert(self, *msg, **kwargs): + self._log(logging.CRITICAL, *msg, **kwargs) + + @on_try_except_finally(on_finally=(exit, 1)) + def fatal(self, *msg, **kwargs): + self._log(logging.CRITICAL, *msg, **kwargs) + + def mute(self): + self._muted = True + + def unmute(self): + self._muted = False + + +class PythonDLogger(object): + def __init__( + self, + logger_name=PYTHON_D_LOG_NAME, + log_fmt=PYTHON_D_LOG_LINE_FORMAT, + ): + self.logger = BaseLogger(logger_name, log_fmt=log_fmt) + self.module_name = 'plugin' + self.job_name = 'main' + + _LOG_TRACEBACK = False + + @property + def log_traceback(self): + return PythonDLogger._LOG_TRACEBACK + + @log_traceback.setter + def log_traceback(self, value): + PythonDLogger._LOG_TRACEBACK = value + + def debug(self, *msg): + self.logger.debug(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) + + def info(self, *msg): + self.logger.info(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) + + def warning(self, *msg): + self.logger.warning(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) + + @add_traceback + def error(self, *msg): + self.logger.error(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) + + @add_traceback + def alert(self, *msg): + self.logger.alert(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) + + def fatal(self, *msg): + self.logger.fatal(*msg, extra={ + 'module_name': self.module_name, + 'job_name': self.job_name or self.module_name, + }) diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/__init__.py b/collectors/python.d.plugin/python_modules/pyyaml2/__init__.py new file mode 100644 index 00000000..4d560e43 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/__init__.py @@ -0,0 +1,316 @@ +# SPDX-License-Identifier: MIT + +from error import * + +from tokens import * +from events import * +from nodes import * + +from loader import * +from dumper import * + +__version__ = '3.11' + +try: + from cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + from StringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(object): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __metaclass__ = YAMLObjectMetaclass + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + from_yaml = classmethod(from_yaml) + + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + to_yaml = classmethod(to_yaml) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/composer.py b/collectors/python.d.plugin/python_modules/pyyaml2/composer.py new file mode 100644 index 00000000..6b41b806 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/composer.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Composer', 'ComposerError'] + +from error import MarkedYAMLError +from events import * +from nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer(object): + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor.encode('utf-8'), event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor.encode('utf-8'), self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == u'!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/constructor.py b/collectors/python.d.plugin/python_modules/pyyaml2/constructor.py new file mode 100644 index 00000000..8ad1b90a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/constructor.py @@ -0,0 +1,676 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from error import * +from nodes import * + +import datetime + +import binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor(object): + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = generator.next() + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + try: + hash(key) + except TypeError, exc: + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % exc, key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + add_constructor = classmethod(add_constructor) + + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + add_multi_constructor = classmethod(add_multi_constructor) + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == u'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == u'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == u'tag:yaml.org,2002:value': + key_node.tag = u'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return BaseConstructor.construct_mapping(self, node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + u'yes': True, + u'no': False, + u'true': True, + u'false': False, + u'on': True, + u'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + value = self.construct_scalar(node) + try: + return str(value).decode('base64') + except (binascii.Error, UnicodeEncodeError), exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + ur'''^(?P<year>[0-9][0-9][0-9][0-9]) + -(?P<month>[0-9][0-9]?) + -(?P<day>[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P<hour>[0-9][0-9]?) + :(?P<minute>[0-9][0-9]) + :(?P<second>[0-9][0-9]) + (?:\.(?P<fraction>[0-9]*))? + (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) + (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + value = self.construct_scalar(node) + try: + return value.encode('ascii') + except UnicodeEncodeError: + return value + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), + node.start_mark) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node).encode('utf-8') + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_long(self, node): + return long(self.construct_yaml_int(node)) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError, exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if u'.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = '__builtin__' + object_name = name + try: + __import__(module_name) + except ImportError, exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" % (object_name.encode('utf-8'), + module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + class classobj: pass + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type(self.classobj)) \ + and not args and not kwds: + instance = self.classobj() + instance.__class__ = cls + return instance + elif newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py b/collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py new file mode 100644 index 00000000..2858ab47 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/cyaml.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from constructor import * + +from serializer import * +from representer import * + +from resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/dumper.py b/collectors/python.d.plugin/python_modules/pyyaml2/dumper.py new file mode 100644 index 00000000..3685cbee --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/dumper.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from emitter import * +from serializer import * +from representer import * +from resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/emitter.py b/collectors/python.d.plugin/python_modules/pyyaml2/emitter.py new file mode 100644 index 00000000..9a460a0f --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/emitter.py @@ -0,0 +1,1141 @@ +# SPDX-License-Identifier: MIT + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from error import YAMLError +from events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis(object): + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter(object): + + DEFAULT_TAG_PREFIXES = { + u'!' : u'!', + u'tag:yaml.org,2002:' : u'!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = u'\n' + if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not getattr(self.stream, 'encoding', None): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = self.event.tags.keys() + handles.sort() + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator(u'---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator(u'...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor(u'&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor(u'*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator(u'[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator(u'{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(u':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator(u'-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(u':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == u'') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = u'!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return u'%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != u'!' or handle[-1] != u'!': + raise EmitterError("tag handle must start and end with '!': %r" + % (handle.encode('utf-8'))) + for ch in handle[1:-1]: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch.encode('utf-8'), handle.encode('utf-8'))) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == u'!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return u''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == u'!': + return tag + handle = None + suffix = tag + prefixes = self.tag_prefixes.keys() + prefixes.sort() + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == u'!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.~*\'()[]' \ + or (ch == u'!' and handle != u'!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = u''.join(chunks) + if handle: + return u'%s%s' % (handle, suffix_text) + else: + return u'!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch.encode('utf-8'), anchor.encode('utf-8'))) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith(u'---') or scalar.startswith(u'...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in u'#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in u'?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in u',?[]{}': + flow_indicators = True + if ch == u':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in u'\n\x85\u2028\u2029': + line_breaks = True + if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): + if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == u' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in u'\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write(u'\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = u' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = u' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = u'%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = u'%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator(u'\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != u' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == u'\'': + data = u'\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + self.write_indicator(u'\'', False) + + ESCAPE_REPLACEMENTS = { + u'\0': u'0', + u'\x07': u'a', + u'\x08': u'b', + u'\x09': u't', + u'\x0A': u'n', + u'\x0B': u'v', + u'\x0C': u'f', + u'\x0D': u'r', + u'\x1B': u'e', + u'\"': u'\"', + u'\\': u'\\', + u'\x85': u'N', + u'\xA0': u'_', + u'\u2028': u'L', + u'\u2029': u'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator(u'"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ + or not (u'\x20' <= ch <= u'\x7E' + or (self.allow_unicode + and (u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= u'\xFF': + data = u'\\x%02X' % ord(ch) + elif ch <= u'\uFFFF': + data = u'\\u%04X' % ord(ch) + else: + data = u'\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+u'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == u' ': + data = u'\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator(u'"', False) + + def determine_block_hints(self, text): + hints = u'' + if text: + if text[0] in u' \n\x85\u2028\u2029': + hints += unicode(self.best_indent) + if text[-1] not in u'\n\x85\u2028\u2029': + hints += u'-' + elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': + hints += u'+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'>'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != u' ' \ + and text[start] == u'\n': + self.write_line_break() + leading_space = (ch == u' ') + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == u' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'|'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in u'\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = u' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/error.py b/collectors/python.d.plugin/python_modules/pyyaml2/error.py new file mode 100644 index 00000000..5466be72 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/error.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark(object): + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end].encode('utf-8') + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/events.py b/collectors/python.d.plugin/python_modules/pyyaml2/events.py new file mode 100644 index 00000000..283452ad --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/events.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: MIT + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/loader.py b/collectors/python.d.plugin/python_modules/pyyaml2/loader.py new file mode 100644 index 00000000..1c195531 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/loader.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from reader import * +from scanner import * +from parser import * +from composer import * +from constructor import * +from resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/nodes.py b/collectors/python.d.plugin/python_modules/pyyaml2/nodes.py new file mode 100644 index 00000000..ed2a1b43 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/nodes.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: MIT + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '<empty>' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/parser.py b/collectors/python.d.plugin/python_modules/pyyaml2/parser.py new file mode 100644 index 00000000..97ba0833 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/parser.py @@ -0,0 +1,590 @@ +# SPDX-License-Identifier: MIT + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from error import MarkedYAMLError +from tokens import * +from events import * +from scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser(object): + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + u'!': u'!', + u'!!': u'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '<document start>', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == u'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == u'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle.encode('utf-8'), + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle.encode('utf-8'), + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == u'!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == u'!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == u'!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), u'', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), u'', mark, mark) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/reader.py b/collectors/python.d.plugin/python_modules/pyyaml2/reader.py new file mode 100644 index 00000000..8d422954 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/reader.py @@ -0,0 +1,191 @@ +# SPDX-License-Identifier: MIT +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, str): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to unicode, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `str` object, + # - a `unicode` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = u'' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, unicode): + self.name = "<unicode string>" + self.check_printable(stream) + self.buffer = stream+u'\0' + elif isinstance(stream, str): + self.name = "<string>" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "<file>") + self.eof = False + self.raw_buffer = '' + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in u'\n\x85\u2028\u2029' \ + or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + self.line += 1 + self.column = 0 + elif ch != u'\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and len(self.raw_buffer) < 2: + self.update_raw() + if not isinstance(self.raw_buffer, unicode): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError, exc: + character = exc.object[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += u'\0' + self.raw_buffer = None + break + + def update_raw(self, size=1024): + data = self.stream.read(size) + if data: + self.raw_buffer += data + self.stream_pointer += len(data) + else: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/representer.py b/collectors/python.d.plugin/python_modules/pyyaml2/representer.py new file mode 100644 index 00000000..0a1404ec --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/representer.py @@ -0,0 +1,485 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from error import * +from nodes import * + +import datetime + +import sys, copy_reg, types + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter(object): + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def get_classobj_bases(self, cls): + bases = [cls] + for base in cls.__bases__: + bases.extend(self.get_classobj_bases(base)) + return bases + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if type(data) is types.InstanceType: + data_types = self.get_classobj_bases(data.__class__)+list(data_types) + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, unicode(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + add_representer = classmethod(add_representer) + + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + add_multi_representer = classmethod(add_multi_representer) + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = mapping.items() + mapping.sort() + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (str, unicode, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:null', + u'null') + + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:str', data) + + def represent_bool(self, data): + if data: + value = u'true' + else: + value = u'false' + return self.represent_scalar(u'tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + + def represent_long(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = u'.nan' + elif data == self.inf_value: + value = u'.inf' + elif data == -self.inf_value: + value = u'-.inf' + else: + value = unicode(repr(data)).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if u'.' not in value and u'e' in value: + value = value.replace(u'e', u'.0e', 1) + return self.represent_scalar(u'tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence(u'tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping(u'tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping(u'tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = unicode(data.isoformat()) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = unicode(data.isoformat(' ')) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(unicode, + SafeRepresenter.represent_unicode) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(long, + SafeRepresenter.represent_long) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:python/str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + tag = None + try: + data.encode('ascii') + tag = u'tag:yaml.org,2002:python/unicode' + except UnicodeEncodeError: + tag = u'tag:yaml.org,2002:str' + return self.represent_scalar(tag, data) + + def represent_long(self, data): + tag = u'tag:yaml.org,2002:int' + if int(data) is not data: + tag = u'tag:yaml.org,2002:python/long' + return self.represent_scalar(tag, unicode(data)) + + def represent_complex(self, data): + if data.imag == 0.0: + data = u'%r' % data.real + elif data.real == 0.0: + data = u'%rj' % data.imag + elif data.imag > 0: + data = u'%r+%rj' % (data.real, data.imag) + else: + data = u'%r%rj' % (data.real, data.imag) + return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = u'%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') + + def represent_module(self, data): + return self.represent_scalar( + u'tag:yaml.org,2002:python/module:'+data.__name__, u'') + + def represent_instance(self, data): + # For instances of classic classes, we use __getinitargs__ and + # __getstate__ to serialize the data. + + # If data.__getinitargs__ exists, the object must be reconstructed by + # calling cls(**args), where args is a tuple returned by + # __getinitargs__. Otherwise, the cls.__init__ method should never be + # called and the class instance is created by instantiating a trivial + # class and assigning to the instance's __class__ variable. + + # If data.__getstate__ exists, it returns the state of the object. + # Otherwise, the state of the object is data.__dict__. + + # We produce either a !!python/object or !!python/object/new node. + # If data.__getinitargs__ does not exist and state is a dictionary, we + # produce a !!python/object node . Otherwise we produce a + # !!python/object/new node. + + cls = data.__class__ + class_name = u'%s.%s' % (cls.__module__, cls.__name__) + args = None + state = None + if hasattr(data, '__getinitargs__'): + args = list(data.__getinitargs__()) + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__ + if args is None and isinstance(state, dict): + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+class_name, state) + if isinstance(state, dict) and not state: + return self.represent_sequence( + u'tag:yaml.org,2002:python/object/new:'+class_name, args) + value = {} + if args: + value['args'] = args + value['state'] = state + return self.represent_mapping( + u'tag:yaml.org,2002:python/object/new:'+class_name, value) + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copy_reg.dispatch_table: + reduce = copy_reg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = u'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = u'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = u'%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +Representer.add_representer(str, + Representer.represent_str) + +Representer.add_representer(unicode, + Representer.represent_unicode) + +Representer.add_representer(long, + Representer.represent_long) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +Representer.add_representer(types.ClassType, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(types.InstanceType, + Representer.represent_instance) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/resolver.py b/collectors/python.d.plugin/python_modules/pyyaml2/resolver.py new file mode 100644 index 00000000..49922deb --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/resolver.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseResolver', 'Resolver'] + +from error import * +from nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver(object): + + DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + add_implicit_resolver = classmethod(add_implicit_resolver) + + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, basestring) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (basestring, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + add_path_resolver = classmethod(add_path_resolver) + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, basestring): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, basestring): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == u'': + resolvers = self.yaml_implicit_resolvers.get(u'', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:bool', + re.compile(ur'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list(u'yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:float', + re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list(u'-+0123456789.')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:int', + re.compile(ur'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list(u'-+0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:merge', + re.compile(ur'^(?:<<)$'), + [u'<']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:null', + re.compile(ur'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + [u'~', u'n', u'N', u'']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:timestamp', + re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list(u'0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:value', + re.compile(ur'^(?:=)$'), + [u'=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:yaml', + re.compile(ur'^(?:!|&|\*)$'), + list(u'!&*')) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/scanner.py b/collectors/python.d.plugin/python_modules/pyyaml2/scanner.py new file mode 100644 index 00000000..971da612 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/scanner.py @@ -0,0 +1,1458 @@ +# SPDX-License-Identifier: MIT + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from error import MarkedYAMLError +from tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey(object): + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner(object): + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == u'\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == u'%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == u'-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == u'.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == u'\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == u'[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == u'{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == u']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == u'}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == u',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == u'-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == u'?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == u':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == u'*': + return self.fetch_alias() + + # Is it an anchor? + if ch == u'&': + return self.fetch_anchor() + + # Is it a tag? + if ch == u'!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == u'|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == u'>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == u'\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == u'\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" + % ch.encode('utf-8'), self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in self.possible_simple_keys.keys(): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # A simple key is required only if it is the first token in the current + # line. Therefore it is always allowed. + assert self.allow_simple_key or not required + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'---' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'...' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' + and (ch == u'-' or (not self.flow_level and ch in u'?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if <TAB>: + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == u'\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == u'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == u'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" + % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" + % self.peek().encode('utf-8'), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not (u'0' <= ch <= u'9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 0 + while u'0' <= self.peek(length) <= u'9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == u' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != u' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == u'*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == u'<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != u'>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + elif ch in u'\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = u'!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in u'\0 \r\n\x85\u2028\u2029': + if ch == u'!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = u'!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = u'!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = u'' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != u'\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in u' \t' + length = 0 + while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != u'\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == u'\n' \ + and leading_non_space and self.peek() not in u' \t': + if not breaks: + chunks.append(u' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == u'\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(u' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() != u' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + while self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + u'0': u'\0', + u'a': u'\x07', + u'b': u'\x08', + u't': u'\x09', + u'\t': u'\x09', + u'n': u'\x0A', + u'v': u'\x0B', + u'f': u'\x0C', + u'r': u'\x0D', + u'e': u'\x1B', + u' ': u'\x20', + u'\"': u'\"', + u'\\': u'\\', + u'N': u'\x85', + u'_': u'\xA0', + u'L': u'\u2028', + u'P': u'\u2029', + } + + ESCAPE_CODES = { + u'x': 2, + u'u': 4, + u'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == u'\'' and self.peek(1) == u'\'': + chunks.append(u'\'') + self.forward(2) + elif (double and ch == u'\'') or (not double and ch in u'\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == u'\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k).encode('utf-8')), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(unichr(code)) + self.forward(length) + elif ch in u'\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in u' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == u'\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in u' \t': + self.forward() + if self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == u'#': + break + while True: + ch = self.peek(length) + if ch in u'\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == u':' and + self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in u',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == u':' + and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == u'#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in u' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != u'!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != u' ': + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if ch != u'!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.!~*\'()[]%': + if ch == u'%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch.encode('utf-8'), + self.get_mark()) + return u''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + bytes = [] + mark = self.get_mark() + while self.peek() == u'%': + self.forward() + for k in range(2): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % + (self.peek(k).encode('utf-8')), self.get_mark()) + bytes.append(chr(int(self.prefix(2), 16))) + self.forward(2) + try: + value = unicode(''.join(bytes), 'utf-8') + except UnicodeDecodeError, exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in u'\r\n\x85': + if self.prefix(2) == u'\r\n': + self.forward(2) + else: + self.forward() + return u'\n' + elif ch in u'\u2028\u2029': + self.forward() + return ch + return u'' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/serializer.py b/collectors/python.d.plugin/python_modules/pyyaml2/serializer.py new file mode 100644 index 00000000..15fdbb0c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/serializer.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Serializer', 'SerializerError'] + +from error import YAMLError +from events import * +from nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer(object): + + ANCHOR_TEMPLATE = u'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/collectors/python.d.plugin/python_modules/pyyaml2/tokens.py b/collectors/python.d.plugin/python_modules/pyyaml2/tokens.py new file mode 100644 index 00000000..c5c4fb11 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml2/tokens.py @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: MIT + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '<byte order mark>' + +class DirectiveToken(Token): + id = '<directive>' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '<document start>' + +class DocumentEndToken(Token): + id = '<document end>' + +class StreamStartToken(Token): + id = '<stream start>' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '<stream end>' + +class BlockSequenceStartToken(Token): + id = '<block sequence start>' + +class BlockMappingStartToken(Token): + id = '<block mapping start>' + +class BlockEndToken(Token): + id = '<block end>' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '<alias>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '<anchor>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '<tag>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '<scalar>' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/__init__.py b/collectors/python.d.plugin/python_modules/pyyaml3/__init__.py new file mode 100644 index 00000000..a884b33c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/__init__.py @@ -0,0 +1,313 @@ +# SPDX-License-Identifier: MIT + +from .error import * + +from .tokens import * +from .events import * +from .nodes import * + +from .loader import * +from .dumper import * + +__version__ = '3.11' +try: + from .cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +import io + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = io.StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = io.StringIO() + else: + stream = io.BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(metaclass=YAMLObjectMetaclass): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/composer.py b/collectors/python.d.plugin/python_modules/pyyaml3/composer.py new file mode 100644 index 00000000..c418bba9 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/composer.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Composer', 'ComposerError'] + +from .error import MarkedYAMLError +from .events import * +from .nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer: + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor, event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor, self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == '!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == '!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/constructor.py b/collectors/python.d.plugin/python_modules/pyyaml3/constructor.py new file mode 100644 index 00000000..ee09a7a7 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/constructor.py @@ -0,0 +1,687 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from .error import * +from .nodes import * + +import collections, datetime, base64, binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor: + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if not isinstance(key, collections.Hashable): + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + @classmethod + def add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + @classmethod + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == 'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return super().construct_scalar(node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == 'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == 'tag:yaml.org,2002:value': + key_node.tag = 'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return super().construct_mapping(node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + 'yes': True, + 'no': False, + 'true': True, + 'false': False, + 'on': True, + 'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = self.construct_scalar(node) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + r'''^(?P<year>[0-9][0-9][0-9][0-9]) + -(?P<month>[0-9][0-9]?) + -(?P<day>[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P<hour>[0-9][0-9]?) + :(?P<minute>[0-9][0-9]) + :(?P<second>[0-9][0-9]) + (?:\.(?P<fraction>[0-9]*))? + (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) + (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. + pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag, + node.start_mark) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + 'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node) + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError(None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + def construct_python_long(self, node): + return self.construct_yaml_int(node) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError as exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name, exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if '.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = 'builtins' + object_name = name + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name, exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" + % (object_name, module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value, node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/bytes', + Constructor.construct_python_bytes) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + 'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + 'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py b/collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py new file mode 100644 index 00000000..e6c16d89 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/cyaml.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from .constructor import * + +from .serializer import * +from .representer import * + +from .resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/dumper.py b/collectors/python.d.plugin/python_modules/pyyaml3/dumper.py new file mode 100644 index 00000000..ba590c6e --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/dumper.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/emitter.py b/collectors/python.d.plugin/python_modules/pyyaml3/emitter.py new file mode 100644 index 00000000..d4be65a8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/emitter.py @@ -0,0 +1,1138 @@ +# SPDX-License-Identifier: MIT + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from .error import YAMLError +from .events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis: + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter: + + DEFAULT_TAG_PREFIXES = { + '!' : '!', + 'tag:yaml.org,2002:' : '!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = '\n' + if line_break in ['\r', '\n', '\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not hasattr(self.stream, 'encoding'): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. + + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator('...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = sorted(self.event.tags.keys()) + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator('---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator('...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator('...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. + + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor('&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor('*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator('[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator(']', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. + + def expect_flow_mapping(self): + self.write_indicator('{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator('}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(',', False) + self.write_indent() + self.write_indicator('}', False) + self.state = self.states.pop() + else: + self.write_indicator(',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator('-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator('?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == '') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. + + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = '!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return '%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != '!' or handle[-1] != '!': + raise EmitterError("tag handle must start and end with '!': %r" % handle) + for ch in handle[1:-1]: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch, handle)) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == '!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return ''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == '!': + return tag + handle = None + suffix = tag + prefixes = sorted(self.tag_prefixes.keys()) + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == '!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.~*\'()[]' \ + or (ch == '!' and handle != '!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append('%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = ''.join(chunks) + if handle: + return '%s%s' % (handle, suffix_text) + else: + return '!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch, anchor)) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith('---') or scalar.startswith('...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in '\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. + if ch in '#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in '?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in ',?[]{}': + flow_indicators = True + if ch == ':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == '#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in '\n\x85\u2028\u2029': + line_breaks = True + if not (ch == '\n' or '\x20' <= ch <= '\x7E'): + if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == ' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in '\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. + if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write('\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = ' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = ' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = '%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = '%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator('\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != ' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == '\'': + data = '\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + self.write_indicator('\'', False) + + ESCAPE_REPLACEMENTS = { + '\0': '0', + '\x07': 'a', + '\x08': 'b', + '\x09': 't', + '\x0A': 'n', + '\x0B': 'v', + '\x0C': 'f', + '\x0D': 'r', + '\x1B': 'e', + '\"': '\"', + '\\': '\\', + '\x85': 'N', + '\xA0': '_', + '\u2028': 'L', + '\u2029': 'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator('"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ + or not ('\x20' <= ch <= '\x7E' + or (self.allow_unicode + and ('\xA0' <= ch <= '\uD7FF' + or '\uE000' <= ch <= '\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = '\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= '\xFF': + data = '\\x%02X' % ord(ch) + elif ch <= '\uFFFF': + data = '\\u%04X' % ord(ch) + else: + data = '\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == ' ': + data = '\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator('"', False) + + def determine_block_hints(self, text): + hints = '' + if text: + if text[0] in ' \n\x85\u2028\u2029': + hints += str(self.best_indent) + if text[-1] not in '\n\x85\u2028\u2029': + hints += '-' + elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': + hints += '+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('>'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != ' ' \ + and text[start] == '\n': + self.write_line_break() + leading_space = (ch == ' ') + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + spaces = (ch == ' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator('|'+hints, True) + if hints[-1:] == '+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in '\n\x85\u2028\u2029': + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in '\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = ' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch != ' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in '\n\x85\u2028\u2029': + if text[start] == '\n': + self.write_line_break() + for br in text[start:end]: + if br == '\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in ' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == ' ') + breaks = (ch in '\n\x85\u2028\u2029') + end += 1 + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/error.py b/collectors/python.d.plugin/python_modules/pyyaml3/error.py new file mode 100644 index 00000000..5fec7d44 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/error.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark: + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... ' + end -= 5 + break + snippet = self.buffer[start:end] + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/events.py b/collectors/python.d.plugin/python_modules/pyyaml3/events.py new file mode 100644 index 00000000..283452ad --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/events.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: MIT + +# Abstract classes. + +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass + +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/loader.py b/collectors/python.d.plugin/python_modules/pyyaml3/loader.py new file mode 100644 index 00000000..7ef6cf81 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/loader.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from .reader import * +from .scanner import * +from .parser import * +from .composer import * +from .constructor import * +from .resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/nodes.py b/collectors/python.d.plugin/python_modules/pyyaml3/nodes.py new file mode 100644 index 00000000..ed2a1b43 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/nodes.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: MIT + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '<empty>' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/parser.py b/collectors/python.d.plugin/python_modules/pyyaml3/parser.py new file mode 100644 index 00000000..bcec7f99 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/parser.py @@ -0,0 +1,590 @@ +# SPDX-License-Identifier: MIT + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from .error import MarkedYAMLError +from .tokens import * +from .events import * +from .scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser: + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + '!': '!', + '!!': 'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. + self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '<document start>', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. + self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == 'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == 'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle, + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? + # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle, + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == '!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == '!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == '!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, (implicit, False), '', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected <block end>, but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). + + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError("while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), '', mark, mark) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/reader.py b/collectors/python.d.plugin/python_modules/pyyaml3/reader.py new file mode 100644 index 00000000..0a515fd6 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/reader.py @@ -0,0 +1,193 @@ +# SPDX-License-Identifier: MIT +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current character. + +__all__ = ['Reader', 'ReaderError'] + +from .error import YAMLError, Mark + +import codecs, re + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, bytes): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `bytes` object, + # - a `str` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = '' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, str): + self.name = "<unicode string>" + self.check_printable(stream) + self.buffer = stream+'\0' + elif isinstance(stream, bytes): + self.name = "<byte string>" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "<file>") + self.eof = False + self.raw_buffer = None + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in '\n\x85\u2028\u2029' \ + or (ch == '\r' and self.buffer[self.pointer] != '\n'): + self.line += 1 + self.column = 0 + elif ch != '\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): + self.update_raw() + if isinstance(self.raw_buffer, bytes): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError as exc: + character = self.raw_buffer[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += '\0' + self.raw_buffer = None + break + + def update_raw(self, size=4096): + data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/representer.py b/collectors/python.d.plugin/python_modules/pyyaml3/representer.py new file mode 100644 index 00000000..756a18dc --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/representer.py @@ -0,0 +1,375 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError'] + +from .error import * +from .nodes import * + +import datetime, sys, copyreg, types, base64 + +class RepresenterError(YAMLError): + pass + +class BaseRepresenter: + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + #if node is None: + # raise RepresenterError("recursive objects are not allowed: %r" % data) + return node + #self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, str(data)) + #if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + @classmethod + def add_representer(cls, data_type, representer): + if not 'yaml_representers' in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + + @classmethod + def add_multi_representer(cls, data_type, representer): + if not 'yaml_multi_representers' in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = list(mapping.items()) + try: + mapping = sorted(mapping) + except TypeError: + pass + for item_key, item_value in mapping: + node_key = self.represent_data(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (str, bytes, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar('tag:yaml.org,2002:null', 'null') + + def represent_str(self, data): + return self.represent_scalar('tag:yaml.org,2002:str', data) + + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') + + def represent_bool(self, data): + if data: + value = 'true' + else: + value = 'false' + return self.represent_scalar('tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar('tag:yaml.org,2002:int', str(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = '.nan' + elif data == self.inf_value: + value = '.inf' + elif data == -self.inf_value: + value = '-.inf' + else: + value = repr(data).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if '.' not in value and 'e' in value: + value = value.replace('e', '.0e', 1) + return self.represent_scalar('tag:yaml.org,2002:float', value) + + def represent_list(self, data): + #pairs = (len(data) > 0 and isinstance(data, list)) + #if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + #if not pairs: + return self.represent_sequence('tag:yaml.org,2002:seq', data) + #value = [] + #for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + #return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping('tag:yaml.org,2002:map', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping('tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = data.isoformat() + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = data.isoformat(' ') + return self.represent_scalar('tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + +class Representer(SafeRepresenter): + + def represent_complex(self, data): + if data.imag == 0.0: + data = '%r' % data.real + elif data.real == 0.0: + data = '%rj' % data.imag + elif data.imag > 0: + data = '%r+%rj' % (data.real, data.imag) + else: + data = '%r%rj' % (data.real, data.imag) + return self.represent_scalar('tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = '%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') + + def represent_module(self, data): + return self.represent_scalar( + 'tag:yaml.org,2002:python/module:'+data.__name__, '') + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. + + cls = type(data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = 'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = 'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = '%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + 'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +Representer.add_multi_representer(object, + Representer.represent_object) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/resolver.py b/collectors/python.d.plugin/python_modules/pyyaml3/resolver.py new file mode 100644 index 00000000..50945e04 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/resolver.py @@ -0,0 +1,225 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['BaseResolver', 'Resolver'] + +from .error import * +from .nodes import * + +import re + +class ResolverError(YAMLError): + pass + +class BaseResolver: + + DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + + yaml_implicit_resolvers = {} + yaml_path_resolvers = {} + + def __init__(self): + self.resolver_exact_paths = [] + self.resolver_prefix_paths = [] + + @classmethod + def add_implicit_resolver(cls, tag, regexp, first): + if not 'yaml_implicit_resolvers' in cls.__dict__: + cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() + if first is None: + first = [None] + for ch in first: + cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) + + @classmethod + def add_path_resolver(cls, tag, path, kind=None): + # Note: `add_path_resolver` is experimental. The API could be changed. + # `new_path` is a pattern that is matched against the path from the + # root to the node that is being considered. `node_path` elements are + # tuples `(node_check, index_check)`. `node_check` is a node class: + # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` + # matches any kind of a node. `index_check` could be `None`, a boolean + # value, a string value, or a number. `None` and `False` match against + # any _value_ of sequence and mapping nodes. `True` matches against + # any _key_ of a mapping node. A string `index_check` matches against + # a mapping value that corresponds to a scalar key which content is + # equal to the `index_check` value. An integer `index_check` matches + # against a sequence value with the index equal to `index_check`. + if not 'yaml_path_resolvers' in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, str) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (str, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, str): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, str): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == '': + resolvers = self.yaml_implicit_resolvers.get('', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:bool', + re.compile(r'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list('yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? + |\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* + |[-+]?\.(?:inf|Inf|INF) + |\.(?:nan|NaN|NAN))$''', re.X), + list('-+0123456789.')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:int', + re.compile(r'''^(?:[-+]?0b[0-1_]+ + |[-+]?0[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list('-+0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:merge', + re.compile(r'^(?:<<)$'), + ['<']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:null', + re.compile(r'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + ['~', 'n', 'N', '']) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:timestamp', + re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? + (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list('0123456789')) + +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:value', + re.compile(r'^(?:=)$'), + ['=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. +Resolver.add_implicit_resolver( + 'tag:yaml.org,2002:yaml', + re.compile(r'^(?:!|&|\*)$'), + list('!&*')) + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/scanner.py b/collectors/python.d.plugin/python_modules/pyyaml3/scanner.py new file mode 100644 index 00000000..b55854e8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/scanner.py @@ -0,0 +1,1449 @@ +# SPDX-License-Identifier: MIT + +# Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END +# DIRECTIVE(name, value) +# DOCUMENT-START +# DOCUMENT-END +# BLOCK-SEQUENCE-START +# BLOCK-MAPPING-START +# BLOCK-END +# FLOW-SEQUENCE-START +# FLOW-MAPPING-START +# FLOW-SEQUENCE-END +# FLOW-MAPPING-END +# BLOCK-ENTRY +# FLOW-ENTRY +# KEY +# VALUE +# ALIAS(value) +# ANCHOR(value) +# TAG(value) +# SCALAR(value, plain, style) +# +# Read comments in the Scanner code for more details. +# + +__all__ = ['Scanner', 'ScannerError'] + +from .error import MarkedYAMLError +from .tokens import * + +class ScannerError(MarkedYAMLError): + pass + +class SimpleKey: + # See below simple keys treatment. + + def __init__(self, token_number, required, index, line, column, mark): + self.token_number = token_number + self.required = required + self.index = index + self.line = line + self.column = column + self.mark = mark + +class Scanner: + + def __init__(self): + """Initialize the scanner.""" + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. + # + # Reader supports the following methods + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. + + # Had we reached the end of the stream? + self.done = False + + # The number of unclosed '{' and '['. `flow_level == 0` means block + # context. + self.flow_level = 0 + + # List of processed tokens that are not yet emitted. + self.tokens = [] + + # Add the STREAM-START token. + self.fetch_stream_start() + + # Number of tokens that were emitted through the `get_token` method. + self.tokens_taken = 0 + + # The current indentation level. + self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + self.scan_to_next_token() + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == '\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == '%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == '-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == '.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + #if ch == '\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? + if ch == '[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == '{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == ']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == '}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == ',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == '-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == '?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == ':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == '*': + return self.fetch_alias() + + # Is it an anchor? + if ch == '&': + return self.fetch_anchor() + + # Is it a tag? + if ch == '!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == '|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == '>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == '\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == '\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" % ch, + self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # A simple key is required only if it is the first token in the current + # line. Therefore it is always allowed. + assert self.allow_simple_key or not required + + # The next token might be a simple key. Let's save it's number and + # position. + if self.allow_simple_key: + self.remove_possible_simple_key() + token_number = self.tokens_taken+len(self.tokens) + key = SimpleKey(token_number, required, + self.index, self.line, self.column, self.get_mark()) + self.possible_simple_keys[self.flow_level] = key + + def remove_possible_simple_key(self): + # Remove the saved possible key position at the current flow level. + if self.flow_level in self.possible_simple_keys: + key = self.possible_simple_keys[self.flow_level] + + if key.required: + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.get_mark()) + + del self.possible_simple_keys[self.flow_level] + + # Indentation functions. + + def unwind_indent(self, column): + + ## In flow context, tokens should respect indentation. + ## Actually the condition should be `self.indent >= column` according to + ## the spec. But this condition will prohibit intuitively correct + ## constructions such as + ## key : { + ## } + #if self.flow_level and self.indent > column: + # raise ScannerError(None, None, + # "invalid intendation or unclosed '[' or '{'", + # self.get_mark()) + + # In the flow context, indentation is ignored. We make the scanner less + # restrictive then specification requires. + if self.flow_level: + return + + # In block context, we may need to issue the BLOCK-END tokens. + while self.indent > column: + mark = self.get_mark() + self.indent = self.indents.pop() + self.tokens.append(BlockEndToken(mark, mark)) + + def add_indent(self, column): + # Check if we need to increase indentation. + if self.indent < column: + self.indents.append(self.indent) + self.indent = column + return True + return False + + # Fetchers. + + def fetch_stream_start(self): + # We always add STREAM-START as the first token and STREAM-END as the + # last token. + + # Read the token. + mark = self.get_mark() + + # Add STREAM-START. + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.encoding)) + + + def fetch_stream_end(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + self.possible_simple_keys = {} + + # Read the token. + mark = self.get_mark() + + # Add STREAM-END. + self.tokens.append(StreamEndToken(mark, mark)) + + # The steam is finished. + self.done = True + + def fetch_directive(self): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Scan and add DIRECTIVE. + self.tokens.append(self.scan_directive()) + + def fetch_document_start(self): + self.fetch_document_indicator(DocumentStartToken) + + def fetch_document_end(self): + self.fetch_document_indicator(DocumentEndToken) + + def fetch_document_indicator(self, TokenClass): + + # Set the current intendation to -1. + self.unwind_indent(-1) + + # Reset simple keys. Note that there could not be a block collection + # after '---'. + self.remove_possible_simple_key() + self.allow_simple_key = False + + # Add DOCUMENT-START or DOCUMENT-END. + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_start(self): + self.fetch_flow_collection_start(FlowSequenceStartToken) + + def fetch_flow_mapping_start(self): + self.fetch_flow_collection_start(FlowMappingStartToken) + + def fetch_flow_collection_start(self, TokenClass): + + # '[' and '{' may start a simple key. + self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. + if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert(key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. + if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '---' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == '...' \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' + and (ch == '-' or (not self.flow_level and ch in '?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if <TAB>: + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == '\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == 'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == 'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or '.', but found %r" % self.peek(), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" % self.peek(), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not ('0' <= ch <= '9'): + raise ScannerError("while scanning a directive", start_mark, + "expected a digit, but found %r" % ch, self.get_mark()) + length = 0 + while '0' <= self.peek(length) <= '9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == ' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != ' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch, self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. + start_mark = self.get_mark() + indicator = self.peek() + if indicator == '*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch, self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == '<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != '>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek(), + self.get_mark()) + self.forward() + elif ch in '\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = '!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = '!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = '!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch, self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = '' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != '\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in ' \t' + length = 0 + while self.peek(length) not in '\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != '\0': + + # Unfortunately, folding rules are ambiguous. + # + # This is the folding according to the specification: + + if folded and line_break == '\n' \ + and leading_non_space and self.peek() not in ' \t': + if not breaks: + chunks.append(' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == '\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in '0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in '+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch, self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == ' ': + self.forward() + if self.peek() == '#': + while self.peek() not in '\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in '\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" % ch, + self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() != ' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + while self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == ' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. + if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '\"': '\"', + '\\': '\\', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', + } + + ESCAPE_CODES = { + 'x': 2, + 'u': 4, + 'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == '\'' and self.peek(1) == '\'': + chunks.append('\'') + self.forward(2) + elif (double and ch == '\'') or (not double and ch in '\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == '\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k)), self.get_mark()) + code = int(self.prefix(length), 16) + chunks.append(chr(code)) + self.forward(length) + elif ch in '\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch, self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in ' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == '\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in ' \t': + self.forward() + if self.peek() in '\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == '#': + break + while True: + ch = self.peek(length) + if ch in '\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == ':' and + self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in ',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == ':' + and self.peek(length+1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == '#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! + chunks = [] + length = 0 + while self.peek(length) in ' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in '\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in ' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == '---' or prefix == '...') \ + and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': + return + if line_break != '\n': + chunks.append(line_break) + elif not breaks: + chunks.append(' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != '!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-_': + length += 1 + ch = self.peek(length) + if ch != '!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch, self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + or ch in '-;/?:@&=+$,_.!~*\'()[]%': + if ch == '%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch, self.get_mark()) + return ''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + codes = [] + mark = self.get_mark() + while self.peek() == '%': + self.forward() + for k in range(2): + if self.peek(k) not in '0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" + % self.peek(k), self.get_mark()) + codes.append(int(self.prefix(2), 16)) + self.forward(2) + try: + value = bytes(codes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in '\r\n\x85': + if self.prefix(2) == '\r\n': + self.forward(2) + else: + self.forward() + return '\n' + elif ch in '\u2028\u2029': + self.forward() + return ch + return '' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/serializer.py b/collectors/python.d.plugin/python_modules/pyyaml3/serializer.py new file mode 100644 index 00000000..1ba2f7f9 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/serializer.py @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: MIT + +__all__ = ['Serializer', 'SerializerError'] + +from .error import YAMLError +from .events import * +from .nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer: + + ANCHOR_TEMPLATE = 'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/collectors/python.d.plugin/python_modules/pyyaml3/tokens.py b/collectors/python.d.plugin/python_modules/pyyaml3/tokens.py new file mode 100644 index 00000000..c5c4fb11 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/pyyaml3/tokens.py @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: MIT + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '<byte order mark>' + +class DirectiveToken(Token): + id = '<directive>' + def __init__(self, name, value, start_mark, end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '<document start>' + +class DocumentEndToken(Token): + id = '<document end>' + +class StreamStartToken(Token): + id = '<stream start>' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '<stream end>' + +class BlockSequenceStartToken(Token): + id = '<block sequence start>' + +class BlockMappingStartToken(Token): + id = '<block mapping start>' + +class BlockEndToken(Token): + id = '<block end>' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '<alias>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '<anchor>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '<tag>' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '<scalar>' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + diff --git a/collectors/python.d.plugin/python_modules/third_party/__init__.py b/collectors/python.d.plugin/python_modules/third_party/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/__init__.py diff --git a/collectors/python.d.plugin/python_modules/third_party/boinc_client.py b/collectors/python.d.plugin/python_modules/third_party/boinc_client.py new file mode 100644 index 00000000..ec21779a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/boinc_client.py @@ -0,0 +1,515 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# client.py - Somewhat higher-level GUI_RPC API for BOINC core client +# +# Copyright (C) 2013 Rodrigo Silva (MestreLion) <linux@rodrigosilva.com> +# Copyright (C) 2017 Austin S. Hemmelgarn +# +# SPDX-License-Identifier: GPL-3.0 + +# Based on client/boinc_cmd.cpp + +import hashlib +import socket +import sys +import time +from functools import total_ordering +from xml.etree import ElementTree + +GUI_RPC_PASSWD_FILE = "/var/lib/boinc/gui_rpc_auth.cfg" + +GUI_RPC_HOSTNAME = None # localhost +GUI_RPC_PORT = 31416 +GUI_RPC_TIMEOUT = 1 + +class Rpc(object): + ''' Class to perform GUI RPC calls to a BOINC core client. + Usage in a context manager ('with' block) is recommended to ensure + disconnect() is called. Using the same instance for all calls is also + recommended so it reuses the same socket connection + ''' + def __init__(self, hostname="", port=0, timeout=0, text_output=False): + self.hostname = hostname + self.port = port + self.timeout = timeout + self.sock = None + self.text_output = text_output + + @property + def sockargs(self): + return (self.hostname, self.port, self.timeout) + + def __enter__(self): self.connect(*self.sockargs); return self + def __exit__(self, *args): self.disconnect() + + def connect(self, hostname="", port=0, timeout=0): + ''' Connect to (hostname, port) with timeout in seconds. + Hostname defaults to None (localhost), and port to 31416 + Calling multiple times will disconnect previous connection (if any), + and (re-)connect to host. + ''' + if self.sock: + self.disconnect() + + self.hostname = hostname or GUI_RPC_HOSTNAME + self.port = port or GUI_RPC_PORT + self.timeout = timeout or GUI_RPC_TIMEOUT + + self.sock = socket.create_connection(self.sockargs[0:2], self.sockargs[2]) + + def disconnect(self): + ''' Disconnect from host. Calling multiple times is OK (idempotent) + ''' + if self.sock: + self.sock.close() + self.sock = None + + def call(self, request, text_output=None): + ''' Do an RPC call. Pack and send the XML request and return the + unpacked reply. request can be either plain XML text or a + xml.etree.ElementTree.Element object. Return ElementTree.Element + or XML text according to text_output flag. + Will auto-connect if not connected. + ''' + if text_output is None: + text_output = self.text_output + + if not self.sock: + self.connect(*self.sockargs) + + if not isinstance(request, ElementTree.Element): + request = ElementTree.fromstring(request) + + # pack request + end = '\003' + if sys.version_info[0] < 3: + req = "<boinc_gui_rpc_request>\n{0}\n</boinc_gui_rpc_request>\n{1}".format(ElementTree.tostring(request).replace(' />', '/>'), end) + else: + req = "<boinc_gui_rpc_request>\n{0}\n</boinc_gui_rpc_request>\n{1}".format(ElementTree.tostring(request, encoding='unicode').replace(' />', '/>'), end).encode() + + try: + self.sock.sendall(req) + except (socket.error, socket.herror, socket.gaierror, socket.timeout): + raise + + req = "" + while True: + try: + buf = self.sock.recv(8192) + if not buf: + raise socket.error("No data from socket") + if sys.version_info[0] >= 3: + buf = buf.decode() + except socket.error: + raise + n = buf.find(end) + if not n == -1: break + req += buf + req += buf[:n] + + # unpack reply (remove root tag, ie: first and last lines) + req = '\n'.join(req.strip().rsplit('\n')[1:-1]) + + if text_output: + return req + else: + return ElementTree.fromstring(req) + +def setattrs_from_xml(obj, xml, attrfuncdict={}): + ''' Helper to set values for attributes of a class instance by mapping + matching tags from a XML file. + attrfuncdict is a dict of functions to customize value data type of + each attribute. It falls back to simple int/float/bool/str detection + based on values defined in __init__(). This would not be needed if + Boinc used standard RPC protocol, which includes data type in XML. + ''' + if not isinstance(xml, ElementTree.Element): + xml = ElementTree.fromstring(xml) + for e in list(xml): + if hasattr(obj, e.tag): + attr = getattr(obj, e.tag) + attrfunc = attrfuncdict.get(e.tag, None) + if attrfunc is None: + if isinstance(attr, bool): attrfunc = parse_bool + elif isinstance(attr, int): attrfunc = parse_int + elif isinstance(attr, float): attrfunc = parse_float + elif isinstance(attr, str): attrfunc = parse_str + elif isinstance(attr, list): attrfunc = parse_list + else: attrfunc = lambda x: x + setattr(obj, e.tag, attrfunc(e)) + else: + pass + #print "class missing attribute '%s': %r" % (e.tag, obj) + return obj + + +def parse_bool(e): + ''' Helper to convert ElementTree.Element.text to boolean. + Treat '<foo/>' (and '<foo>[[:blank:]]</foo>') as True + Treat '0' and 'false' as False + ''' + if e.text is None: + return True + else: + return bool(e.text) and not e.text.strip().lower() in ('0', 'false') + + +def parse_int(e): + ''' Helper to convert ElementTree.Element.text to integer. + Treat '<foo/>' (and '<foo></foo>') as 0 + ''' + # int(float()) allows casting to int a value expressed as float in XML + return 0 if e.text is None else int(float(e.text.strip())) + + +def parse_float(e): + ''' Helper to convert ElementTree.Element.text to float. ''' + return 0.0 if e.text is None else float(e.text.strip()) + + +def parse_str(e): + ''' Helper to convert ElementTree.Element.text to string. ''' + return "" if e.text is None else e.text.strip() + + +def parse_list(e): + ''' Helper to convert ElementTree.Element to list. For now, simply return + the list of root element's children + ''' + return list(e) + + +class Enum(object): + UNKNOWN = -1 # Not in original API + + @classmethod + def name(cls, value): + ''' Quick-and-dirty fallback for getting the "name" of an enum item ''' + + # value as string, if it matches an enum attribute. + # Allows short usage as Enum.name("VALUE") besides Enum.name(Enum.VALUE) + if hasattr(cls, str(value)): + return cls.name(getattr(cls, value, None)) + + # value not handled in subclass name() + for k, v in cls.__dict__.items(): + if v == value: + return k.lower().replace('_', ' ') + + # value not found + return cls.name(Enum.UNKNOWN) + + +class CpuSched(Enum): + ''' values of ACTIVE_TASK::scheduler_state and ACTIVE_TASK::next_scheduler_state + "SCHEDULED" is synonymous with "executing" except when CPU throttling + is in use. + ''' + UNINITIALIZED = 0 + PREEMPTED = 1 + SCHEDULED = 2 + + +class ResultState(Enum): + ''' Values of RESULT::state in client. + THESE MUST BE IN NUMERICAL ORDER + (because of the > comparison in RESULT::computing_done()) + see html/inc/common_defs.inc + ''' + NEW = 0 + #// New result + FILES_DOWNLOADING = 1 + #// Input files for result (WU, app version) are being downloaded + FILES_DOWNLOADED = 2 + #// Files are downloaded, result can be (or is being) computed + COMPUTE_ERROR = 3 + #// computation failed; no file upload + FILES_UPLOADING = 4 + #// Output files for result are being uploaded + FILES_UPLOADED = 5 + #// Files are uploaded, notify scheduling server at some point + ABORTED = 6 + #// result was aborted + UPLOAD_FAILED = 7 + #// some output file permanent failure + + +class Process(Enum): + ''' values of ACTIVE_TASK::task_state ''' + UNINITIALIZED = 0 + #// process doesn't exist yet + EXECUTING = 1 + #// process is running, as far as we know + SUSPENDED = 9 + #// we've sent it a "suspend" message + ABORT_PENDING = 5 + #// process exceeded limits; send "abort" message, waiting to exit + QUIT_PENDING = 8 + #// we've sent it a "quit" message, waiting to exit + COPY_PENDING = 10 + #// waiting for async file copies to finish + + +class _Struct(object): + ''' base helper class with common methods for all classes derived from + BOINC's C++ structs + ''' + @classmethod + def parse(cls, xml): + return setattrs_from_xml(cls(), xml) + + def __str__(self, indent=0): + buf = '{0}{1}:\n'.format('\t' * indent, self.__class__.__name__) + for attr in self.__dict__: + value = getattr(self, attr) + if isinstance(value, list): + buf += '{0}\t{1} [\n'.format('\t' * indent, attr) + for v in value: buf += '\t\t{0}\t\t,\n'.format(v) + buf += '\t]\n' + else: + buf += '{0}\t{1}\t{2}\n'.format('\t' * indent, + attr, + value.__str__(indent+2) + if isinstance(value, _Struct) + else repr(value)) + return buf + + +@total_ordering +class VersionInfo(_Struct): + def __init__(self, major=0, minor=0, release=0): + self.major = major + self.minor = minor + self.release = release + + @property + def _tuple(self): + return (self.major, self.minor, self.release) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self._tuple == other._tuple + + def __ne__(self, other): + return not self.__eq__(other) + + def __gt__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self._tuple > other._tuple + + def __str__(self): + return "{0}.{1}.{2}".format(self.major, self.minor, self.release) + + def __repr__(self): + return "{0}{1}".format(self.__class__.__name__, self._tuple) + + +class Result(_Struct): + ''' Also called "task" in some contexts ''' + def __init__(self): + # Names and values follow lib/gui_rpc_client.h @ RESULT + # Order too, except when grouping contradicts client/result.cpp + # RESULT::write_gui(), then XML order is used. + + self.name = "" + self.wu_name = "" + self.version_num = 0 + #// identifies the app used + self.plan_class = "" + self.project_url = "" # from PROJECT.master_url + self.report_deadline = 0.0 # seconds since epoch + self.received_time = 0.0 # seconds since epoch + #// when we got this from server + self.ready_to_report = False + #// we're ready to report this result to the server; + #// either computation is done and all the files have been uploaded + #// or there was an error + self.got_server_ack = False + #// we've received the ack for this result from the server + self.final_cpu_time = 0.0 + self.final_elapsed_time = 0.0 + self.state = ResultState.NEW + self.estimated_cpu_time_remaining = 0.0 + #// actually, estimated elapsed time remaining + self.exit_status = 0 + #// return value from the application + self.suspended_via_gui = False + self.project_suspended_via_gui = False + self.edf_scheduled = False + #// temporary used to tell GUI that this result is deadline-scheduled + self.coproc_missing = False + #// a coproc needed by this job is missing + #// (e.g. because user removed their GPU board). + self.scheduler_wait = False + self.scheduler_wait_reason = "" + self.network_wait = False + self.resources = "" + #// textual description of resources used + + #// the following defined if active + # XML is generated in client/app.cpp ACTIVE_TASK::write_gui() + self.active_task = False + self.active_task_state = Process.UNINITIALIZED + self.app_version_num = 0 + self.slot = -1 + self.pid = 0 + self.scheduler_state = CpuSched.UNINITIALIZED + self.checkpoint_cpu_time = 0.0 + self.current_cpu_time = 0.0 + self.fraction_done = 0.0 + self.elapsed_time = 0.0 + self.swap_size = 0 + self.working_set_size_smoothed = 0.0 + self.too_large = False + self.needs_shmem = False + self.graphics_exec_path = "" + self.web_graphics_url = "" + self.remote_desktop_addr = "" + self.slot_path = "" + #// only present if graphics_exec_path is + + # The following are not in original API, but are present in RPC XML reply + self.completed_time = 0.0 + #// time when ready_to_report was set + self.report_immediately = False + self.working_set_size = 0 + self.page_fault_rate = 0.0 + #// derived by higher-level code + + # The following are in API, but are NEVER in RPC XML reply. Go figure + self.signal = 0 + + self.app = None # APP* + self.wup = None # WORKUNIT* + self.project = None # PROJECT* + self.avp = None # APP_VERSION* + + @classmethod + def parse(cls, xml): + if not isinstance(xml, ElementTree.Element): + xml = ElementTree.fromstring(xml) + + # parse main XML + result = super(Result, cls).parse(xml) + + # parse '<active_task>' children + active_task = xml.find('active_task') + if active_task is None: + result.active_task = False # already the default after __init__() + else: + result.active_task = True # already the default after main parse + result = setattrs_from_xml(result, active_task) + + #// if CPU time is nonzero but elapsed time is zero, + #// we must be talking to an old client. + #// Set elapsed = CPU + #// (easier to deal with this here than in the manager) + if result.current_cpu_time != 0 and result.elapsed_time == 0: + result.elapsed_time = result.current_cpu_time + + if result.final_cpu_time != 0 and result.final_elapsed_time == 0: + result.final_elapsed_time = result.final_cpu_time + + return result + + def __str__(self): + buf = '{0}:\n'.format(self.__class__.__name__) + for attr in self.__dict__: + value = getattr(self, attr) + if attr in ['received_time', 'report_deadline']: + value = time.ctime(value) + buf += '\t{0}\t{1}\n'.format(attr, value) + return buf + + +class BoincClient(object): + + def __init__(self, host="", port=0, passwd=None): + self.hostname = host + self.port = port + self.passwd = passwd + self.rpc = Rpc(text_output=False) + self.version = None + self.authorized = False + + # Informative, not authoritative. Records status of *last* RPC call, + # but does not infer success about the *next* one. + # Thus, it should be read *after* an RPC call, not prior to one + self.connected = False + + def __enter__(self): self.connect(); return self + def __exit__(self, *args): self.disconnect() + + def connect(self): + try: + self.rpc.connect(self.hostname, self.port) + self.connected = True + except socket.error: + self.connected = False + return + self.authorized = self.authorize(self.passwd) + self.version = self.exchange_versions() + + def disconnect(self): + self.rpc.disconnect() + + def authorize(self, password): + ''' Request authorization. If password is None and we are connecting + to localhost, try to read password from the local config file + GUI_RPC_PASSWD_FILE. If file can't be read (not found or no + permission to read), try to authorize with a blank password. + If authorization is requested and fails, all subsequent calls + will be refused with socket.error 'Connection reset by peer' (104). + Since most local calls do no require authorization, do not attempt + it if you're not sure about the password. + ''' + if password is None and not self.hostname: + password = read_gui_rpc_password() or "" + nonce = self.rpc.call('<auth1/>').text + authhash = hashlib.md5('{0}{1}'.format(nonce, password).encode()).hexdigest().lower() + reply = self.rpc.call('<auth2><nonce_hash>{0}</nonce_hash></auth2>'.format(authhash)) + + if reply.tag == 'authorized': + return True + else: + return False + + def exchange_versions(self): + ''' Return VersionInfo instance with core client version info ''' + return VersionInfo.parse(self.rpc.call('<exchange_versions/>')) + + def get_tasks(self): + ''' Same as get_results(active_only=False) ''' + return self.get_results(False) + + def get_results(self, active_only=False): + ''' Get a list of results. + Those that are in progress will have information such as CPU time + and fraction done. Each result includes a name; + Use CC_STATE::lookup_result() to find this result in the current static state; + if it's not there, call get_state() again. + ''' + reply = self.rpc.call("<get_results><active_only>{0}</active_only></get_results>".format(1 if active_only else 0)) + if not reply.tag == 'results': + return [] + + results = [] + for item in list(reply): + results.append(Result.parse(item)) + + return results + + +def read_gui_rpc_password(): + ''' Read password string from GUI_RPC_PASSWD_FILE file, trim the last CR + (if any), and return it + ''' + try: + with open(GUI_RPC_PASSWD_FILE, 'r') as f: + buf = f.read() + if buf.endswith('\n'): return buf[:-1] # trim last CR + else: return buf + except IOError: + # Permission denied or File not found. + pass diff --git a/collectors/python.d.plugin/python_modules/third_party/filelock.py b/collectors/python.d.plugin/python_modules/third_party/filelock.py new file mode 100644 index 00000000..4c981672 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/filelock.py @@ -0,0 +1,451 @@ +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to <http://unlicense.org> + +""" +A platform independent file lock that supports the with-statement. +""" + + +# Modules +# ------------------------------------------------ +import logging +import os +import threading +import time +try: + import warnings +except ImportError: + warnings = None + +try: + import msvcrt +except ImportError: + msvcrt = None + +try: + import fcntl +except ImportError: + fcntl = None + + +# Backward compatibility +# ------------------------------------------------ +try: + TimeoutError +except NameError: + TimeoutError = OSError + + +# Data +# ------------------------------------------------ +__all__ = [ + "Timeout", + "BaseFileLock", + "WindowsFileLock", + "UnixFileLock", + "SoftFileLock", + "FileLock" +] + +__version__ = "3.0.12" + + +_logger = None +def logger(): + """Returns the logger instance used in this module.""" + global _logger + _logger = _logger or logging.getLogger(__name__) + return _logger + + +# Exceptions +# ------------------------------------------------ +class Timeout(TimeoutError): + """ + Raised when the lock could not be acquired in *timeout* + seconds. + """ + + def __init__(self, lock_file): + """ + """ + #: The path of the file lock. + self.lock_file = lock_file + return None + + def __str__(self): + temp = "The file lock '{}' could not be acquired."\ + .format(self.lock_file) + return temp + + +# Classes +# ------------------------------------------------ + +# This is a helper class which is returned by :meth:`BaseFileLock.acquire` +# and wraps the lock to make sure __enter__ is not called twice when entering +# the with statement. +# If we would simply return *self*, the lock would be acquired again +# in the *__enter__* method of the BaseFileLock, but not released again +# automatically. +# +# :seealso: issue #37 (memory leak) +class _Acquire_ReturnProxy(object): + + def __init__(self, lock): + self.lock = lock + return None + + def __enter__(self): + return self.lock + + def __exit__(self, exc_type, exc_value, traceback): + self.lock.release() + return None + + +class BaseFileLock(object): + """ + Implements the base class of a file lock. + """ + + def __init__(self, lock_file, timeout = -1): + """ + """ + # The path to the lock file. + self._lock_file = lock_file + + # The file descriptor for the *_lock_file* as it is returned by the + # os.open() function. + # This file lock is only NOT None, if the object currently holds the + # lock. + self._lock_file_fd = None + + # The default timeout value. + self.timeout = timeout + + # We use this lock primarily for the lock counter. + self._thread_lock = threading.Lock() + + # The lock counter is used for implementing the nested locking + # mechanism. Whenever the lock is acquired, the counter is increased and + # the lock is only released, when this value is 0 again. + self._lock_counter = 0 + return None + + @property + def lock_file(self): + """ + The path to the lock file. + """ + return self._lock_file + + @property + def timeout(self): + """ + You can set a default timeout for the filelock. It will be used as + fallback value in the acquire method, if no timeout value (*None*) is + given. + + If you want to disable the timeout, set it to a negative value. + + A timeout of 0 means, that there is exactly one attempt to acquire the + file lock. + + .. versionadded:: 2.0.0 + """ + return self._timeout + + @timeout.setter + def timeout(self, value): + """ + """ + self._timeout = float(value) + return None + + # Platform dependent locking + # -------------------------------------------- + + def _acquire(self): + """ + Platform dependent. If the file lock could be + acquired, self._lock_file_fd holds the file descriptor + of the lock file. + """ + raise NotImplementedError() + + def _release(self): + """ + Releases the lock and sets self._lock_file_fd to None. + """ + raise NotImplementedError() + + # Platform independent methods + # -------------------------------------------- + + @property + def is_locked(self): + """ + True, if the object holds the file lock. + + .. versionchanged:: 2.0.0 + + This was previously a method and is now a property. + """ + return self._lock_file_fd is not None + + def acquire(self, timeout=None, poll_intervall=0.05): + """ + Acquires the file lock or fails with a :exc:`Timeout` error. + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + :arg float timeout: + The maximum time waited for the file lock. + If ``timeout < 0``, there is no timeout and this method will + block until the lock could be acquired. + If ``timeout`` is None, the default :attr:`~timeout` is used. + + :arg float poll_intervall: + We check once in *poll_intervall* seconds if we can acquire the + file lock. + + :raises Timeout: + if the lock could not be acquired in *timeout* seconds. + + .. versionchanged:: 2.0.0 + + This method returns now a *proxy* object instead of *self*, + so that it can be used in a with statement without side effects. + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self.timeout + + # Increment the number right at the beginning. + # We can still undo it, if something fails. + with self._thread_lock: + self._lock_counter += 1 + + lock_id = id(self) + lock_filename = self._lock_file + start_time = time.time() + try: + while True: + with self._thread_lock: + if not self.is_locked: + logger().debug('Attempting to acquire lock %s on %s', lock_id, lock_filename) + self._acquire() + + if self.is_locked: + logger().info('Lock %s acquired on %s', lock_id, lock_filename) + break + elif timeout >= 0 and time.time() - start_time > timeout: + logger().debug('Timeout on acquiring lock %s on %s', lock_id, lock_filename) + raise Timeout(self._lock_file) + else: + logger().debug( + 'Lock %s not acquired on %s, waiting %s seconds ...', + lock_id, lock_filename, poll_intervall + ) + time.sleep(poll_intervall) + except: + # Something did go wrong, so decrement the counter. + with self._thread_lock: + self._lock_counter = max(0, self._lock_counter - 1) + + raise + return _Acquire_ReturnProxy(lock = self) + + def release(self, force = False): + """ + Releases the file lock. + + Please note, that the lock is only completly released, if the lock + counter is 0. + + Also note, that the lock file itself is not automatically deleted. + + :arg bool force: + If true, the lock counter is ignored and the lock is released in + every case. + """ + with self._thread_lock: + + if self.is_locked: + self._lock_counter -= 1 + + if self._lock_counter == 0 or force: + lock_id = id(self) + lock_filename = self._lock_file + + logger().debug('Attempting to release lock %s on %s', lock_id, lock_filename) + self._release() + self._lock_counter = 0 + logger().info('Lock %s released on %s', lock_id, lock_filename) + + return None + + def __enter__(self): + self.acquire() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.release() + return None + + def __del__(self): + self.release(force = True) + return None + + +# Windows locking mechanism +# ~~~~~~~~~~~~~~~~~~~~~~~~~ + +class WindowsFileLock(BaseFileLock): + """ + Uses the :func:`msvcrt.locking` function to hard lock the lock file on + windows systems. + """ + + def _acquire(self): + open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC + + try: + fd = os.open(self._lock_file, open_mode) + except OSError: + pass + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except (IOError, OSError): + os.close(fd) + else: + self._lock_file_fd = fd + return None + + def _release(self): + fd = self._lock_file_fd + self._lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + try: + os.remove(self._lock_file) + # Probably another instance of the application + # that acquired the file lock. + except OSError: + pass + return None + +# Unix locking mechanism +# ~~~~~~~~~~~~~~~~~~~~~~ + +class UnixFileLock(BaseFileLock): + """ + Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems. + """ + + def _acquire(self): + open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC + fd = os.open(self._lock_file, open_mode) + + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except (IOError, OSError): + os.close(fd) + else: + self._lock_file_fd = fd + return None + + def _release(self): + # Do not remove the lockfile: + # + # https://github.com/benediktschmitt/py-filelock/issues/31 + # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition + fd = self._lock_file_fd + self._lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + return None + +# Soft lock +# ~~~~~~~~~ + +class SoftFileLock(BaseFileLock): + """ + Simply watches the existence of the lock file. + """ + + def _acquire(self): + open_mode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC + try: + fd = os.open(self._lock_file, open_mode) + except (IOError, OSError): + pass + else: + self._lock_file_fd = fd + return None + + def _release(self): + os.close(self._lock_file_fd) + self._lock_file_fd = None + + try: + os.remove(self._lock_file) + # The file is already deleted and that's what we want. + except OSError: + pass + return None + + +# Platform filelock +# ~~~~~~~~~~~~~~~~~ + +#: Alias for the lock, which should be used for the current platform. On +#: Windows, this is an alias for :class:`WindowsFileLock`, on Unix for +#: :class:`UnixFileLock` and otherwise for :class:`SoftFileLock`. +FileLock = None + +if msvcrt: + FileLock = WindowsFileLock +elif fcntl: + FileLock = UnixFileLock +else: + FileLock = SoftFileLock + + if warnings is not None: + warnings.warn("only soft file lock is available") diff --git a/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py b/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py new file mode 100644 index 00000000..f873eac8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py @@ -0,0 +1,327 @@ +# SPDX-License-Identifier: LGPL-2.1 +""" +@package sensors.py +Python Bindings for libsensors3 + +use the documentation of libsensors for the low level API. +see example.py for high level API usage. + +@author: Pavel Rojtberg (http://www.rojtberg.net) +@see: https://github.com/paroj/sensors.py +@copyright: LGPLv2 (same as libsensors) <http://opensource.org/licenses/LGPL-2.1> +""" + +from ctypes import * +import ctypes.util + +_libc = cdll.LoadLibrary(ctypes.util.find_library("c")) +# see https://github.com/paroj/sensors.py/issues/1 +_libc.free.argtypes = [c_void_p] + +_hdl = cdll.LoadLibrary(ctypes.util.find_library("sensors")) + +version = c_char_p.in_dll(_hdl, "libsensors_version").value.decode("ascii") + + +class SensorsError(Exception): + pass + + +class ErrorWildcards(SensorsError): + pass + + +class ErrorNoEntry(SensorsError): + pass + + +class ErrorAccessRead(SensorsError, OSError): + pass + + +class ErrorKernel(SensorsError, OSError): + pass + + +class ErrorDivZero(SensorsError, ZeroDivisionError): + pass + + +class ErrorChipName(SensorsError): + pass + + +class ErrorBusName(SensorsError): + pass + + +class ErrorParse(SensorsError): + pass + + +class ErrorAccessWrite(SensorsError, OSError): + pass + + +class ErrorIO(SensorsError, IOError): + pass + + +class ErrorRecursion(SensorsError): + pass + + +_ERR_MAP = { + 1: ErrorWildcards, + 2: ErrorNoEntry, + 3: ErrorAccessRead, + 4: ErrorKernel, + 5: ErrorDivZero, + 6: ErrorChipName, + 7: ErrorBusName, + 8: ErrorParse, + 9: ErrorAccessWrite, + 10: ErrorIO, + 11: ErrorRecursion +} + + +def raise_sensor_error(errno, message=''): + raise _ERR_MAP[abs(errno)](message) + + +class bus_id(Structure): + _fields_ = [("type", c_short), + ("nr", c_short)] + + +class chip_name(Structure): + _fields_ = [("prefix", c_char_p), + ("bus", bus_id), + ("addr", c_int), + ("path", c_char_p)] + + +class feature(Structure): + _fields_ = [("name", c_char_p), + ("number", c_int), + ("type", c_int)] + + # sensors_feature_type + IN = 0x00 + FAN = 0x01 + TEMP = 0x02 + POWER = 0x03 + ENERGY = 0x04 + CURR = 0x05 + HUMIDITY = 0x06 + MAX_MAIN = 0x7 + VID = 0x10 + INTRUSION = 0x11 + MAX_OTHER = 0x12 + BEEP_ENABLE = 0x18 + + +class subfeature(Structure): + _fields_ = [("name", c_char_p), + ("number", c_int), + ("type", c_int), + ("mapping", c_int), + ("flags", c_uint)] + + +_hdl.sensors_get_detected_chips.restype = POINTER(chip_name) +_hdl.sensors_get_features.restype = POINTER(feature) +_hdl.sensors_get_all_subfeatures.restype = POINTER(subfeature) +_hdl.sensors_get_label.restype = c_void_p # return pointer instead of str so we can free it +_hdl.sensors_get_adapter_name.restype = c_char_p # docs do not say whether to free this or not +_hdl.sensors_strerror.restype = c_char_p + +### RAW API ### +MODE_R = 1 +MODE_W = 2 +COMPUTE_MAPPING = 4 + + +def init(cfg_file=None): + file = _libc.fopen(cfg_file.encode("utf-8"), "r") if cfg_file is not None else None + + result = _hdl.sensors_init(file) + if result != 0: + raise_sensor_error(result, "sensors_init failed") + + if file is not None: + _libc.fclose(file) + + +def cleanup(): + _hdl.sensors_cleanup() + + +def parse_chip_name(orig_name): + ret = chip_name() + err = _hdl.sensors_parse_chip_name(orig_name.encode("utf-8"), byref(ret)) + + if err < 0: + raise_sensor_error(err, strerror(err)) + + return ret + + +def strerror(errnum): + return _hdl.sensors_strerror(errnum).decode("utf-8") + + +def free_chip_name(chip): + _hdl.sensors_free_chip_name(byref(chip)) + + +def get_detected_chips(match, nr): + """ + @return: (chip, next nr to query) + """ + _nr = c_int(nr) + + if match is not None: + match = byref(match) + + chip = _hdl.sensors_get_detected_chips(match, byref(_nr)) + chip = chip.contents if bool(chip) else None + return chip, _nr.value + + +def chip_snprintf_name(chip, buffer_size=200): + """ + @param buffer_size defaults to the size used in the sensors utility + """ + ret = create_string_buffer(buffer_size) + err = _hdl.sensors_snprintf_chip_name(ret, buffer_size, byref(chip)) + + if err < 0: + raise_sensor_error(err, strerror(err)) + + return ret.value.decode("utf-8") + + +def do_chip_sets(chip): + """ + @attention this function was not tested + """ + err = _hdl.sensors_do_chip_sets(byref(chip)) + if err < 0: + raise_sensor_error(err, strerror(err)) + + +def get_adapter_name(bus): + return _hdl.sensors_get_adapter_name(byref(bus)).decode("utf-8") + + +def get_features(chip, nr): + """ + @return: (feature, next nr to query) + """ + _nr = c_int(nr) + feature = _hdl.sensors_get_features(byref(chip), byref(_nr)) + feature = feature.contents if bool(feature) else None + return feature, _nr.value + + +def get_label(chip, feature): + ptr = _hdl.sensors_get_label(byref(chip), byref(feature)) + val = cast(ptr, c_char_p).value.decode("utf-8") + _libc.free(ptr) + return val + + +def get_all_subfeatures(chip, feature, nr): + """ + @return: (subfeature, next nr to query) + """ + _nr = c_int(nr) + subfeature = _hdl.sensors_get_all_subfeatures(byref(chip), byref(feature), byref(_nr)) + subfeature = subfeature.contents if bool(subfeature) else None + return subfeature, _nr.value + + +def get_value(chip, subfeature_nr): + val = c_double() + err = _hdl.sensors_get_value(byref(chip), subfeature_nr, byref(val)) + if err < 0: + raise_sensor_error(err, strerror(err)) + return val.value + + +def set_value(chip, subfeature_nr, value): + """ + @attention this function was not tested + """ + val = c_double(value) + err = _hdl.sensors_set_value(byref(chip), subfeature_nr, byref(val)) + if err < 0: + raise_sensor_error(err, strerror(err)) + + +### Convenience API ### +class ChipIterator: + def __init__(self, match=None): + self.match = parse_chip_name(match) if match is not None else None + self.nr = 0 + + def __iter__(self): + return self + + def __next__(self): + chip, self.nr = get_detected_chips(self.match, self.nr) + + if chip is None: + raise StopIteration + + return chip + + def __del__(self): + if self.match is not None: + free_chip_name(self.match) + + def next(self): # python2 compability + return self.__next__() + + +class FeatureIterator: + def __init__(self, chip): + self.chip = chip + self.nr = 0 + + def __iter__(self): + return self + + def __next__(self): + feature, self.nr = get_features(self.chip, self.nr) + + if feature is None: + raise StopIteration + + return feature + + def next(self): # python2 compability + return self.__next__() + + +class SubFeatureIterator: + def __init__(self, chip, feature): + self.chip = chip + self.feature = feature + self.nr = 0 + + def __iter__(self): + return self + + def __next__(self): + subfeature, self.nr = get_all_subfeatures(self.chip, self.feature, self.nr) + + if subfeature is None: + raise StopIteration + + return subfeature + + def next(self): # python2 compability + return self.__next__() diff --git a/collectors/python.d.plugin/python_modules/third_party/mcrcon.py b/collectors/python.d.plugin/python_modules/third_party/mcrcon.py new file mode 100644 index 00000000..a65a304b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/mcrcon.py @@ -0,0 +1,74 @@ +# Minecraft Remote Console module. +# +# Copyright (C) 2015 Barnaby Gale +# +# SPDX-License-Identifier: MIT + +import socket +import select +import struct +import time + + +class MCRconException(Exception): + pass + + +class MCRcon(object): + socket = None + + def connect(self, host, port, password): + if self.socket is not None: + raise MCRconException("Already connected") + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.settimeout(0.9) + self.socket.connect((host, port)) + self.send(3, password) + + def disconnect(self): + if self.socket is None: + raise MCRconException("Already disconnected") + self.socket.close() + self.socket = None + + def read(self, length): + data = b"" + while len(data) < length: + data += self.socket.recv(length - len(data)) + return data + + def send(self, out_type, out_data): + if self.socket is None: + raise MCRconException("Must connect before sending data") + + # Send a request packet + out_payload = struct.pack('<ii', 0, out_type) + out_data.encode('utf8') + b'\x00\x00' + out_length = struct.pack('<i', len(out_payload)) + self.socket.send(out_length + out_payload) + + # Read response packets + in_data = "" + while True: + # Read a packet + in_length, = struct.unpack('<i', self.read(4)) + in_payload = self.read(in_length) + in_id = struct.unpack('<ii', in_payload[:8]) + in_data_partial, in_padding = in_payload[8:-2], in_payload[-2:] + + # Sanity checks + if in_padding != b'\x00\x00': + raise MCRconException("Incorrect padding") + if in_id == -1: + raise MCRconException("Login failed") + + # Record the response + in_data += in_data_partial.decode('utf8') + + # If there's nothing more to receive, return the response + if len(select.select([self.socket], [], [], 0)[0]) == 0: + return in_data + + def command(self, command): + result = self.send(2, command) + time.sleep(0.003) # MC-72390 workaround + return result diff --git a/collectors/python.d.plugin/python_modules/third_party/monotonic.py b/collectors/python.d.plugin/python_modules/third_party/monotonic.py new file mode 100644 index 00000000..4ebd556c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/monotonic.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +# +# SPDX-License-Identifier: Apache-2.0 +""" + monotonic + ~~~~~~~~~ + + This module provides a ``monotonic()`` function which returns the + value (in fractional seconds) of a clock which never goes backwards. + + On Python 3.3 or newer, ``monotonic`` will be an alias of + ``time.monotonic`` from the standard library. On older versions, + it will fall back to an equivalent implementation: + + +-------------+----------------------------------------+ + | Linux, BSD | ``clock_gettime(3)`` | + +-------------+----------------------------------------+ + | Windows | ``GetTickCount`` or ``GetTickCount64`` | + +-------------+----------------------------------------+ + | OS X | ``mach_absolute_time`` | + +-------------+----------------------------------------+ + + If no suitable implementation exists for the current platform, + attempting to import this module (or to import from it) will + cause a ``RuntimeError`` exception to be raised. + + + Copyright 2014, 2015, 2016 Ori Livneh <ori@wikimedia.org> + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +""" +import time + + +__all__ = ('monotonic',) + + +try: + monotonic = time.monotonic +except AttributeError: + import ctypes + import ctypes.util + import os + import sys + import threading + + + def clock_clock_gettime_c_library(): + return ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True).clock_gettime + + + def clock_clock_gettime_rt_library(): + return ctypes.CDLL(ctypes.util.find_library('rt'), use_errno=True).clock_gettime + + + def clock_clock_gettime_c_library_synology6(): + return ctypes.CDLL('/usr/lib/libc.so.6', use_errno=True).clock_gettime + + + def clock_clock_gettime_rt_library_synology6(): + return ctypes.CDLL('/usr/lib/librt.so.1', use_errno=True).clock_gettime + + + def clock_gettime_linux(): + # see https://github.com/netdata/netdata/issues/7976 + order = [ + clock_clock_gettime_c_library, + clock_clock_gettime_rt_library, + clock_clock_gettime_c_library_synology6, + clock_clock_gettime_rt_library_synology6, + ] + + for gettime in order: + try: + return gettime() + except (RuntimeError, AttributeError, OSError): + continue + raise RuntimeError('can not find c and rt libraries') + + + try: + if sys.platform == 'darwin': # OS X, iOS + # See Technical Q&A QA1398 of the Mac Developer Library: + # <https://developer.apple.com/library/mac/qa/qa1398/> + libc = ctypes.CDLL('/usr/lib/libc.dylib', use_errno=True) + + class mach_timebase_info_data_t(ctypes.Structure): + """System timebase info. Defined in <mach/mach_time.h>.""" + _fields_ = (('numer', ctypes.c_uint32), + ('denom', ctypes.c_uint32)) + + mach_absolute_time = libc.mach_absolute_time + mach_absolute_time.restype = ctypes.c_uint64 + + timebase = mach_timebase_info_data_t() + libc.mach_timebase_info(ctypes.byref(timebase)) + ticks_per_second = timebase.numer / timebase.denom * 1.0e9 + + def monotonic(): + """Monotonic clock, cannot go backward.""" + return mach_absolute_time() / ticks_per_second + + elif sys.platform.startswith('win32') or sys.platform.startswith('cygwin'): + if sys.platform.startswith('cygwin'): + # Note: cygwin implements clock_gettime (CLOCK_MONOTONIC = 4) since + # version 1.7.6. Using raw WinAPI for maximum version compatibility. + + # Ugly hack using the wrong calling convention (in 32-bit mode) + # because ctypes has no windll under cygwin (and it also seems that + # the code letting you select stdcall in _ctypes doesn't exist under + # the preprocessor definitions relevant to cygwin). + # This is 'safe' because: + # 1. The ABI of GetTickCount and GetTickCount64 is identical for + # both calling conventions because they both have no parameters. + # 2. libffi masks the problem because after making the call it doesn't + # touch anything through esp and epilogue code restores a correct + # esp from ebp afterwards. + try: + kernel32 = ctypes.cdll.kernel32 + except OSError: # 'No such file or directory' + kernel32 = ctypes.cdll.LoadLibrary('kernel32.dll') + else: + kernel32 = ctypes.windll.kernel32 + + GetTickCount64 = getattr(kernel32, 'GetTickCount64', None) + if GetTickCount64: + # Windows Vista / Windows Server 2008 or newer. + GetTickCount64.restype = ctypes.c_ulonglong + + def monotonic(): + """Monotonic clock, cannot go backward.""" + return GetTickCount64() / 1000.0 + + else: + # Before Windows Vista. + GetTickCount = kernel32.GetTickCount + GetTickCount.restype = ctypes.c_uint32 + + get_tick_count_lock = threading.Lock() + get_tick_count_last_sample = 0 + get_tick_count_wraparounds = 0 + + def monotonic(): + """Monotonic clock, cannot go backward.""" + global get_tick_count_last_sample + global get_tick_count_wraparounds + + with get_tick_count_lock: + current_sample = GetTickCount() + if current_sample < get_tick_count_last_sample: + get_tick_count_wraparounds += 1 + get_tick_count_last_sample = current_sample + + final_milliseconds = get_tick_count_wraparounds << 32 + final_milliseconds += get_tick_count_last_sample + return final_milliseconds / 1000.0 + + else: + clock_gettime = clock_gettime_linux() + + class timespec(ctypes.Structure): + """Time specification, as described in clock_gettime(3).""" + _fields_ = (('tv_sec', ctypes.c_long), + ('tv_nsec', ctypes.c_long)) + + if sys.platform.startswith('linux'): + CLOCK_MONOTONIC = 1 + elif sys.platform.startswith('freebsd'): + CLOCK_MONOTONIC = 4 + elif sys.platform.startswith('sunos5'): + CLOCK_MONOTONIC = 4 + elif 'bsd' in sys.platform: + CLOCK_MONOTONIC = 3 + elif sys.platform.startswith('aix'): + CLOCK_MONOTONIC = ctypes.c_longlong(10) + + def monotonic(): + """Monotonic clock, cannot go backward.""" + ts = timespec() + if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(ts)): + errno = ctypes.get_errno() + raise OSError(errno, os.strerror(errno)) + return ts.tv_sec + ts.tv_nsec / 1.0e9 + + # Perform a sanity-check. + if monotonic() - monotonic() > 0: + raise ValueError('monotonic() is not monotonic!') + + except Exception as e: + raise RuntimeError('no suitable implementation for this system: ' + repr(e)) diff --git a/collectors/python.d.plugin/python_modules/third_party/ordereddict.py b/collectors/python.d.plugin/python_modules/third_party/ordereddict.py new file mode 100644 index 00000000..589401b8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/third_party/ordereddict.py @@ -0,0 +1,110 @@ +# Copyright (c) 2009 Raymond Hettinger +# +# SPDX-License-Identifier: MIT + +from UserDict import DictMixin + + +class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + if last: + key = reversed(self).next() + else: + key = iter(self).next() + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return self.__class__, (items,), inst_dict + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + + def copy(self): + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + if len(self) != len(other): + return False + for p, q in zip(self.items(), other.items()): + if p != q: + return False + return True + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other diff --git a/collectors/python.d.plugin/python_modules/urllib3/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/__init__.py new file mode 100644 index 00000000..3add8481 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/__init__.py @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: MIT +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +from __future__ import absolute_import +import warnings + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url +) + +from . import exceptions +from .filepost import encode_multipart_formdata +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: # Python 2.7+ + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '1.21.1' + +__all__ = ( + 'HTTPConnectionPool', + 'HTTPSConnectionPool', + 'PoolManager', + 'ProxyManager', + 'HTTPResponse', + 'Retry', + 'Timeout', + 'add_stderr_logger', + 'connection_from_url', + 'disable_warnings', + 'encode_multipart_formdata', + 'get_host', + 'make_headers', + 'proxy_from_url', +) + +logging.getLogger(__name__).addHandler(NullHandler()) + + +def add_stderr_logger(level=logging.DEBUG): + """ + Helper for quickly adding a StreamHandler to the logger. Useful for + debugging. + + Returns the handler after adding it. + """ + # This method needs to be in this __init__.py to get the __name__ correct + # even if urllib3 is vendored within another package. + logger = logging.getLogger(__name__) + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) + logger.addHandler(handler) + logger.setLevel(level) + logger.debug('Added a stderr logging handler to logger: %s', __name__) + return handler + + +# ... Clean up. +del NullHandler + + +# All warning filters *must* be appended unless you're really certain that they +# shouldn't be: otherwise, it's very hard for users to use most Python +# mechanisms to silence them. +# SecurityWarning's always go off by default. +warnings.simplefilter('always', exceptions.SecurityWarning, append=True) +# SubjectAltNameWarning's should go off once per host +warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter('default', exceptions.InsecurePlatformWarning, + append=True) +# SNIMissingWarnings should go off only once. +warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True) + + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/collectors/python.d.plugin/python_modules/urllib3/_collections.py b/collectors/python.d.plugin/python_modules/urllib3/_collections.py new file mode 100644 index 00000000..2a6b3ec7 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/_collections.py @@ -0,0 +1,320 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import + +try: + from collections import Mapping, MutableMapping +except ImportError: + from collections.abc import Mapping, MutableMapping + +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass + + +try: # Python 2.7+ + from collections import OrderedDict +except ImportError: + from .packages.ordered_dict import OrderedDict +from .packages.six import iterkeys, itervalues, PY3 + + +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] + + +_Null = object() + + +class RecentlyUsedContainer(MutableMapping): + """ + Provides a thread-safe dict-like container which maintains up to + ``maxsize`` keys while throwing away the least-recently-used keys beyond + ``maxsize``. + + :param maxsize: + Maximum number of recent elements to retain. + + :param dispose_func: + Every time an item is evicted from the container, + ``dispose_func(value)`` is called. Callback which will get called + """ + + ContainerCls = OrderedDict + + def __init__(self, maxsize=10, dispose_func=None): + self._maxsize = maxsize + self.dispose_func = dispose_func + + self._container = self.ContainerCls() + self.lock = RLock() + + def __getitem__(self, key): + # Re-insert the item, moving it to the end of the eviction line. + with self.lock: + item = self._container.pop(key) + self._container[key] = item + return item + + def __setitem__(self, key, value): + evicted_value = _Null + with self.lock: + # Possibly evict the existing value of 'key' + evicted_value = self._container.get(key, _Null) + self._container[key] = value + + # If we didn't evict an existing value, we might have to evict the + # least recently used item from the beginning of the container. + if len(self._container) > self._maxsize: + _key, evicted_value = self._container.popitem(last=False) + + if self.dispose_func and evicted_value is not _Null: + self.dispose_func(evicted_value) + + def __delitem__(self, key): + with self.lock: + value = self._container.pop(key) + + if self.dispose_func: + self.dispose_func(value) + + def __len__(self): + with self.lock: + return len(self._container) + + def __iter__(self): + raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') + + def clear(self): + with self.lock: + # Copy pointers to all values, then wipe the mapping + values = list(itervalues(self._container)) + self._container.clear() + + if self.dispose_func: + for value in values: + self.dispose_func(value) + + def keys(self): + with self.lock: + return list(iterkeys(self._container)) + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 7230. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + """ + + def __init__(self, headers=None, **kwargs): + super(HTTPHeaderDict, self).__init__() + self._container = OrderedDict() + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) + + def __setitem__(self, key, val): + self._container[key.lower()] = [key, val] + return self._container[key.lower()] + + def __getitem__(self, key): + val = self._container[key.lower()] + return ', '.join(val[1:]) + + def __delitem__(self, key): + del self._container[key.lower()] + + def __contains__(self, key): + return key.lower() in self._container + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return (dict((k.lower(), v) for k, v in self.itermerged()) == + dict((k.lower(), v) for k, v in other.itermerged())) + + def __ne__(self, other): + return not self.__eq__(other) + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def __len__(self): + return len(self._container) + + def __iter__(self): + # Only provide the originally cased names + for vals in self._container.values(): + yield vals[0] + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + key_lower = key.lower() + new_vals = [key, val] + # Keep the common case aka no item present as fast as possible + vals = self._container.setdefault(key_lower, new_vals) + if new_vals is not vals: + vals.append(val) + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. + Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError("extend() takes at most 1 positional " + "arguments ({0} given)".format(len(args))) + other = args[0] if len(args) >= 1 else () + + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) + + def getlist(self, key): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + try: + vals = self._container[key.lower()] + except KeyError: + return [] + else: + return vals[1:] + + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist + + def __repr__(self): + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) + + def _copy_from(self, other): + for key in other: + val = other.getlist(key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + self._container[key.lower()] = [key] + val + + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = self._container[key.lower()] + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = self._container[key.lower()] + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + headers = [] + + for line in message.headers: + if line.startswith((' ', '\t')): + key, value = headers[-1] + headers[-1] = (key, value + '\r\n' + line.rstrip()) + continue + + key, value = line.split(':', 1) + headers.append((key, value.strip())) + + return cls(headers) diff --git a/collectors/python.d.plugin/python_modules/urllib3/connection.py b/collectors/python.d.plugin/python_modules/urllib3/connection.py new file mode 100644 index 00000000..f757493c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/connection.py @@ -0,0 +1,374 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import datetime +import logging +import os +import sys +import socket +from socket import error as SocketError, timeout as SocketTimeout +import warnings +from .packages import six +from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection +from .packages.six.moves.http_client import HTTPException # noqa: F401 + +try: # Compiled with SSL? + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None + + class BaseSSLError(BaseException): + pass + + +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass + + +from .exceptions import ( + NewConnectionError, + ConnectTimeoutError, + SubjectAltNameWarning, + SystemTimeWarning, +) +from .packages.ssl_match_hostname import match_hostname, CertificateError + +from .util.ssl_ import ( + resolve_cert_reqs, + resolve_ssl_version, + assert_fingerprint, + create_urllib3_context, + ssl_wrap_socket +) + + +from .util import connection + +from ._collections import HTTPHeaderDict + +log = logging.getLogger(__name__) + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + +# When updating RECENT_DATE, move it to +# within two years of the current date, and no +# earlier than 6 months ago. +RECENT_DATE = datetime.date(2016, 1, 1) + + +class DummyConnection(object): + """Used to detect a failed ConnectionCls import.""" + pass + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). + """ + + default_port = port_by_scheme['http'] + + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + + #: Whether this connection verifies the host's certificate. + is_verified = False + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it. + + :return: New socket connection. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) + + except SocketTimeout as e: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + except SocketError as e: + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e) + + return conn + + def _prepare_conn(self, conn): + self.sock = conn + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + def request_chunked(self, method, url, body=None, headers=None): + """ + Alternative to the common request method, which sends the + body with chunked encoding and not as one block + """ + headers = HTTPHeaderDict(headers if headers is not None else {}) + skip_accept_encoding = 'accept-encoding' in headers + skip_host = 'host' in headers + self.putrequest( + method, + url, + skip_accept_encoding=skip_accept_encoding, + skip_host=skip_host + ) + for header, value in headers.items(): + self.putheader(header, value) + if 'transfer-encoding' not in headers: + self.putheader('Transfer-Encoding', 'chunked') + self.endheaders() + + if body is not None: + stringish_types = six.string_types + (six.binary_type,) + if isinstance(body, stringish_types): + body = (body,) + for chunk in body: + if not chunk: + continue + if not isinstance(chunk, six.binary_type): + chunk = chunk.encode('utf8') + len_str = hex(len(chunk))[2:] + self.send(len_str.encode('utf-8')) + self.send(b'\r\n') + self.send(chunk) + self.send(b'\r\n') + + # After the if clause, to always have a closed body + self.send(b'0\r\n\r\n') + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + ssl_version = None + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + ssl_context=None, **kw): + + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) + + self.key_file = key_file + self.cert_file = cert_file + self.ssl_context = ssl_context + + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + if self.ssl_context is None: + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(None), + cert_reqs=resolve_cert_reqs(None), + ) + + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + ssl_context=self.ssl_context, + ) + + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ca_cert_dir = None + ssl_version = None + assert_fingerprint = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None, + ca_cert_dir=None): + """ + This method should only be called once, before the connection is used. + """ + # If cert_reqs is not provided, we can try to guess. If the user gave + # us a cert database, we assume they want to use it: otherwise, if + # they gave us an SSL Context object we should use whatever is set for + # it. + if cert_reqs is None: + if ca_certs or ca_cert_dir: + cert_reqs = 'CERT_REQUIRED' + elif self.ssl_context is not None: + cert_reqs = self.ssl_context.verify_mode + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + self.ca_certs = ca_certs and os.path.expanduser(ca_certs) + self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) + + def connect(self): + # Add certificate verification + conn = self._new_conn() + + hostname = self.host + if getattr(self, '_tunnel_host', None): + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host + + is_time_off = datetime.date.today() < RECENT_DATE + if is_time_off: + warnings.warn(( + 'System time is way off (before {0}). This will probably ' + 'lead to SSL verification errors').format(RECENT_DATE), + SystemTimeWarning + ) + + # Wrap socket using verification with the root certs in + # trusted_root_certs + if self.ssl_context is None: + self.ssl_context = create_urllib3_context( + ssl_version=resolve_ssl_version(self.ssl_version), + cert_reqs=resolve_cert_reqs(self.cert_reqs), + ) + + context = self.ssl_context + context.verify_mode = resolve_cert_reqs(self.cert_reqs) + self.sock = ssl_wrap_socket( + sock=conn, + keyfile=self.key_file, + certfile=self.cert_file, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + server_hostname=hostname, + ssl_context=context) + + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif context.verify_mode != ssl.CERT_NONE \ + and not getattr(context, 'check_hostname', False) \ + and self.assert_hostname is not False: + # While urllib3 attempts to always turn off hostname matching from + # the TLS library, this cannot always be done. So we check whether + # the TLS Library still thinks it's matching hostnames. + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn(( + 'Certificate for {0} has no `subjectAltName`, falling back to check for a ' + '`commonName` for now. This feature is being removed by major browsers and ' + 'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 ' + 'for details.)'.format(hostname)), + SubjectAltNameWarning + ) + _match_hostname(cert, self.assert_hostname or hostname) + + self.is_verified = ( + context.verify_mode == ssl.CERT_REQUIRED or + self.assert_fingerprint is not None + ) + + +def _match_hostname(cert, asserted_hostname): + try: + match_hostname(cert, asserted_hostname) + except CertificateError as e: + log.error( + 'Certificate did not match expected hostname: %s. ' + 'Certificate: %s', asserted_hostname, cert + ) + # Add cert to exception and reraise so client code can inspect + # the cert when catching the exception, if they want to + e._peer_cert = cert + raise + + +if ssl: + # Make a copy for testing. + UnverifiedHTTPSConnection = HTTPSConnection + HTTPSConnection = VerifiedHTTPSConnection +else: + HTTPSConnection = DummyConnection diff --git a/collectors/python.d.plugin/python_modules/urllib3/connectionpool.py b/collectors/python.d.plugin/python_modules/urllib3/connectionpool.py new file mode 100644 index 00000000..90e4c86a --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/connectionpool.py @@ -0,0 +1,900 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import errno +import logging +import sys +import warnings + +from socket import error as SocketError, timeout as SocketTimeout +import socket + + +from .exceptions import ( + ClosedPoolError, + ProtocolError, + EmptyPoolError, + HeaderParsingError, + HostChangedError, + LocationValueError, + MaxRetryError, + ProxyError, + ReadTimeoutError, + SSLError, + TimeoutError, + InsecureRequestWarning, + NewConnectionError, +) +from .packages.ssl_match_hostname import CertificateError +from .packages import six +from .packages.six.moves import queue +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) +from .request import RequestMethods +from .response import HTTPResponse + +from .util.connection import is_connection_dropped +from .util.request import set_file_position +from .util.response import assert_header_parsing +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host, Url + + +if six.PY2: + # Queue is imported for side effects on MS Windows + import Queue as _unused_module_Queue # noqa: F401 + +xrange = six.moves.xrange + +log = logging.getLogger(__name__) + +_Default = object() + + +# Pool objects +class ConnectionPool(object): + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + """ + + scheme = None + QueueCls = queue.LifoQueue + + def __init__(self, host, port=None): + if not host: + raise LocationValueError("No host specified.") + + self.host = _ipv6_host(host).lower() + self.port = port + + def __str__(self): + return '%s(host=%r, port=%r)' % (type(self).__name__, + self.host, self.port) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + pass + + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`httplib.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`httplib.HTTPConnection`. + + :param strict: + Causes BadStatusLine to be raised if the status line can't be parsed + as a valid HTTP/1.0 or 1.1 status line, passed into + :class:`httplib.HTTPConnection`. + + .. note:: + Only works in Python 2. This parameter is ignored in Python 3. + + :param timeout: + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to False, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param retries: + Retry configuration to use by default with requests in this pool. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.connectionpool.ProxyManager`" + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \\**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. + """ + + scheme = 'http' + ConnectionCls = HTTPConnection + ResponseCls = HTTPResponse + + def __init__(self, host, port=None, strict=False, + timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + + self.strict = strict + + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + + if retries is None: + retries = Retry.DEFAULT + + self.timeout = timeout + self.retries = retries + + self.pool = self.QueueCls(maxsize) + self.block = block + + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + + # Fill the queue up so that doing get() on it will block properly + for _ in xrange(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + self.conn_kw = conn_kw + + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + + def _new_conn(self): + """ + Return a fresh :class:`HTTPConnection`. + """ + self.num_connections += 1 + log.debug("Starting new HTTP connection (%d): %s", + self.num_connections, self.host) + + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict, **self.conn_kw) + return conn + + def _get_conn(self, timeout=None): + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") + + except queue.Empty: + if self.block: + raise EmptyPoolError(self, + "Pool reached maximum size and no more " + "connections are allowed.") + pass # Oh well, we'll create a new connection then + + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.debug("Resetting dropped connection: %s", self.host) + conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None + + return conn or self._new_conn() + + def _put_conn(self, conn): + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. + """ + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass + except queue.Full: + # This should never happen if self.block == True + log.warning( + "Connection pool is full, discarding connection: %s", + self.host) + + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + + def _get_timeout(self, timeout): + """ Helper that always returns a :class:`urllib3.util.Timeout` """ + if timeout is _Default: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + def _make_request(self, conn, method, url, timeout=_Default, chunked=False, + **httplib_request_kw): + """ + Perform a request on a given urllib connection object taken from our + pool. + + :param conn: + a connection from one of our connection pools + + :param timeout: + Socket timeout in seconds for the request. This can be a + float or integer, which will set the same timeout value for + the socket connect and the socket read, or an instance of + :class:`urllib3.util.Timeout`, which gives you more fine-grained + control over your timeouts. + """ + self.num_requests += 1 + + timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + + # Trigger any extra validation we need to do. + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + if chunked: + conn.request_chunked(method, url, **httplib_request_kw) + else: + conn.request(method, url, **httplib_request_kw) + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + + # App Engine doesn't have a sock attr + if getattr(conn, 'sock', None): + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) + + # Receive the response from the server + try: + try: # Python 2.7, use buffering of HTTP responses + httplib_response = conn.getresponse(buffering=True) + except TypeError: # Python 2.6 and older, Python 3 + try: + httplib_response = conn.getresponse() + except Exception as e: + # Remove the TypeError from the exception chain in Python 3; + # otherwise it looks like a programming error was the cause. + six.raise_from(e, None) + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) + raise + + # AppEngine doesn't have a version attr. + http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') + log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host, self.port, + method, url, http_version, httplib_response.status, + httplib_response.length) + + try: + assert_header_parsing(httplib_response.msg) + except HeaderParsingError as hpe: # Platform-specific: Python 3 + log.warning( + 'Failed to parse headers (url=%s): %s', + self._absolute_url(url), hpe, exc_info=True) + + return httplib_response + + def _absolute_url(self, path): + return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url + + def close(self): + """ + Close all pooled connections and disable the pool. + """ + # Disable access to the pool + old_pool, self.pool = self.pool, None + + try: + while True: + conn = old_pool.get(block=False) + if conn: + conn.close() + + except queue.Empty: + pass # Done. + + def is_same_host(self, url): + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + if url.startswith('/'): + return True + + # TODO: Add optional support for socket.gethostbyname checking. + scheme, host, port = get_host(url) + + host = _ipv6_host(host).lower() + + # Use explicit default port for comparison when none is given + if self.port and not port: + port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None + + return (scheme, host, port) == (self.scheme, self.host, self.port) + + def urlopen(self, method, url, body=None, headers=None, retries=None, + redirect=True, assert_same_host=True, timeout=_Default, + pool_timeout=None, release_conn=None, chunked=False, + body_pos=None, **response_kw): + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method provided + by :class:`.RequestMethods`, such as :meth:`request`. + + .. note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param body: + Data to send in the request body (useful for creating + POST requests, see HTTPConnectionPool.post_url for + more convenience). + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param redirect: + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When False, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of + ``response_kw.get('preload_content', True)``. + + :param chunked: + If True, urllib3 will send the body using chunked transfer + encoding. Otherwise, urllib3 will send the body using the standard + content-length form. Defaults to False. + + :param int body_pos: + Position to seek to in file-like body in the event of a retry or + redirect. Typically this won't need to be set because urllib3 will + auto-populate the value when needed. + + :param \\**response_kw: + Additional parameters are passed to + :meth:`urllib3.response.HTTPResponse.from_httplib` + """ + if headers is None: + headers = self.headers + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) + + if release_conn is None: + release_conn = response_kw.get('preload_content', True) + + # Check host + if assert_same_host and not self.is_same_host(url): + raise HostChangedError(self, url, retries) + + conn = None + + # Track whether `conn` needs to be released before + # returning/raising/recursing. Update this variable if necessary, and + # leave `release_conn` constant throughout the function. That way, if + # the function recurses, the original value of `release_conn` will be + # passed down into the recursive call, and its value will be respected. + # + # See issue #651 [1] for details. + # + # [1] <https://github.com/shazow/urllib3/issues/651> + release_this_conn = release_conn + + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + + # Keep track of whether we cleanly exited the except block. This + # ensures we do proper cleanup in finally. + clean_exit = False + + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body_pos = set_file_position(body, body_pos) + + try: + # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) + conn = self._get_conn(timeout=pool_timeout) + + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + + # Make the request on the httplib connection object. + httplib_response = self._make_request(conn, method, url, + timeout=timeout_obj, + body=body, headers=headers, + chunked=chunked) + + # If we're going to release the connection in ``finally:``, then + # the response doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = conn if not release_conn else None + + # Pass method to Response for length checking + response_kw['request_method'] = method + + # Import httplib's response into our own wrapper object + response = self.ResponseCls.from_httplib(httplib_response, + pool=self, + connection=response_conn, + retries=retries, + **response_kw) + + # Everything went great! + clean_exit = True + + except queue.Empty: + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") + + except (BaseSSLError, CertificateError) as e: + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. + clean_exit = False + raise SSLError(e) + + except SSLError: + # Treat SSLError separately from BaseSSLError to preserve + # traceback. + clean_exit = False + raise + + except (TimeoutError, HTTPException, SocketError, ProtocolError) as e: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + clean_exit = False + + if isinstance(e, (SocketError, NewConnectionError)) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) + + retries = retries.increment(method, url, error=e, _pool=self, + _stacktrace=sys.exc_info()[2]) + retries.sleep() + + # Keep track of the error for the retry warning. + err = e + + finally: + if not clean_exit: + # We hit some kind of exception, handled or otherwise. We need + # to throw the connection away unless explicitly told not to. + # Close the connection, set the variable to None, and make sure + # we put the None back in the pool to avoid leaking it. + conn = conn and conn.close() + release_this_conn = True + + if release_this_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. + self._put_conn(conn) + + if not conn: + # Try again + log.warning("Retrying (%r) after connection " + "broken by '%r': %s", retries, err, url) + return self.urlopen(method, url, body, headers, retries, + redirect, assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, body_pos=body_pos, + **response_kw) + + # Handle redirect? + redirect_location = redirect and response.get_redirect_location() + if redirect_location: + if response.status == 303: + method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + + retries.sleep_for_retry(response) + log.debug("Redirecting %s -> %s", url, redirect_location) + return self.urlopen( + method, redirect_location, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, body_pos=body_pos, + **response_kw) + + # Check if we should retry the HTTP response. + has_retry_after = bool(response.getheader('Retry-After')) + if retries.is_retry(method, response.status, has_retry_after): + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_status: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + retries.sleep(response) + log.debug("Retry: %s", url) + return self.urlopen( + method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, + body_pos=body_pos, **response_kw) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + When Python is compiled with the :mod:`ssl` module, then + :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, + instead of :class:`.HTTPSConnection`. + + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is + available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. + """ + + scheme = 'https' + ConnectionCls = HTTPSConnection + + def __init__(self, host, port=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, + _proxy=None, _proxy_headers=None, + key_file=None, cert_file=None, cert_reqs=None, + ca_certs=None, ssl_version=None, + assert_hostname=None, assert_fingerprint=None, + ca_cert_dir=None, **conn_kw): + + HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, + block, headers, retries, _proxy, _proxy_headers, + **conn_kw) + + if ca_certs and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + self.ca_cert_dir = ca_cert_dir + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _prepare_conn(self, conn): + """ + Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` + and establish the tunnel if proxy is used. + """ + + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version + return conn + + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + + conn.connect() + + def _new_conn(self): + """ + Return a fresh :class:`httplib.HTTPSConnection`. + """ + self.num_connections += 1 + log.debug("Starting new HTTPS connection (%d): %s", + self.num_connections, self.host) + + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + + actual_host = self.host + actual_port = self.port + if self.proxy is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + strict=self.strict, **self.conn_kw) + + return self._prepare_conn(conn) + + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not getattr(conn, 'sock', None): # AppEngine might not have `.sock` + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings'), + InsecureRequestWarning) + + +def connection_from_url(url, **kw): + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \\**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. + + Example:: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + port = port or port_by_scheme.get(scheme, 80) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) + + +def _ipv6_host(host): + """ + Process IPv6 address literals + """ + + # httplib doesn't like it when we include brackets in IPv6 addresses + # Specifically, if we include brackets but also pass the port then + # httplib crazily doubles up the square brackets on the Host header. + # Instead, we need to make sure we never pass ``None`` as the port. + # However, for backward compatibility reasons we can't actually + # *assert* that. See http://bugs.python.org/issue28539 + # + # Also if an IPv6 address literal has a zone identifier, the + # percent sign might be URIencoded, convert it back into ASCII + if host.startswith('[') and host.endswith(']'): + host = host.replace('%25', '%').strip('[]') + return host diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/__init__.py diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/__init__.py diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py new file mode 100644 index 00000000..bb826673 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/bindings.py @@ -0,0 +1,591 @@ +# SPDX-License-Identifier: MIT +""" +This module uses ctypes to bind a whole bunch of functions and constants from +SecureTransport. The goal here is to provide the low-level API to +SecureTransport. These are essentially the C-level functions and constants, and +they're pretty gross to work with. + +This code is a bastardised version of the code found in Will Bond's oscrypto +library. An enormous debt is owed to him for blazing this trail for us. For +that reason, this code should be considered to be covered both by urllib3's +license and by oscrypto's: + + Copyright (c) 2015-2016 Will Bond <will@wbond.net> + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +""" +from __future__ import absolute_import + +import platform +from ctypes.util import find_library +from ctypes import ( + c_void_p, c_int32, c_char_p, c_size_t, c_byte, c_uint32, c_ulong, c_long, + c_bool +) +from ctypes import CDLL, POINTER, CFUNCTYPE + + +security_path = find_library('Security') +if not security_path: + raise ImportError('The library Security could not be found') + + +core_foundation_path = find_library('CoreFoundation') +if not core_foundation_path: + raise ImportError('The library CoreFoundation could not be found') + + +version = platform.mac_ver()[0] +version_info = tuple(map(int, version.split('.'))) +if version_info < (10, 8): + raise OSError( + 'Only OS X 10.8 and newer are supported, not %s.%s' % ( + version_info[0], version_info[1] + ) + ) + +Security = CDLL(security_path, use_errno=True) +CoreFoundation = CDLL(core_foundation_path, use_errno=True) + +Boolean = c_bool +CFIndex = c_long +CFStringEncoding = c_uint32 +CFData = c_void_p +CFString = c_void_p +CFArray = c_void_p +CFMutableArray = c_void_p +CFDictionary = c_void_p +CFError = c_void_p +CFType = c_void_p +CFTypeID = c_ulong + +CFTypeRef = POINTER(CFType) +CFAllocatorRef = c_void_p + +OSStatus = c_int32 + +CFDataRef = POINTER(CFData) +CFStringRef = POINTER(CFString) +CFArrayRef = POINTER(CFArray) +CFMutableArrayRef = POINTER(CFMutableArray) +CFDictionaryRef = POINTER(CFDictionary) +CFArrayCallBacks = c_void_p +CFDictionaryKeyCallBacks = c_void_p +CFDictionaryValueCallBacks = c_void_p + +SecCertificateRef = POINTER(c_void_p) +SecExternalFormat = c_uint32 +SecExternalItemType = c_uint32 +SecIdentityRef = POINTER(c_void_p) +SecItemImportExportFlags = c_uint32 +SecItemImportExportKeyParameters = c_void_p +SecKeychainRef = POINTER(c_void_p) +SSLProtocol = c_uint32 +SSLCipherSuite = c_uint32 +SSLContextRef = POINTER(c_void_p) +SecTrustRef = POINTER(c_void_p) +SSLConnectionRef = c_uint32 +SecTrustResultType = c_uint32 +SecTrustOptionFlags = c_uint32 +SSLProtocolSide = c_uint32 +SSLConnectionType = c_uint32 +SSLSessionOption = c_uint32 + + +try: + Security.SecItemImport.argtypes = [ + CFDataRef, + CFStringRef, + POINTER(SecExternalFormat), + POINTER(SecExternalItemType), + SecItemImportExportFlags, + POINTER(SecItemImportExportKeyParameters), + SecKeychainRef, + POINTER(CFArrayRef), + ] + Security.SecItemImport.restype = OSStatus + + Security.SecCertificateGetTypeID.argtypes = [] + Security.SecCertificateGetTypeID.restype = CFTypeID + + Security.SecIdentityGetTypeID.argtypes = [] + Security.SecIdentityGetTypeID.restype = CFTypeID + + Security.SecKeyGetTypeID.argtypes = [] + Security.SecKeyGetTypeID.restype = CFTypeID + + Security.SecCertificateCreateWithData.argtypes = [ + CFAllocatorRef, + CFDataRef + ] + Security.SecCertificateCreateWithData.restype = SecCertificateRef + + Security.SecCertificateCopyData.argtypes = [ + SecCertificateRef + ] + Security.SecCertificateCopyData.restype = CFDataRef + + Security.SecCopyErrorMessageString.argtypes = [ + OSStatus, + c_void_p + ] + Security.SecCopyErrorMessageString.restype = CFStringRef + + Security.SecIdentityCreateWithCertificate.argtypes = [ + CFTypeRef, + SecCertificateRef, + POINTER(SecIdentityRef) + ] + Security.SecIdentityCreateWithCertificate.restype = OSStatus + + Security.SecKeychainCreate.argtypes = [ + c_char_p, + c_uint32, + c_void_p, + Boolean, + c_void_p, + POINTER(SecKeychainRef) + ] + Security.SecKeychainCreate.restype = OSStatus + + Security.SecKeychainDelete.argtypes = [ + SecKeychainRef + ] + Security.SecKeychainDelete.restype = OSStatus + + Security.SecPKCS12Import.argtypes = [ + CFDataRef, + CFDictionaryRef, + POINTER(CFArrayRef) + ] + Security.SecPKCS12Import.restype = OSStatus + + SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t)) + SSLWriteFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t)) + + Security.SSLSetIOFuncs.argtypes = [ + SSLContextRef, + SSLReadFunc, + SSLWriteFunc + ] + Security.SSLSetIOFuncs.restype = OSStatus + + Security.SSLSetPeerID.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t + ] + Security.SSLSetPeerID.restype = OSStatus + + Security.SSLSetCertificate.argtypes = [ + SSLContextRef, + CFArrayRef + ] + Security.SSLSetCertificate.restype = OSStatus + + Security.SSLSetCertificateAuthorities.argtypes = [ + SSLContextRef, + CFTypeRef, + Boolean + ] + Security.SSLSetCertificateAuthorities.restype = OSStatus + + Security.SSLSetConnection.argtypes = [ + SSLContextRef, + SSLConnectionRef + ] + Security.SSLSetConnection.restype = OSStatus + + Security.SSLSetPeerDomainName.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t + ] + Security.SSLSetPeerDomainName.restype = OSStatus + + Security.SSLHandshake.argtypes = [ + SSLContextRef + ] + Security.SSLHandshake.restype = OSStatus + + Security.SSLRead.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t, + POINTER(c_size_t) + ] + Security.SSLRead.restype = OSStatus + + Security.SSLWrite.argtypes = [ + SSLContextRef, + c_char_p, + c_size_t, + POINTER(c_size_t) + ] + Security.SSLWrite.restype = OSStatus + + Security.SSLClose.argtypes = [ + SSLContextRef + ] + Security.SSLClose.restype = OSStatus + + Security.SSLGetNumberSupportedCiphers.argtypes = [ + SSLContextRef, + POINTER(c_size_t) + ] + Security.SSLGetNumberSupportedCiphers.restype = OSStatus + + Security.SSLGetSupportedCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + POINTER(c_size_t) + ] + Security.SSLGetSupportedCiphers.restype = OSStatus + + Security.SSLSetEnabledCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + c_size_t + ] + Security.SSLSetEnabledCiphers.restype = OSStatus + + Security.SSLGetNumberEnabledCiphers.argtype = [ + SSLContextRef, + POINTER(c_size_t) + ] + Security.SSLGetNumberEnabledCiphers.restype = OSStatus + + Security.SSLGetEnabledCiphers.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite), + POINTER(c_size_t) + ] + Security.SSLGetEnabledCiphers.restype = OSStatus + + Security.SSLGetNegotiatedCipher.argtypes = [ + SSLContextRef, + POINTER(SSLCipherSuite) + ] + Security.SSLGetNegotiatedCipher.restype = OSStatus + + Security.SSLGetNegotiatedProtocolVersion.argtypes = [ + SSLContextRef, + POINTER(SSLProtocol) + ] + Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus + + Security.SSLCopyPeerTrust.argtypes = [ + SSLContextRef, + POINTER(SecTrustRef) + ] + Security.SSLCopyPeerTrust.restype = OSStatus + + Security.SecTrustSetAnchorCertificates.argtypes = [ + SecTrustRef, + CFArrayRef + ] + Security.SecTrustSetAnchorCertificates.restype = OSStatus + + Security.SecTrustSetAnchorCertificatesOnly.argstypes = [ + SecTrustRef, + Boolean + ] + Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus + + Security.SecTrustEvaluate.argtypes = [ + SecTrustRef, + POINTER(SecTrustResultType) + ] + Security.SecTrustEvaluate.restype = OSStatus + + Security.SecTrustGetCertificateCount.argtypes = [ + SecTrustRef + ] + Security.SecTrustGetCertificateCount.restype = CFIndex + + Security.SecTrustGetCertificateAtIndex.argtypes = [ + SecTrustRef, + CFIndex + ] + Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef + + Security.SSLCreateContext.argtypes = [ + CFAllocatorRef, + SSLProtocolSide, + SSLConnectionType + ] + Security.SSLCreateContext.restype = SSLContextRef + + Security.SSLSetSessionOption.argtypes = [ + SSLContextRef, + SSLSessionOption, + Boolean + ] + Security.SSLSetSessionOption.restype = OSStatus + + Security.SSLSetProtocolVersionMin.argtypes = [ + SSLContextRef, + SSLProtocol + ] + Security.SSLSetProtocolVersionMin.restype = OSStatus + + Security.SSLSetProtocolVersionMax.argtypes = [ + SSLContextRef, + SSLProtocol + ] + Security.SSLSetProtocolVersionMax.restype = OSStatus + + Security.SecCopyErrorMessageString.argtypes = [ + OSStatus, + c_void_p + ] + Security.SecCopyErrorMessageString.restype = CFStringRef + + Security.SSLReadFunc = SSLReadFunc + Security.SSLWriteFunc = SSLWriteFunc + Security.SSLContextRef = SSLContextRef + Security.SSLProtocol = SSLProtocol + Security.SSLCipherSuite = SSLCipherSuite + Security.SecIdentityRef = SecIdentityRef + Security.SecKeychainRef = SecKeychainRef + Security.SecTrustRef = SecTrustRef + Security.SecTrustResultType = SecTrustResultType + Security.SecExternalFormat = SecExternalFormat + Security.OSStatus = OSStatus + + Security.kSecImportExportPassphrase = CFStringRef.in_dll( + Security, 'kSecImportExportPassphrase' + ) + Security.kSecImportItemIdentity = CFStringRef.in_dll( + Security, 'kSecImportItemIdentity' + ) + + # CoreFoundation time! + CoreFoundation.CFRetain.argtypes = [ + CFTypeRef + ] + CoreFoundation.CFRetain.restype = CFTypeRef + + CoreFoundation.CFRelease.argtypes = [ + CFTypeRef + ] + CoreFoundation.CFRelease.restype = None + + CoreFoundation.CFGetTypeID.argtypes = [ + CFTypeRef + ] + CoreFoundation.CFGetTypeID.restype = CFTypeID + + CoreFoundation.CFStringCreateWithCString.argtypes = [ + CFAllocatorRef, + c_char_p, + CFStringEncoding + ] + CoreFoundation.CFStringCreateWithCString.restype = CFStringRef + + CoreFoundation.CFStringGetCStringPtr.argtypes = [ + CFStringRef, + CFStringEncoding + ] + CoreFoundation.CFStringGetCStringPtr.restype = c_char_p + + CoreFoundation.CFStringGetCString.argtypes = [ + CFStringRef, + c_char_p, + CFIndex, + CFStringEncoding + ] + CoreFoundation.CFStringGetCString.restype = c_bool + + CoreFoundation.CFDataCreate.argtypes = [ + CFAllocatorRef, + c_char_p, + CFIndex + ] + CoreFoundation.CFDataCreate.restype = CFDataRef + + CoreFoundation.CFDataGetLength.argtypes = [ + CFDataRef + ] + CoreFoundation.CFDataGetLength.restype = CFIndex + + CoreFoundation.CFDataGetBytePtr.argtypes = [ + CFDataRef + ] + CoreFoundation.CFDataGetBytePtr.restype = c_void_p + + CoreFoundation.CFDictionaryCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + POINTER(CFTypeRef), + CFIndex, + CFDictionaryKeyCallBacks, + CFDictionaryValueCallBacks + ] + CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef + + CoreFoundation.CFDictionaryGetValue.argtypes = [ + CFDictionaryRef, + CFTypeRef + ] + CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef + + CoreFoundation.CFArrayCreate.argtypes = [ + CFAllocatorRef, + POINTER(CFTypeRef), + CFIndex, + CFArrayCallBacks, + ] + CoreFoundation.CFArrayCreate.restype = CFArrayRef + + CoreFoundation.CFArrayCreateMutable.argtypes = [ + CFAllocatorRef, + CFIndex, + CFArrayCallBacks + ] + CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef + + CoreFoundation.CFArrayAppendValue.argtypes = [ + CFMutableArrayRef, + c_void_p + ] + CoreFoundation.CFArrayAppendValue.restype = None + + CoreFoundation.CFArrayGetCount.argtypes = [ + CFArrayRef + ] + CoreFoundation.CFArrayGetCount.restype = CFIndex + + CoreFoundation.CFArrayGetValueAtIndex.argtypes = [ + CFArrayRef, + CFIndex + ] + CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p + + CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll( + CoreFoundation, 'kCFAllocatorDefault' + ) + CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(CoreFoundation, 'kCFTypeArrayCallBacks') + CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll( + CoreFoundation, 'kCFTypeDictionaryKeyCallBacks' + ) + CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll( + CoreFoundation, 'kCFTypeDictionaryValueCallBacks' + ) + + CoreFoundation.CFTypeRef = CFTypeRef + CoreFoundation.CFArrayRef = CFArrayRef + CoreFoundation.CFStringRef = CFStringRef + CoreFoundation.CFDictionaryRef = CFDictionaryRef + +except (AttributeError): + raise ImportError('Error initializing ctypes') + + +class CFConst(object): + """ + A class object that acts as essentially a namespace for CoreFoundation + constants. + """ + kCFStringEncodingUTF8 = CFStringEncoding(0x08000100) + + +class SecurityConst(object): + """ + A class object that acts as essentially a namespace for Security constants. + """ + kSSLSessionOptionBreakOnServerAuth = 0 + + kSSLProtocol2 = 1 + kSSLProtocol3 = 2 + kTLSProtocol1 = 4 + kTLSProtocol11 = 7 + kTLSProtocol12 = 8 + + kSSLClientSide = 1 + kSSLStreamType = 0 + + kSecFormatPEMSequence = 10 + + kSecTrustResultInvalid = 0 + kSecTrustResultProceed = 1 + # This gap is present on purpose: this was kSecTrustResultConfirm, which + # is deprecated. + kSecTrustResultDeny = 3 + kSecTrustResultUnspecified = 4 + kSecTrustResultRecoverableTrustFailure = 5 + kSecTrustResultFatalTrustFailure = 6 + kSecTrustResultOtherError = 7 + + errSSLProtocol = -9800 + errSSLWouldBlock = -9803 + errSSLClosedGraceful = -9805 + errSSLClosedNoNotify = -9816 + errSSLClosedAbort = -9806 + + errSSLXCertChainInvalid = -9807 + errSSLCrypto = -9809 + errSSLInternal = -9810 + errSSLCertExpired = -9814 + errSSLCertNotYetValid = -9815 + errSSLUnknownRootCert = -9812 + errSSLNoRootCert = -9813 + errSSLHostNameMismatch = -9843 + errSSLPeerHandshakeFail = -9824 + errSSLPeerUserCancelled = -9839 + errSSLWeakPeerEphemeralDHKey = -9850 + errSSLServerAuthCompleted = -9841 + errSSLRecordOverflow = -9847 + + errSecVerifyFailed = -67808 + errSecNoTrustSettings = -25263 + errSecItemNotFound = -25300 + errSecInvalidTrustSettings = -25262 + + # Cipher suites. We only pick the ones our default cipher string allows. + TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C + TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030 + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B + TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F + TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 = 0x00A3 + TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F + TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 = 0x00A2 + TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024 + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028 + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A + TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014 + TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B + TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 = 0x006A + TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039 + TLS_DHE_DSS_WITH_AES_256_CBC_SHA = 0x0038 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027 + TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009 + TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067 + TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 = 0x0040 + TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033 + TLS_DHE_DSS_WITH_AES_128_CBC_SHA = 0x0032 + TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D + TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C + TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D + TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C + TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035 + TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py new file mode 100644 index 00000000..0f79a137 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/_securetransport/low_level.py @@ -0,0 +1,344 @@ +# SPDX-License-Identifier: MIT +""" +Low-level helpers for the SecureTransport bindings. + +These are Python functions that are not directly related to the high-level APIs +but are necessary to get them to work. They include a whole bunch of low-level +CoreFoundation messing about and memory management. The concerns in this module +are almost entirely about trying to avoid memory leaks and providing +appropriate and useful assistance to the higher-level code. +""" +import base64 +import ctypes +import itertools +import re +import os +import ssl +import tempfile + +from .bindings import Security, CoreFoundation, CFConst + + +# This regular expression is used to grab PEM data out of a PEM bundle. +_PEM_CERTS_RE = re.compile( + b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----", re.DOTALL +) + + +def _cf_data_from_bytes(bytestring): + """ + Given a bytestring, create a CFData object from it. This CFData object must + be CFReleased by the caller. + """ + return CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, bytestring, len(bytestring) + ) + + +def _cf_dictionary_from_tuples(tuples): + """ + Given a list of Python tuples, create an associated CFDictionary. + """ + dictionary_size = len(tuples) + + # We need to get the dictionary keys and values out in the same order. + keys = (t[0] for t in tuples) + values = (t[1] for t in tuples) + cf_keys = (CoreFoundation.CFTypeRef * dictionary_size)(*keys) + cf_values = (CoreFoundation.CFTypeRef * dictionary_size)(*values) + + return CoreFoundation.CFDictionaryCreate( + CoreFoundation.kCFAllocatorDefault, + cf_keys, + cf_values, + dictionary_size, + CoreFoundation.kCFTypeDictionaryKeyCallBacks, + CoreFoundation.kCFTypeDictionaryValueCallBacks, + ) + + +def _cf_string_to_unicode(value): + """ + Creates a Unicode string from a CFString object. Used entirely for error + reporting. + + Yes, it annoys me quite a lot that this function is this complex. + """ + value_as_void_p = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p)) + + string = CoreFoundation.CFStringGetCStringPtr( + value_as_void_p, + CFConst.kCFStringEncodingUTF8 + ) + if string is None: + buffer = ctypes.create_string_buffer(1024) + result = CoreFoundation.CFStringGetCString( + value_as_void_p, + buffer, + 1024, + CFConst.kCFStringEncodingUTF8 + ) + if not result: + raise OSError('Error copying C string from CFStringRef') + string = buffer.value + if string is not None: + string = string.decode('utf-8') + return string + + +def _assert_no_error(error, exception_class=None): + """ + Checks the return code and throws an exception if there is an error to + report + """ + if error == 0: + return + + cf_error_string = Security.SecCopyErrorMessageString(error, None) + output = _cf_string_to_unicode(cf_error_string) + CoreFoundation.CFRelease(cf_error_string) + + if output is None or output == u'': + output = u'OSStatus %s' % error + + if exception_class is None: + exception_class = ssl.SSLError + + raise exception_class(output) + + +def _cert_array_from_pem(pem_bundle): + """ + Given a bundle of certs in PEM format, turns them into a CFArray of certs + that can be used to validate a cert chain. + """ + der_certs = [ + base64.b64decode(match.group(1)) + for match in _PEM_CERTS_RE.finditer(pem_bundle) + ] + if not der_certs: + raise ssl.SSLError("No root certificates specified") + + cert_array = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks) + ) + if not cert_array: + raise ssl.SSLError("Unable to allocate memory!") + + try: + for der_bytes in der_certs: + certdata = _cf_data_from_bytes(der_bytes) + if not certdata: + raise ssl.SSLError("Unable to allocate memory!") + cert = Security.SecCertificateCreateWithData( + CoreFoundation.kCFAllocatorDefault, certdata + ) + CoreFoundation.CFRelease(certdata) + if not cert: + raise ssl.SSLError("Unable to build cert object!") + + CoreFoundation.CFArrayAppendValue(cert_array, cert) + CoreFoundation.CFRelease(cert) + except Exception: + # We need to free the array before the exception bubbles further. + # We only want to do that if an error occurs: otherwise, the caller + # should free. + CoreFoundation.CFRelease(cert_array) + + return cert_array + + +def _is_cert(item): + """ + Returns True if a given CFTypeRef is a certificate. + """ + expected = Security.SecCertificateGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _is_identity(item): + """ + Returns True if a given CFTypeRef is an identity. + """ + expected = Security.SecIdentityGetTypeID() + return CoreFoundation.CFGetTypeID(item) == expected + + +def _temporary_keychain(): + """ + This function creates a temporary Mac keychain that we can use to work with + credentials. This keychain uses a one-time password and a temporary file to + store the data. We expect to have one keychain per socket. The returned + SecKeychainRef must be freed by the caller, including calling + SecKeychainDelete. + + Returns a tuple of the SecKeychainRef and the path to the temporary + directory that contains it. + """ + # Unfortunately, SecKeychainCreate requires a path to a keychain. This + # means we cannot use mkstemp to use a generic temporary file. Instead, + # we're going to create a temporary directory and a filename to use there. + # This filename will be 8 random bytes expanded into base64. We also need + # some random bytes to password-protect the keychain we're creating, so we + # ask for 40 random bytes. + random_bytes = os.urandom(40) + filename = base64.b64encode(random_bytes[:8]).decode('utf-8') + password = base64.b64encode(random_bytes[8:]) # Must be valid UTF-8 + tempdirectory = tempfile.mkdtemp() + + keychain_path = os.path.join(tempdirectory, filename).encode('utf-8') + + # We now want to create the keychain itself. + keychain = Security.SecKeychainRef() + status = Security.SecKeychainCreate( + keychain_path, + len(password), + password, + False, + None, + ctypes.byref(keychain) + ) + _assert_no_error(status) + + # Having created the keychain, we want to pass it off to the caller. + return keychain, tempdirectory + + +def _load_items_from_file(keychain, path): + """ + Given a single file, loads all the trust objects from it into arrays and + the keychain. + Returns a tuple of lists: the first list is a list of identities, the + second a list of certs. + """ + certificates = [] + identities = [] + result_array = None + + with open(path, 'rb') as f: + raw_filedata = f.read() + + try: + filedata = CoreFoundation.CFDataCreate( + CoreFoundation.kCFAllocatorDefault, + raw_filedata, + len(raw_filedata) + ) + result_array = CoreFoundation.CFArrayRef() + result = Security.SecItemImport( + filedata, # cert data + None, # Filename, leaving it out for now + None, # What the type of the file is, we don't care + None, # what's in the file, we don't care + 0, # import flags + None, # key params, can include passphrase in the future + keychain, # The keychain to insert into + ctypes.byref(result_array) # Results + ) + _assert_no_error(result) + + # A CFArray is not very useful to us as an intermediary + # representation, so we are going to extract the objects we want + # and then free the array. We don't need to keep hold of keys: the + # keychain already has them! + result_count = CoreFoundation.CFArrayGetCount(result_array) + for index in range(result_count): + item = CoreFoundation.CFArrayGetValueAtIndex( + result_array, index + ) + item = ctypes.cast(item, CoreFoundation.CFTypeRef) + + if _is_cert(item): + CoreFoundation.CFRetain(item) + certificates.append(item) + elif _is_identity(item): + CoreFoundation.CFRetain(item) + identities.append(item) + finally: + if result_array: + CoreFoundation.CFRelease(result_array) + + CoreFoundation.CFRelease(filedata) + + return (identities, certificates) + + +def _load_client_cert_chain(keychain, *paths): + """ + Load certificates and maybe keys from a number of files. Has the end goal + of returning a CFArray containing one SecIdentityRef, and then zero or more + SecCertificateRef objects, suitable for use as a client certificate trust + chain. + """ + # Ok, the strategy. + # + # This relies on knowing that macOS will not give you a SecIdentityRef + # unless you have imported a key into a keychain. This is a somewhat + # artificial limitation of macOS (for example, it doesn't necessarily + # affect iOS), but there is nothing inside Security.framework that lets you + # get a SecIdentityRef without having a key in a keychain. + # + # So the policy here is we take all the files and iterate them in order. + # Each one will use SecItemImport to have one or more objects loaded from + # it. We will also point at a keychain that macOS can use to work with the + # private key. + # + # Once we have all the objects, we'll check what we actually have. If we + # already have a SecIdentityRef in hand, fab: we'll use that. Otherwise, + # we'll take the first certificate (which we assume to be our leaf) and + # ask the keychain to give us a SecIdentityRef with that cert's associated + # key. + # + # We'll then return a CFArray containing the trust chain: one + # SecIdentityRef and then zero-or-more SecCertificateRef objects. The + # responsibility for freeing this CFArray will be with the caller. This + # CFArray must remain alive for the entire connection, so in practice it + # will be stored with a single SSLSocket, along with the reference to the + # keychain. + certificates = [] + identities = [] + + # Filter out bad paths. + paths = (path for path in paths if path) + + try: + for file_path in paths: + new_identities, new_certs = _load_items_from_file( + keychain, file_path + ) + identities.extend(new_identities) + certificates.extend(new_certs) + + # Ok, we have everything. The question is: do we have an identity? If + # not, we want to grab one from the first cert we have. + if not identities: + new_identity = Security.SecIdentityRef() + status = Security.SecIdentityCreateWithCertificate( + keychain, + certificates[0], + ctypes.byref(new_identity) + ) + _assert_no_error(status) + identities.append(new_identity) + + # We now want to release the original certificate, as we no longer + # need it. + CoreFoundation.CFRelease(certificates.pop(0)) + + # We now need to build a new CFArray that holds the trust chain. + trust_chain = CoreFoundation.CFArrayCreateMutable( + CoreFoundation.kCFAllocatorDefault, + 0, + ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks), + ) + for item in itertools.chain(identities, certificates): + # ArrayAppendValue does a CFRetain on the item. That's fine, + # because the finally block will release our other refs to them. + CoreFoundation.CFArrayAppendValue(trust_chain, item) + + return trust_chain + finally: + for obj in itertools.chain(identities, certificates): + CoreFoundation.CFRelease(obj) diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py new file mode 100644 index 00000000..e74589fa --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/appengine.py @@ -0,0 +1,297 @@ +# SPDX-License-Identifier: MIT +""" +This module provides a pool manager that uses Google App Engine's +`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_. + +Example usage:: + + from urllib3 import PoolManager + from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox + + if is_appengine_sandbox(): + # AppEngineManager uses AppEngine's URLFetch API behind the scenes + http = AppEngineManager() + else: + # PoolManager uses a socket-level API behind the scenes + http = PoolManager() + + r = http.request('GET', 'https://google.com/') + +There are `limitations <https://cloud.google.com/appengine/docs/python/\ +urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be +the best choice for your application. There are three options for using +urllib3 on Google App Engine: + +1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is + cost-effective in many circumstances as long as your usage is within the + limitations. +2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets. + Sockets also have `limitations and restrictions + <https://cloud.google.com/appengine/docs/python/sockets/\ + #limitations-and-restrictions>`_ and have a lower free quota than URLFetch. + To use sockets, be sure to specify the following in your ``app.yaml``:: + + env_variables: + GAE_USE_SOCKETS_HTTPLIB : 'true' + +3. If you are using `App Engine Flexible +<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard +:class:`PoolManager` without any configuration or special environment variables. +""" + +from __future__ import absolute_import +import logging +import os +import warnings +from ..packages.six.moves.urllib.parse import urljoin + +from ..exceptions import ( + HTTPError, + HTTPWarning, + MaxRetryError, + ProtocolError, + TimeoutError, + SSLError +) + +from ..packages.six import BytesIO +from ..request import RequestMethods +from ..response import HTTPResponse +from ..util.timeout import Timeout +from ..util.retry import Retry + +try: + from google.appengine.api import urlfetch +except ImportError: + urlfetch = None + + +log = logging.getLogger(__name__) + + +class AppEnginePlatformWarning(HTTPWarning): + pass + + +class AppEnginePlatformError(HTTPError): + pass + + +class AppEngineManager(RequestMethods): + """ + Connection manager for Google App Engine sandbox applications. + + This manager uses the URLFetch service directly instead of using the + emulated httplib, and is subject to URLFetch limitations as described in + the App Engine documentation `here + <https://cloud.google.com/appengine/docs/python/urlfetch>`_. + + Notably it will raise an :class:`AppEnginePlatformError` if: + * URLFetch is not available. + * If you attempt to use this on App Engine Flexible, as full socket + support is available. + * If a request size is more than 10 megabytes. + * If a response size is more than 32 megabtyes. + * If you use an unsupported request method such as OPTIONS. + + Beyond those cases, it will raise normal urllib3 errors. + """ + + def __init__(self, headers=None, retries=None, validate_certificate=True, + urlfetch_retries=True): + if not urlfetch: + raise AppEnginePlatformError( + "URLFetch is not available in this environment.") + + if is_prod_appengine_mvms(): + raise AppEnginePlatformError( + "Use normal urllib3.PoolManager instead of AppEngineManager" + "on Managed VMs, as using URLFetch is not necessary in " + "this environment.") + + warnings.warn( + "urllib3 is using URLFetch on Google App Engine sandbox instead " + "of sockets. To use sockets directly instead of URLFetch see " + "https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.", + AppEnginePlatformWarning) + + RequestMethods.__init__(self, headers) + self.validate_certificate = validate_certificate + self.urlfetch_retries = urlfetch_retries + + self.retries = retries or Retry.DEFAULT + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Return False to re-raise any potential exceptions + return False + + def urlopen(self, method, url, body=None, headers=None, + retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT, + **response_kw): + + retries = self._get_retries(retries, redirect) + + try: + follow_redirects = ( + redirect and + retries.redirect != 0 and + retries.total) + response = urlfetch.fetch( + url, + payload=body, + method=method, + headers=headers or {}, + allow_truncated=False, + follow_redirects=self.urlfetch_retries and follow_redirects, + deadline=self._get_absolute_timeout(timeout), + validate_certificate=self.validate_certificate, + ) + except urlfetch.DeadlineExceededError as e: + raise TimeoutError(self, e) + + except urlfetch.InvalidURLError as e: + if 'too large' in str(e): + raise AppEnginePlatformError( + "URLFetch request too large, URLFetch only " + "supports requests up to 10mb in size.", e) + raise ProtocolError(e) + + except urlfetch.DownloadError as e: + if 'Too many redirects' in str(e): + raise MaxRetryError(self, url, reason=e) + raise ProtocolError(e) + + except urlfetch.ResponseTooLargeError as e: + raise AppEnginePlatformError( + "URLFetch response too large, URLFetch only supports" + "responses up to 32mb in size.", e) + + except urlfetch.SSLCertificateError as e: + raise SSLError(e) + + except urlfetch.InvalidMethodError as e: + raise AppEnginePlatformError( + "URLFetch does not support method: %s" % method, e) + + http_response = self._urlfetch_response_to_http_response( + response, retries=retries, **response_kw) + + # Handle redirect? + redirect_location = redirect and http_response.get_redirect_location() + if redirect_location: + # Check for redirect response + if (self.urlfetch_retries and retries.raise_on_redirect): + raise MaxRetryError(self, url, "too many redirects") + else: + if http_response.status == 303: + method = 'GET' + + try: + retries = retries.increment(method, url, response=http_response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise MaxRetryError(self, url, "too many redirects") + return http_response + + retries.sleep_for_retry(http_response) + log.debug("Redirecting %s -> %s", url, redirect_location) + redirect_url = urljoin(url, redirect_location) + return self.urlopen( + method, redirect_url, body, headers, + retries=retries, redirect=redirect, + timeout=timeout, **response_kw) + + # Check if we should retry the HTTP response. + has_retry_after = bool(http_response.getheader('Retry-After')) + if retries.is_retry(method, http_response.status, has_retry_after): + retries = retries.increment( + method, url, response=http_response, _pool=self) + log.debug("Retry: %s", url) + retries.sleep(http_response) + return self.urlopen( + method, url, + body=body, headers=headers, + retries=retries, redirect=redirect, + timeout=timeout, **response_kw) + + return http_response + + def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw): + + if is_prod_appengine(): + # Production GAE handles deflate encoding automatically, but does + # not remove the encoding header. + content_encoding = urlfetch_resp.headers.get('content-encoding') + + if content_encoding == 'deflate': + del urlfetch_resp.headers['content-encoding'] + + transfer_encoding = urlfetch_resp.headers.get('transfer-encoding') + # We have a full response's content, + # so let's make sure we don't report ourselves as chunked data. + if transfer_encoding == 'chunked': + encodings = transfer_encoding.split(",") + encodings.remove('chunked') + urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings) + + return HTTPResponse( + # In order for decoding to work, we must present the content as + # a file-like object. + body=BytesIO(urlfetch_resp.content), + headers=urlfetch_resp.headers, + status=urlfetch_resp.status_code, + **response_kw + ) + + def _get_absolute_timeout(self, timeout): + if timeout is Timeout.DEFAULT_TIMEOUT: + return None # Defer to URLFetch's default. + if isinstance(timeout, Timeout): + if timeout._read is not None or timeout._connect is not None: + warnings.warn( + "URLFetch does not support granular timeout settings, " + "reverting to total or default URLFetch timeout.", + AppEnginePlatformWarning) + return timeout.total + return timeout + + def _get_retries(self, retries, redirect): + if not isinstance(retries, Retry): + retries = Retry.from_int( + retries, redirect=redirect, default=self.retries) + + if retries.connect or retries.read or retries.redirect: + warnings.warn( + "URLFetch only supports total retries and does not " + "recognize connect, read, or redirect retry parameters.", + AppEnginePlatformWarning) + + return retries + + +def is_appengine(): + return (is_local_appengine() or + is_prod_appengine() or + is_prod_appengine_mvms()) + + +def is_appengine_sandbox(): + return is_appengine() and not is_prod_appengine_mvms() + + +def is_local_appengine(): + return ('APPENGINE_RUNTIME' in os.environ and + 'Development/' in os.environ['SERVER_SOFTWARE']) + + +def is_prod_appengine(): + return ('APPENGINE_RUNTIME' in os.environ and + 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and + not is_prod_appengine_mvms()) + + +def is_prod_appengine_mvms(): + return os.environ.get('GAE_VM', False) == 'true' diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py new file mode 100644 index 00000000..3f8c9ebf --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/ntlmpool.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: MIT +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" +from __future__ import absolute_import + +from logging import getLogger +from ntlm import ntlm + +from .. import HTTPSConnectionPool +from ..packages.six.moves.http_client import HTTPSConnection + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s', + self.num_connections, self.host, self.authurl) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s', headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s', res.status, res.reason) + log.debug('Response headers: %s', reshdr) + log.debug('Response data: %s [...]', res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s', headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s', res.status, res.reason) + log.debug('Response headers: %s', dict(res.getheaders())) + log.debug('Response data: %s [...]', res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py new file mode 100644 index 00000000..8d373507 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/pyopenssl.py @@ -0,0 +1,458 @@ +# SPDX-License-Identifier: MIT +""" +SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. + +This needs the following packages installed: + +* pyOpenSSL (tested with 16.0.0) +* cryptography (minimum 1.3.4, from pyopenssl) +* idna (minimum 2.0, from cryptography) + +However, pyopenssl depends on cryptography, which depends on idna, so while we +use all three directly here we end up having relatively few packages required. + +You can install them with the following command: + + pip install pyopenssl cryptography idna + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +compression in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) +""" +from __future__ import absolute_import + +import OpenSSL.SSL +from cryptography import x509 +from cryptography.hazmat.backends.openssl import backend as openssl_backend +from cryptography.hazmat.backends.openssl.x509 import _Certificate + +from socket import timeout, error as SocketError +from io import BytesIO + +try: # Platform-specific: Python 2 + from socket import _fileobject +except ImportError: # Platform-specific: Python 3 + _fileobject = None + from ..packages.backports.makefile import backport_makefile + +import logging +import ssl + +try: + import six +except ImportError: + from ..packages import six + +import sys + +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI always works. +HAS_SNI = True + +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} + +if hasattr(ssl, 'PROTOCOL_TLSv1_1') and hasattr(OpenSSL.SSL, 'TLSv1_1_METHOD'): + _openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD + +if hasattr(ssl, 'PROTOCOL_TLSv1_2') and hasattr(OpenSSL.SSL, 'TLSv1_2_METHOD'): + _openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD + +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass + +_stdlib_to_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: + OpenSSL.SSL.VERIFY_PEER + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} +_openssl_to_stdlib_verify = dict( + (v, k) for k, v in _stdlib_to_openssl_verify.items() +) + +# OpenSSL will only write 16K at a time +SSL_WRITE_BLOCKSIZE = 16384 + +orig_util_HAS_SNI = util.HAS_SNI +orig_util_SSLContext = util.ssl_.SSLContext + + +log = logging.getLogger(__name__) + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' + + _validate_dependencies_met() + + util.ssl_.SSLContext = PyOpenSSLContext + util.HAS_SNI = HAS_SNI + util.ssl_.HAS_SNI = HAS_SNI + util.IS_PYOPENSSL = True + util.ssl_.IS_PYOPENSSL = True + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + + util.ssl_.SSLContext = orig_util_SSLContext + util.HAS_SNI = orig_util_HAS_SNI + util.ssl_.HAS_SNI = orig_util_HAS_SNI + util.IS_PYOPENSSL = False + util.ssl_.IS_PYOPENSSL = False + + +def _validate_dependencies_met(): + """ + Verifies that PyOpenSSL's package-level dependencies have been met. + Throws `ImportError` if they are not met. + """ + # Method added in `cryptography==1.1`; not available in older versions + from cryptography.x509.extensions import Extensions + if getattr(Extensions, "get_extension_for_class", None) is None: + raise ImportError("'cryptography' module missing required functionality. " + "Try upgrading to v1.3.4 or newer.") + + # pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509 + # attribute is only present on those versions. + from OpenSSL.crypto import X509 + x509 = X509() + if getattr(x509, "_x509", None) is None: + raise ImportError("'pyOpenSSL' module missing required functionality. " + "Try upgrading to v0.14 or newer.") + + +def _dnsname_to_stdlib(name): + """ + Converts a dNSName SubjectAlternativeName field to the form used by the + standard library on the given Python version. + + Cryptography produces a dNSName as a unicode string that was idna-decoded + from ASCII bytes. We need to idna-encode that string to get it back, and + then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib + uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8). + """ + def idna_encode(name): + """ + Borrowed wholesale from the Python Cryptography Project. It turns out + that we can't just safely call `idna.encode`: it can explode for + wildcard names. This avoids that problem. + """ + import idna + + for prefix in [u'*.', u'.']: + if name.startswith(prefix): + name = name[len(prefix):] + return prefix.encode('ascii') + idna.encode(name) + return idna.encode(name) + + name = idna_encode(name) + if sys.version_info >= (3, 0): + name = name.decode('utf-8') + return name + + +def get_subj_alt_name(peer_cert): + """ + Given an PyOpenSSL certificate, provides all the subject alternative names. + """ + # Pass the cert to cryptography, which has much better APIs for this. + # This is technically using private APIs, but should work across all + # relevant versions until PyOpenSSL gets something proper for this. + cert = _Certificate(openssl_backend, peer_cert._x509) + + # We want to find the SAN extension. Ask Cryptography to locate it (it's + # faster than looping in Python) + try: + ext = cert.extensions.get_extension_for_class( + x509.SubjectAlternativeName + ).value + except x509.ExtensionNotFound: + # No such extension, return the empty list. + return [] + except (x509.DuplicateExtension, x509.UnsupportedExtension, + x509.UnsupportedGeneralNameType, UnicodeError) as e: + # A problem has been found with the quality of the certificate. Assume + # no SAN field is present. + log.warning( + "A problem was encountered with the certificate that prevented " + "urllib3 from finding the SubjectAlternativeName field. This can " + "affect certificate validation. The error was %s", + e, + ) + return [] + + # We want to return dNSName and iPAddress fields. We need to cast the IPs + # back to strings because the match_hostname function wants them as + # strings. + # Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8 + # decoded. This is pretty frustrating, but that's what the standard library + # does with certificates, and so we need to attempt to do the same. + names = [ + ('DNS', _dnsname_to_stdlib(name)) + for name in ext.get_values_for_type(x509.DNSName) + ] + names.extend( + ('IP Address', str(name)) + for name in ext.get_values_for_type(x509.IPAddress) + ) + + return names + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class. + + Note: _makefile_refs, _drop() and _reuse() are needed for the garbage + collector of pypy. + ''' + + def __init__(self, connection, socket, suppress_ragged_eofs=True): + self.connection = connection + self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs + self._makefile_refs = 0 + self._closed = False + + def fileno(self): + return self.socket.fileno() + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self): + if self._makefile_refs > 0: + self._makefile_refs -= 1 + if self._closed: + self.close() + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise SocketError(str(e)) + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(self.socket, self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv(*args, **kwargs) + else: + return data + + def recv_into(self, *args, **kwargs): + try: + return self.connection.recv_into(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return 0 + else: + raise SocketError(str(e)) + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return 0 + else: + raise + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(self.socket, self.socket.gettimeout()) + if not rd: + raise timeout('The read operation timed out') + else: + return self.recv_into(*args, **kwargs) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + wr = util.wait_for_write(self.socket, self.socket.gettimeout()) + if not wr: + raise timeout() + continue + except OpenSSL.SSL.SysCallError as e: + raise SocketError(str(e)) + + def sendall(self, data): + total_sent = 0 + while total_sent < len(data): + sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE]) + total_sent += sent + + def shutdown(self): + # FIXME rethrow compatible exceptions should we ever use this + self.connection.shutdown() + + def close(self): + if self._makefile_refs < 1: + try: + self._closed = True + return self.connection.close() + except OpenSSL.SSL.Error: + return + else: + self._makefile_refs -= 1 + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + + if not x509: + return x509 + + if binary_form: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, + x509) + + return { + 'subject': ( + (('commonName', x509.get_subject().CN),), + ), + 'subjectAltName': get_subj_alt_name(x509) + } + + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + + +if _fileobject: # Platform-specific: Python 2 + def makefile(self, mode, bufsize=-1): + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) +else: # Platform-specific: Python 3 + makefile = backport_makefile + +WrappedSocket.makefile = makefile + + +class PyOpenSSLContext(object): + """ + I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible + for translating the interface of the standard library ``SSLContext`` object + to calls into PyOpenSSL. + """ + def __init__(self, protocol): + self.protocol = _openssl_versions[protocol] + self._ctx = OpenSSL.SSL.Context(self.protocol) + self._options = 0 + self.check_hostname = False + + @property + def options(self): + return self._options + + @options.setter + def options(self, value): + self._options = value + self._ctx.set_options(value) + + @property + def verify_mode(self): + return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()] + + @verify_mode.setter + def verify_mode(self, value): + self._ctx.set_verify( + _stdlib_to_openssl_verify[value], + _verify_callback + ) + + def set_default_verify_paths(self): + self._ctx.set_default_verify_paths() + + def set_ciphers(self, ciphers): + if isinstance(ciphers, six.text_type): + ciphers = ciphers.encode('utf-8') + self._ctx.set_cipher_list(ciphers) + + def load_verify_locations(self, cafile=None, capath=None, cadata=None): + if cafile is not None: + cafile = cafile.encode('utf-8') + if capath is not None: + capath = capath.encode('utf-8') + self._ctx.load_verify_locations(cafile, capath) + if cadata is not None: + self._ctx.load_verify_locations(BytesIO(cadata)) + + def load_cert_chain(self, certfile, keyfile=None, password=None): + self._ctx.use_certificate_file(certfile) + if password is not None: + self._ctx.set_passwd_cb(lambda max_length, prompt_twice, userdata: password) + self._ctx.use_privatekey_file(keyfile or certfile) + + def wrap_socket(self, sock, server_side=False, + do_handshake_on_connect=True, suppress_ragged_eofs=True, + server_hostname=None): + cnx = OpenSSL.SSL.Connection(self._ctx, sock) + + if isinstance(server_hostname, six.text_type): # Platform-specific: Python 3 + server_hostname = server_hostname.encode('utf-8') + + if server_hostname is not None: + cnx.set_tlsext_host_name(server_hostname) + + cnx.set_connect_state() + + while True: + try: + cnx.do_handshake() + except OpenSSL.SSL.WantReadError: + rd = util.wait_for_read(sock, sock.gettimeout()) + if not rd: + raise timeout('select timed out') + continue + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake: %r' % e) + break + + return WrappedSocket(cnx, sock) + + +def _verify_callback(cnx, x509, err_no, err_depth, return_code): + return err_no == 0 diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py new file mode 100644 index 00000000..fcc30118 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/securetransport.py @@ -0,0 +1,808 @@ +# SPDX-License-Identifier: MIT +""" +SecureTranport support for urllib3 via ctypes. + +This makes platform-native TLS available to urllib3 users on macOS without the +use of a compiler. This is an important feature because the Python Package +Index is moving to become a TLSv1.2-or-higher server, and the default OpenSSL +that ships with macOS is not capable of doing TLSv1.2. The only way to resolve +this is to give macOS users an alternative solution to the problem, and that +solution is to use SecureTransport. + +We use ctypes here because this solution must not require a compiler. That's +because pip is not allowed to require a compiler either. + +This is not intended to be a seriously long-term solution to this problem. +The hope is that PEP 543 will eventually solve this issue for us, at which +point we can retire this contrib module. But in the short term, we need to +solve the impending tire fire that is Python on Mac without this kind of +contrib module. So...here we are. + +To use this module, simply import and inject it:: + + import urllib3.contrib.securetransport + urllib3.contrib.securetransport.inject_into_urllib3() + +Happy TLSing! +""" +from __future__ import absolute_import + +import contextlib +import ctypes +import errno +import os.path +import shutil +import socket +import ssl +import threading +import weakref + +from .. import util +from ._securetransport.bindings import ( + Security, SecurityConst, CoreFoundation +) +from ._securetransport.low_level import ( + _assert_no_error, _cert_array_from_pem, _temporary_keychain, + _load_client_cert_chain +) + +try: # Platform-specific: Python 2 + from socket import _fileobject +except ImportError: # Platform-specific: Python 3 + _fileobject = None + from ..packages.backports.makefile import backport_makefile + +try: + memoryview(b'') +except NameError: + raise ImportError("SecureTransport only works on Pythons with memoryview") + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI always works +HAS_SNI = True + +orig_util_HAS_SNI = util.HAS_SNI +orig_util_SSLContext = util.ssl_.SSLContext + +# This dictionary is used by the read callback to obtain a handle to the +# calling wrapped socket. This is a pretty silly approach, but for now it'll +# do. I feel like I should be able to smuggle a handle to the wrapped socket +# directly in the SSLConnectionRef, but for now this approach will work I +# guess. +# +# We need to lock around this structure for inserts, but we don't do it for +# reads/writes in the callbacks. The reasoning here goes as follows: +# +# 1. It is not possible to call into the callbacks before the dictionary is +# populated, so once in the callback the id must be in the dictionary. +# 2. The callbacks don't mutate the dictionary, they only read from it, and +# so cannot conflict with any of the insertions. +# +# This is good: if we had to lock in the callbacks we'd drastically slow down +# the performance of this code. +_connection_refs = weakref.WeakValueDictionary() +_connection_ref_lock = threading.Lock() + +# Limit writes to 16kB. This is OpenSSL's limit, but we'll cargo-cult it over +# for no better reason than we need *a* limit, and this one is right there. +SSL_WRITE_BLOCKSIZE = 16384 + +# This is our equivalent of util.ssl_.DEFAULT_CIPHERS, but expanded out to +# individual cipher suites. We need to do this becuase this is how +# SecureTransport wants them. +CIPHER_SUITES = [ + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_DHE_DSS_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_DHE_RSA_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_DHE_DSS_WITH_AES_128_CBC_SHA, + SecurityConst.TLS_RSA_WITH_AES_256_GCM_SHA384, + SecurityConst.TLS_RSA_WITH_AES_128_GCM_SHA256, + SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA256, + SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA256, + SecurityConst.TLS_RSA_WITH_AES_256_CBC_SHA, + SecurityConst.TLS_RSA_WITH_AES_128_CBC_SHA, +] + +# Basically this is simple: for PROTOCOL_SSLv23 we turn it into a low of +# TLSv1 and a high of TLSv1.2. For everything else, we pin to that version. +_protocol_to_min_max = { + ssl.PROTOCOL_SSLv23: (SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol12), +} + +if hasattr(ssl, "PROTOCOL_SSLv2"): + _protocol_to_min_max[ssl.PROTOCOL_SSLv2] = ( + SecurityConst.kSSLProtocol2, SecurityConst.kSSLProtocol2 + ) +if hasattr(ssl, "PROTOCOL_SSLv3"): + _protocol_to_min_max[ssl.PROTOCOL_SSLv3] = ( + SecurityConst.kSSLProtocol3, SecurityConst.kSSLProtocol3 + ) +if hasattr(ssl, "PROTOCOL_TLSv1"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1] = ( + SecurityConst.kTLSProtocol1, SecurityConst.kTLSProtocol1 + ) +if hasattr(ssl, "PROTOCOL_TLSv1_1"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1_1] = ( + SecurityConst.kTLSProtocol11, SecurityConst.kTLSProtocol11 + ) +if hasattr(ssl, "PROTOCOL_TLSv1_2"): + _protocol_to_min_max[ssl.PROTOCOL_TLSv1_2] = ( + SecurityConst.kTLSProtocol12, SecurityConst.kTLSProtocol12 + ) +if hasattr(ssl, "PROTOCOL_TLS"): + _protocol_to_min_max[ssl.PROTOCOL_TLS] = _protocol_to_min_max[ssl.PROTOCOL_SSLv23] + + +def inject_into_urllib3(): + """ + Monkey-patch urllib3 with SecureTransport-backed SSL-support. + """ + util.ssl_.SSLContext = SecureTransportContext + util.HAS_SNI = HAS_SNI + util.ssl_.HAS_SNI = HAS_SNI + util.IS_SECURETRANSPORT = True + util.ssl_.IS_SECURETRANSPORT = True + + +def extract_from_urllib3(): + """ + Undo monkey-patching by :func:`inject_into_urllib3`. + """ + util.ssl_.SSLContext = orig_util_SSLContext + util.HAS_SNI = orig_util_HAS_SNI + util.ssl_.HAS_SNI = orig_util_HAS_SNI + util.IS_SECURETRANSPORT = False + util.ssl_.IS_SECURETRANSPORT = False + + +def _read_callback(connection_id, data_buffer, data_length_pointer): + """ + SecureTransport read callback. This is called by ST to request that data + be returned from the socket. + """ + wrapped_socket = None + try: + wrapped_socket = _connection_refs.get(connection_id) + if wrapped_socket is None: + return SecurityConst.errSSLInternal + base_socket = wrapped_socket.socket + + requested_length = data_length_pointer[0] + + timeout = wrapped_socket.gettimeout() + error = None + read_count = 0 + buffer = (ctypes.c_char * requested_length).from_address(data_buffer) + buffer_view = memoryview(buffer) + + try: + while read_count < requested_length: + if timeout is None or timeout >= 0: + readables = util.wait_for_read([base_socket], timeout) + if not readables: + raise socket.error(errno.EAGAIN, 'timed out') + + # We need to tell ctypes that we have a buffer that can be + # written to. Upsettingly, we do that like this: + chunk_size = base_socket.recv_into( + buffer_view[read_count:requested_length] + ) + read_count += chunk_size + if not chunk_size: + if not read_count: + return SecurityConst.errSSLClosedGraceful + break + except (socket.error) as e: + error = e.errno + + if error is not None and error != errno.EAGAIN: + if error == errno.ECONNRESET: + return SecurityConst.errSSLClosedAbort + raise + + data_length_pointer[0] = read_count + + if read_count != requested_length: + return SecurityConst.errSSLWouldBlock + + return 0 + except Exception as e: + if wrapped_socket is not None: + wrapped_socket._exception = e + return SecurityConst.errSSLInternal + + +def _write_callback(connection_id, data_buffer, data_length_pointer): + """ + SecureTransport write callback. This is called by ST to request that data + actually be sent on the network. + """ + wrapped_socket = None + try: + wrapped_socket = _connection_refs.get(connection_id) + if wrapped_socket is None: + return SecurityConst.errSSLInternal + base_socket = wrapped_socket.socket + + bytes_to_write = data_length_pointer[0] + data = ctypes.string_at(data_buffer, bytes_to_write) + + timeout = wrapped_socket.gettimeout() + error = None + sent = 0 + + try: + while sent < bytes_to_write: + if timeout is None or timeout >= 0: + writables = util.wait_for_write([base_socket], timeout) + if not writables: + raise socket.error(errno.EAGAIN, 'timed out') + chunk_sent = base_socket.send(data) + sent += chunk_sent + + # This has some needless copying here, but I'm not sure there's + # much value in optimising this data path. + data = data[chunk_sent:] + except (socket.error) as e: + error = e.errno + + if error is not None and error != errno.EAGAIN: + if error == errno.ECONNRESET: + return SecurityConst.errSSLClosedAbort + raise + + data_length_pointer[0] = sent + if sent != bytes_to_write: + return SecurityConst.errSSLWouldBlock + + return 0 + except Exception as e: + if wrapped_socket is not None: + wrapped_socket._exception = e + return SecurityConst.errSSLInternal + + +# We need to keep these two objects references alive: if they get GC'd while +# in use then SecureTransport could attempt to call a function that is in freed +# memory. That would be...uh...bad. Yeah, that's the word. Bad. +_read_callback_pointer = Security.SSLReadFunc(_read_callback) +_write_callback_pointer = Security.SSLWriteFunc(_write_callback) + + +class WrappedSocket(object): + """ + API-compatibility wrapper for Python's OpenSSL wrapped socket object. + + Note: _makefile_refs, _drop(), and _reuse() are needed for the garbage + collector of PyPy. + """ + def __init__(self, socket): + self.socket = socket + self.context = None + self._makefile_refs = 0 + self._closed = False + self._exception = None + self._keychain = None + self._keychain_dir = None + self._client_cert_chain = None + + # We save off the previously-configured timeout and then set it to + # zero. This is done because we use select and friends to handle the + # timeouts, but if we leave the timeout set on the lower socket then + # Python will "kindly" call select on that socket again for us. Avoid + # that by forcing the timeout to zero. + self._timeout = self.socket.gettimeout() + self.socket.settimeout(0) + + @contextlib.contextmanager + def _raise_on_error(self): + """ + A context manager that can be used to wrap calls that do I/O from + SecureTransport. If any of the I/O callbacks hit an exception, this + context manager will correctly propagate the exception after the fact. + This avoids silently swallowing those exceptions. + + It also correctly forces the socket closed. + """ + self._exception = None + + # We explicitly don't catch around this yield because in the unlikely + # event that an exception was hit in the block we don't want to swallow + # it. + yield + if self._exception is not None: + exception, self._exception = self._exception, None + self.close() + raise exception + + def _set_ciphers(self): + """ + Sets up the allowed ciphers. By default this matches the set in + util.ssl_.DEFAULT_CIPHERS, at least as supported by macOS. This is done + custom and doesn't allow changing at this time, mostly because parsing + OpenSSL cipher strings is going to be a freaking nightmare. + """ + ciphers = (Security.SSLCipherSuite * len(CIPHER_SUITES))(*CIPHER_SUITES) + result = Security.SSLSetEnabledCiphers( + self.context, ciphers, len(CIPHER_SUITES) + ) + _assert_no_error(result) + + def _custom_validate(self, verify, trust_bundle): + """ + Called when we have set custom validation. We do this in two cases: + first, when cert validation is entirely disabled; and second, when + using a custom trust DB. + """ + # If we disabled cert validation, just say: cool. + if not verify: + return + + # We want data in memory, so load it up. + if os.path.isfile(trust_bundle): + with open(trust_bundle, 'rb') as f: + trust_bundle = f.read() + + cert_array = None + trust = Security.SecTrustRef() + + try: + # Get a CFArray that contains the certs we want. + cert_array = _cert_array_from_pem(trust_bundle) + + # Ok, now the hard part. We want to get the SecTrustRef that ST has + # created for this connection, shove our CAs into it, tell ST to + # ignore everything else it knows, and then ask if it can build a + # chain. This is a buuuunch of code. + result = Security.SSLCopyPeerTrust( + self.context, ctypes.byref(trust) + ) + _assert_no_error(result) + if not trust: + raise ssl.SSLError("Failed to copy trust reference") + + result = Security.SecTrustSetAnchorCertificates(trust, cert_array) + _assert_no_error(result) + + result = Security.SecTrustSetAnchorCertificatesOnly(trust, True) + _assert_no_error(result) + + trust_result = Security.SecTrustResultType() + result = Security.SecTrustEvaluate( + trust, ctypes.byref(trust_result) + ) + _assert_no_error(result) + finally: + if trust: + CoreFoundation.CFRelease(trust) + + if cert_array is None: + CoreFoundation.CFRelease(cert_array) + + # Ok, now we can look at what the result was. + successes = ( + SecurityConst.kSecTrustResultUnspecified, + SecurityConst.kSecTrustResultProceed + ) + if trust_result.value not in successes: + raise ssl.SSLError( + "certificate verify failed, error code: %d" % + trust_result.value + ) + + def handshake(self, + server_hostname, + verify, + trust_bundle, + min_version, + max_version, + client_cert, + client_key, + client_key_passphrase): + """ + Actually performs the TLS handshake. This is run automatically by + wrapped socket, and shouldn't be needed in user code. + """ + # First, we do the initial bits of connection setup. We need to create + # a context, set its I/O funcs, and set the connection reference. + self.context = Security.SSLCreateContext( + None, SecurityConst.kSSLClientSide, SecurityConst.kSSLStreamType + ) + result = Security.SSLSetIOFuncs( + self.context, _read_callback_pointer, _write_callback_pointer + ) + _assert_no_error(result) + + # Here we need to compute the handle to use. We do this by taking the + # id of self modulo 2**31 - 1. If this is already in the dictionary, we + # just keep incrementing by one until we find a free space. + with _connection_ref_lock: + handle = id(self) % 2147483647 + while handle in _connection_refs: + handle = (handle + 1) % 2147483647 + _connection_refs[handle] = self + + result = Security.SSLSetConnection(self.context, handle) + _assert_no_error(result) + + # If we have a server hostname, we should set that too. + if server_hostname: + if not isinstance(server_hostname, bytes): + server_hostname = server_hostname.encode('utf-8') + + result = Security.SSLSetPeerDomainName( + self.context, server_hostname, len(server_hostname) + ) + _assert_no_error(result) + + # Setup the ciphers. + self._set_ciphers() + + # Set the minimum and maximum TLS versions. + result = Security.SSLSetProtocolVersionMin(self.context, min_version) + _assert_no_error(result) + result = Security.SSLSetProtocolVersionMax(self.context, max_version) + _assert_no_error(result) + + # If there's a trust DB, we need to use it. We do that by telling + # SecureTransport to break on server auth. We also do that if we don't + # want to validate the certs at all: we just won't actually do any + # authing in that case. + if not verify or trust_bundle is not None: + result = Security.SSLSetSessionOption( + self.context, + SecurityConst.kSSLSessionOptionBreakOnServerAuth, + True + ) + _assert_no_error(result) + + # If there's a client cert, we need to use it. + if client_cert: + self._keychain, self._keychain_dir = _temporary_keychain() + self._client_cert_chain = _load_client_cert_chain( + self._keychain, client_cert, client_key + ) + result = Security.SSLSetCertificate( + self.context, self._client_cert_chain + ) + _assert_no_error(result) + + while True: + with self._raise_on_error(): + result = Security.SSLHandshake(self.context) + + if result == SecurityConst.errSSLWouldBlock: + raise socket.timeout("handshake timed out") + elif result == SecurityConst.errSSLServerAuthCompleted: + self._custom_validate(verify, trust_bundle) + continue + else: + _assert_no_error(result) + break + + def fileno(self): + return self.socket.fileno() + + # Copy-pasted from Python 3.5 source code + def _decref_socketios(self): + if self._makefile_refs > 0: + self._makefile_refs -= 1 + if self._closed: + self.close() + + def recv(self, bufsiz): + buffer = ctypes.create_string_buffer(bufsiz) + bytes_read = self.recv_into(buffer, bufsiz) + data = buffer[:bytes_read] + return data + + def recv_into(self, buffer, nbytes=None): + # Read short on EOF. + if self._closed: + return 0 + + if nbytes is None: + nbytes = len(buffer) + + buffer = (ctypes.c_char * nbytes).from_buffer(buffer) + processed_bytes = ctypes.c_size_t(0) + + with self._raise_on_error(): + result = Security.SSLRead( + self.context, buffer, nbytes, ctypes.byref(processed_bytes) + ) + + # There are some result codes that we want to treat as "not always + # errors". Specifically, those are errSSLWouldBlock, + # errSSLClosedGraceful, and errSSLClosedNoNotify. + if (result == SecurityConst.errSSLWouldBlock): + # If we didn't process any bytes, then this was just a time out. + # However, we can get errSSLWouldBlock in situations when we *did* + # read some data, and in those cases we should just read "short" + # and return. + if processed_bytes.value == 0: + # Timed out, no data read. + raise socket.timeout("recv timed out") + elif result in (SecurityConst.errSSLClosedGraceful, SecurityConst.errSSLClosedNoNotify): + # The remote peer has closed this connection. We should do so as + # well. Note that we don't actually return here because in + # principle this could actually be fired along with return data. + # It's unlikely though. + self.close() + else: + _assert_no_error(result) + + # Ok, we read and probably succeeded. We should return whatever data + # was actually read. + return processed_bytes.value + + def settimeout(self, timeout): + self._timeout = timeout + + def gettimeout(self): + return self._timeout + + def send(self, data): + processed_bytes = ctypes.c_size_t(0) + + with self._raise_on_error(): + result = Security.SSLWrite( + self.context, data, len(data), ctypes.byref(processed_bytes) + ) + + if result == SecurityConst.errSSLWouldBlock and processed_bytes.value == 0: + # Timed out + raise socket.timeout("send timed out") + else: + _assert_no_error(result) + + # We sent, and probably succeeded. Tell them how much we sent. + return processed_bytes.value + + def sendall(self, data): + total_sent = 0 + while total_sent < len(data): + sent = self.send(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE]) + total_sent += sent + + def shutdown(self): + with self._raise_on_error(): + Security.SSLClose(self.context) + + def close(self): + # TODO: should I do clean shutdown here? Do I have to? + if self._makefile_refs < 1: + self._closed = True + if self.context: + CoreFoundation.CFRelease(self.context) + self.context = None + if self._client_cert_chain: + CoreFoundation.CFRelease(self._client_cert_chain) + self._client_cert_chain = None + if self._keychain: + Security.SecKeychainDelete(self._keychain) + CoreFoundation.CFRelease(self._keychain) + shutil.rmtree(self._keychain_dir) + self._keychain = self._keychain_dir = None + return self.socket.close() + else: + self._makefile_refs -= 1 + + def getpeercert(self, binary_form=False): + # Urgh, annoying. + # + # Here's how we do this: + # + # 1. Call SSLCopyPeerTrust to get hold of the trust object for this + # connection. + # 2. Call SecTrustGetCertificateAtIndex for index 0 to get the leaf. + # 3. To get the CN, call SecCertificateCopyCommonName and process that + # string so that it's of the appropriate type. + # 4. To get the SAN, we need to do something a bit more complex: + # a. Call SecCertificateCopyValues to get the data, requesting + # kSecOIDSubjectAltName. + # b. Mess about with this dictionary to try to get the SANs out. + # + # This is gross. Really gross. It's going to be a few hundred LoC extra + # just to repeat something that SecureTransport can *already do*. So my + # operating assumption at this time is that what we want to do is + # instead to just flag to urllib3 that it shouldn't do its own hostname + # validation when using SecureTransport. + if not binary_form: + raise ValueError( + "SecureTransport only supports dumping binary certs" + ) + trust = Security.SecTrustRef() + certdata = None + der_bytes = None + + try: + # Grab the trust store. + result = Security.SSLCopyPeerTrust( + self.context, ctypes.byref(trust) + ) + _assert_no_error(result) + if not trust: + # Probably we haven't done the handshake yet. No biggie. + return None + + cert_count = Security.SecTrustGetCertificateCount(trust) + if not cert_count: + # Also a case that might happen if we haven't handshaked. + # Handshook? Handshaken? + return None + + leaf = Security.SecTrustGetCertificateAtIndex(trust, 0) + assert leaf + + # Ok, now we want the DER bytes. + certdata = Security.SecCertificateCopyData(leaf) + assert certdata + + data_length = CoreFoundation.CFDataGetLength(certdata) + data_buffer = CoreFoundation.CFDataGetBytePtr(certdata) + der_bytes = ctypes.string_at(data_buffer, data_length) + finally: + if certdata: + CoreFoundation.CFRelease(certdata) + if trust: + CoreFoundation.CFRelease(trust) + + return der_bytes + + def _reuse(self): + self._makefile_refs += 1 + + def _drop(self): + if self._makefile_refs < 1: + self.close() + else: + self._makefile_refs -= 1 + + +if _fileobject: # Platform-specific: Python 2 + def makefile(self, mode, bufsize=-1): + self._makefile_refs += 1 + return _fileobject(self, mode, bufsize, close=True) +else: # Platform-specific: Python 3 + def makefile(self, mode="r", buffering=None, *args, **kwargs): + # We disable buffering with SecureTransport because it conflicts with + # the buffering that ST does internally (see issue #1153 for more). + buffering = 0 + return backport_makefile(self, mode, buffering, *args, **kwargs) + +WrappedSocket.makefile = makefile + + +class SecureTransportContext(object): + """ + I am a wrapper class for the SecureTransport library, to translate the + interface of the standard library ``SSLContext`` object to calls into + SecureTransport. + """ + def __init__(self, protocol): + self._min_version, self._max_version = _protocol_to_min_max[protocol] + self._options = 0 + self._verify = False + self._trust_bundle = None + self._client_cert = None + self._client_key = None + self._client_key_passphrase = None + + @property + def check_hostname(self): + """ + SecureTransport cannot have its hostname checking disabled. For more, + see the comment on getpeercert() in this file. + """ + return True + + @check_hostname.setter + def check_hostname(self, value): + """ + SecureTransport cannot have its hostname checking disabled. For more, + see the comment on getpeercert() in this file. + """ + pass + + @property + def options(self): + # TODO: Well, crap. + # + # So this is the bit of the code that is the most likely to cause us + # trouble. Essentially we need to enumerate all of the SSL options that + # users might want to use and try to see if we can sensibly translate + # them, or whether we should just ignore them. + return self._options + + @options.setter + def options(self, value): + # TODO: Update in line with above. + self._options = value + + @property + def verify_mode(self): + return ssl.CERT_REQUIRED if self._verify else ssl.CERT_NONE + + @verify_mode.setter + def verify_mode(self, value): + self._verify = True if value == ssl.CERT_REQUIRED else False + + def set_default_verify_paths(self): + # So, this has to do something a bit weird. Specifically, what it does + # is nothing. + # + # This means that, if we had previously had load_verify_locations + # called, this does not undo that. We need to do that because it turns + # out that the rest of the urllib3 code will attempt to load the + # default verify paths if it hasn't been told about any paths, even if + # the context itself was sometime earlier. We resolve that by just + # ignoring it. + pass + + def load_default_certs(self): + return self.set_default_verify_paths() + + def set_ciphers(self, ciphers): + # For now, we just require the default cipher string. + if ciphers != util.ssl_.DEFAULT_CIPHERS: + raise ValueError( + "SecureTransport doesn't support custom cipher strings" + ) + + def load_verify_locations(self, cafile=None, capath=None, cadata=None): + # OK, we only really support cadata and cafile. + if capath is not None: + raise ValueError( + "SecureTransport does not support cert directories" + ) + + self._trust_bundle = cafile or cadata + + def load_cert_chain(self, certfile, keyfile=None, password=None): + self._client_cert = certfile + self._client_key = keyfile + self._client_cert_passphrase = password + + def wrap_socket(self, sock, server_side=False, + do_handshake_on_connect=True, suppress_ragged_eofs=True, + server_hostname=None): + # So, what do we do here? Firstly, we assert some properties. This is a + # stripped down shim, so there is some functionality we don't support. + # See PEP 543 for the real deal. + assert not server_side + assert do_handshake_on_connect + assert suppress_ragged_eofs + + # Ok, we're good to go. Now we want to create the wrapped socket object + # and store it in the appropriate place. + wrapped_socket = WrappedSocket(sock) + + # Now we can handshake + wrapped_socket.handshake( + server_hostname, self._verify, self._trust_bundle, + self._min_version, self._max_version, self._client_cert, + self._client_key, self._client_key_passphrase + ) + return wrapped_socket diff --git a/collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py b/collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py new file mode 100644 index 00000000..1cb79285 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/contrib/socks.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: MIT +""" +This module contains provisional support for SOCKS proxies from within +urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and +SOCKS5. To enable its functionality, either install PySocks or install this +module with the ``socks`` extra. + +The SOCKS implementation supports the full range of urllib3 features. It also +supports the following SOCKS features: + +- SOCKS4 +- SOCKS4a +- SOCKS5 +- Usernames and passwords for the SOCKS proxy + +Known Limitations: + +- Currently PySocks does not support contacting remote websites via literal + IPv6 addresses. Any such connection attempt will fail. You must use a domain + name. +- Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any + such connection attempt will fail. +""" +from __future__ import absolute_import + +try: + import socks +except ImportError: + import warnings + from ..exceptions import DependencyWarning + + warnings.warn(( + 'SOCKS support in urllib3 requires the installation of optional ' + 'dependencies: specifically, PySocks. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies' + ), + DependencyWarning + ) + raise + +from socket import error as SocketError, timeout as SocketTimeout + +from ..connection import ( + HTTPConnection, HTTPSConnection +) +from ..connectionpool import ( + HTTPConnectionPool, HTTPSConnectionPool +) +from ..exceptions import ConnectTimeoutError, NewConnectionError +from ..poolmanager import PoolManager +from ..util.url import parse_url + +try: + import ssl +except ImportError: + ssl = None + + +class SOCKSConnection(HTTPConnection): + """ + A plain-text HTTP connection that connects via a SOCKS proxy. + """ + def __init__(self, *args, **kwargs): + self._socks_options = kwargs.pop('_socks_options') + super(SOCKSConnection, self).__init__(*args, **kwargs) + + def _new_conn(self): + """ + Establish a new connection via the SOCKS proxy. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + conn = socks.create_connection( + (self.host, self.port), + proxy_type=self._socks_options['socks_version'], + proxy_addr=self._socks_options['proxy_host'], + proxy_port=self._socks_options['proxy_port'], + proxy_username=self._socks_options['username'], + proxy_password=self._socks_options['password'], + proxy_rdns=self._socks_options['rdns'], + timeout=self.timeout, + **extra_kw + ) + + except SocketTimeout as e: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + except socks.ProxyError as e: + # This is fragile as hell, but it seems to be the only way to raise + # useful errors here. + if e.socket_err: + error = e.socket_err + if isinstance(error, SocketTimeout): + raise ConnectTimeoutError( + self, + "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout) + ) + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % error + ) + else: + raise NewConnectionError( + self, + "Failed to establish a new connection: %s" % e + ) + + except SocketError as e: # Defensive: PySocks should catch all these. + raise NewConnectionError( + self, "Failed to establish a new connection: %s" % e) + + return conn + + +# We don't need to duplicate the Verified/Unverified distinction from +# urllib3/connection.py here because the HTTPSConnection will already have been +# correctly set to either the Verified or Unverified form by that module. This +# means the SOCKSHTTPSConnection will automatically be the correct type. +class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): + pass + + +class SOCKSHTTPConnectionPool(HTTPConnectionPool): + ConnectionCls = SOCKSConnection + + +class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): + ConnectionCls = SOCKSHTTPSConnection + + +class SOCKSProxyManager(PoolManager): + """ + A version of the urllib3 ProxyManager that routes connections via the + defined SOCKS proxy. + """ + pool_classes_by_scheme = { + 'http': SOCKSHTTPConnectionPool, + 'https': SOCKSHTTPSConnectionPool, + } + + def __init__(self, proxy_url, username=None, password=None, + num_pools=10, headers=None, **connection_pool_kw): + parsed = parse_url(proxy_url) + + if parsed.scheme == 'socks5': + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = False + elif parsed.scheme == 'socks5h': + socks_version = socks.PROXY_TYPE_SOCKS5 + rdns = True + elif parsed.scheme == 'socks4': + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = False + elif parsed.scheme == 'socks4a': + socks_version = socks.PROXY_TYPE_SOCKS4 + rdns = True + else: + raise ValueError( + "Unable to determine SOCKS version from %s" % proxy_url + ) + + self.proxy_url = proxy_url + + socks_options = { + 'socks_version': socks_version, + 'proxy_host': parsed.host, + 'proxy_port': parsed.port, + 'username': username, + 'password': password, + 'rdns': rdns + } + connection_pool_kw['_socks_options'] = socks_options + + super(SOCKSProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw + ) + + self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme diff --git a/collectors/python.d.plugin/python_modules/urllib3/exceptions.py b/collectors/python.d.plugin/python_modules/urllib3/exceptions.py new file mode 100644 index 00000000..a71cabe0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/exceptions.py @@ -0,0 +1,247 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from .packages.six.moves.http_client import ( + IncompleteRead as httplib_IncompleteRead +) +# Base Exceptions + + +class HTTPError(Exception): + "Base exception used by this module." + pass + + +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + +class PoolError(HTTPError): + "Base exception for errors caused within a pool." + def __init__(self, pool, message): + self.pool = pool + HTTPError.__init__(self, "%s: %s" % (pool, message)) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, None) + + +class RequestError(PoolError): + "Base exception for PoolErrors that have associated URLs." + def __init__(self, pool, url, message): + self.url = url + PoolError.__init__(self, pool, message) + + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url, None) + + +class SSLError(HTTPError): + "Raised when SSL certificate fails in an HTTPS connection." + pass + + +class ProxyError(HTTPError): + "Raised when the connection to a proxy fails." + pass + + +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." + pass + + +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." + pass + + +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + +# Leaf Exceptions + +class MaxRetryError(RequestError): + """Raised when the maximum number of retries is exceeded. + + :param pool: The connection pool + :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` + :param string url: The requested Url + :param exceptions.Exception reason: The underlying error + + """ + + def __init__(self, pool, url, reason=None): + self.reason = reason + + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) + + RequestError.__init__(self, pool, url, message) + + +class HostChangedError(RequestError): + "Raised when an existing pool gets a request for a foreign host." + + def __init__(self, pool, url, retries=3): + message = "Tried to open a foreign host with url: %s" % url + RequestError.__init__(self, pool, url, message) + self.retries = retries + + +class TimeoutStateError(HTTPError): + """ Raised when passing an invalid state to a timeout """ + pass + + +class TimeoutError(HTTPError): + """ Raised when a socket timeout error occurs. + + Catching this error will catch both :exc:`ReadTimeoutErrors + <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`. + """ + pass + + +class ReadTimeoutError(TimeoutError, RequestError): + "Raised when a socket timeout occurs while receiving data from a server" + pass + + +# This timeout error does not have a URL attached and needs to inherit from the +# base HTTPError +class ConnectTimeoutError(TimeoutError): + "Raised when a socket timeout occurs while connecting to a server" + pass + + +class NewConnectionError(ConnectTimeoutError, PoolError): + "Raised when we fail to establish a new connection. Usually ECONNREFUSED." + pass + + +class EmptyPoolError(PoolError): + "Raised when a pool runs out of connections and no more are allowed." + pass + + +class ClosedPoolError(PoolError): + "Raised when a request enters a pool after the pool has been closed." + pass + + +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): + "Raised when get_host or similar fails to parse the URL input." + + def __init__(self, location): + message = "Failed to parse: %s" % location + HTTPError.__init__(self, message) + + self.location = location + + +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + +class SecurityWarning(HTTPWarning): + "Warned when perfoming security reducing actions" + pass + + +class SubjectAltNameWarning(SecurityWarning): + "Warned when connecting to a host with a certificate missing a SAN." + pass + + +class InsecureRequestWarning(SecurityWarning): + "Warned when making an unverified HTTPS request." + pass + + +class SystemTimeWarning(SecurityWarning): + "Warned when system time is suspected to be wrong" + pass + + +class InsecurePlatformWarning(SecurityWarning): + "Warned when certain SSL configuration is not available on a platform." + pass + + +class SNIMissingWarning(HTTPWarning): + "Warned when making a HTTPS request without SNI available." + pass + + +class DependencyWarning(HTTPWarning): + """ + Warned when an attempt is made to import a module with missing optional + dependencies. + """ + pass + + +class ResponseNotChunked(ProtocolError, ValueError): + "Response needs to be chunked in order to read it as chunks." + pass + + +class BodyNotHttplibCompatible(HTTPError): + """ + Body should be httplib.HTTPResponse like (have an fp attribute which + returns raw chunks) for read_chunked(). + """ + pass + + +class IncompleteRead(HTTPError, httplib_IncompleteRead): + """ + Response length doesn't match expected Content-Length + + Subclass of http_client.IncompleteRead to allow int value + for `partial` to avoid creating large objects on streamed + reads. + """ + def __init__(self, partial, expected): + super(IncompleteRead, self).__init__(partial, expected) + + def __repr__(self): + return ('IncompleteRead(%i bytes read, ' + '%i more expected)' % (self.partial, self.expected)) + + +class InvalidHeader(HTTPError): + "The header provided was somehow invalid." + pass + + +class ProxySchemeUnknown(AssertionError, ValueError): + "ProxyManager does not support the supplied scheme" + # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. + + def __init__(self, scheme): + message = "Not supported proxy scheme %s" % scheme + super(ProxySchemeUnknown, self).__init__(message) + + +class HeaderParsingError(HTTPError): + "Raised by assert_header_parsing, but we convert it to a log.warning statement." + def __init__(self, defects, unparsed_data): + message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data) + super(HeaderParsingError, self).__init__(message) + + +class UnrewindableBodyError(HTTPError): + "urllib3 encountered an error when trying to rewind a body" + pass diff --git a/collectors/python.d.plugin/python_modules/urllib3/fields.py b/collectors/python.d.plugin/python_modules/urllib3/fields.py new file mode 100644 index 00000000..de7577b7 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/fields.py @@ -0,0 +1,179 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import email.utils +import mimetypes + +from .packages import six + + +def guess_content_type(filename, default='application/octet-stream'): + """ + Guess the "Content-Type" of a file. + + :param filename: + The filename to guess the "Content-Type" of using :mod:`mimetypes`. + :param default: + If no "Content-Type" can be guessed, default to `default`. + """ + if filename: + return mimetypes.guess_type(filename)[0] or default + return default + + +def format_header_param(name, value): + """ + Helper function to format and quote a single header parameter. + + Particularly useful for header parameters which might contain + non-ASCII values, like file names. This follows RFC 2231, as + suggested by RFC 2388 Section 4.4. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + if not any(ch in value for ch in '"\\\r\n'): + result = '%s="%s"' % (name, value) + try: + result.encode('ascii') + except (UnicodeEncodeError, UnicodeDecodeError): + pass + else: + return result + if not six.PY3 and isinstance(value, six.text_type): # Python 2: + value = value.encode('utf-8') + value = email.utils.encode_rfc2231(value, 'utf-8') + value = '%s*=%s' % (name, value) + return value + + +class RequestField(object): + """ + A data container for request body parameters. + + :param name: + The name of this request field. + :param data: + The data/value body. + :param filename: + An optional filename of the request field. + :param headers: + An optional dict-like object of headers to initially use for the field. + """ + def __init__(self, name, data, filename=None, headers=None): + self._name = name + self._filename = filename + self.data = data + self.headers = {} + if headers: + self.headers = dict(headers) + + @classmethod + def from_tuples(cls, fieldname, value): + """ + A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. + + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example:: + + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + + Field names and filenames must be unicode. + """ + if isinstance(value, tuple): + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = guess_content_type(filename) + else: + filename = None + content_type = None + data = value + + request_param = cls(fieldname, data, filename=filename) + request_param.make_multipart(content_type=content_type) + + return request_param + + def _render_part(self, name, value): + """ + Overridable helper function to format a single header parameter. + + :param name: + The name of the parameter, a string expected to be ASCII only. + :param value: + The value of the parameter, provided as a unicode string. + """ + return format_header_param(name, value) + + def _render_parts(self, header_parts): + """ + Helper function to format and quote a single header. + + Useful for single headers that are composed of multiple items. E.g., + 'Content-Disposition' fields. + + :param header_parts: + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. + """ + parts = [] + iterable = header_parts + if isinstance(header_parts, dict): + iterable = header_parts.items() + + for name, value in iterable: + if value is not None: + parts.append(self._render_part(name, value)) + + return '; '.join(parts) + + def render_headers(self): + """ + Renders the headers for this request field. + """ + lines = [] + + sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] + for sort_key in sort_keys: + if self.headers.get(sort_key, False): + lines.append('%s: %s' % (sort_key, self.headers[sort_key])) + + for header_name, header_value in self.headers.items(): + if header_name not in sort_keys: + if header_value: + lines.append('%s: %s' % (header_name, header_value)) + + lines.append('\r\n') + return '\r\n'.join(lines) + + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): + """ + Makes this request field into a multipart request field. + + This method overrides "Content-Disposition", "Content-Type" and + "Content-Location" headers to the request parameter. + + :param content_type: + The 'Content-Type' of the request body. + :param content_location: + The 'Content-Location' of the request body. + + """ + self.headers['Content-Disposition'] = content_disposition or 'form-data' + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) + self.headers['Content-Type'] = content_type + self.headers['Content-Location'] = content_location diff --git a/collectors/python.d.plugin/python_modules/urllib3/filepost.py b/collectors/python.d.plugin/python_modules/urllib3/filepost.py new file mode 100644 index 00000000..3febc9cf --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/filepost.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import codecs + +from uuid import uuid4 +from io import BytesIO + +from .packages import six +from .packages.six import b +from .fields import RequestField + +writer = codecs.lookup('utf-8')[3] + + +def choose_boundary(): + """ + Our embarrassingly-simple replacement for mimetools.choose_boundary. + """ + return uuid4().hex + + +def iter_field_objects(fields): + """ + Iterate over fields. + + Supports list of (k, v) tuples and dicts, and lists of + :class:`~urllib3.fields.RequestField`. + + """ + if isinstance(fields, dict): + i = six.iteritems(fields) + else: + i = iter(fields) + + for field in i: + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) + + +def iter_fields(fields): + """ + .. deprecated:: 1.6 + + Iterate over fields. + + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. + + Supports list of (k, v) tuples and dicts. + """ + if isinstance(fields, dict): + return ((k, v) for k, v in six.iteritems(fields)) + + return ((k, v) for k, v in fields) + + +def encode_multipart_formdata(fields, boundary=None): + """ + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. + + :param fields: + Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). + + :param boundary: + If not specified, then a random boundary will be generated using + :func:`mimetools.choose_boundary`. + """ + body = BytesIO() + if boundary is None: + boundary = choose_boundary() + + for field in iter_field_objects(fields): + body.write(b('--%s\r\n' % (boundary))) + + writer(body).write(field.render_headers()) + data = field.data + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, six.text_type): + writer(body).write(data) + else: + body.write(data) + + body.write(b'\r\n') + + body.write(b('--%s--\r\n' % (boundary))) + + content_type = str('multipart/form-data; boundary=%s' % boundary) + + return body.getvalue(), content_type diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py new file mode 100644 index 00000000..170e974c --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +from . import ssl_match_hostname + +__all__ = ('ssl_match_hostname', ) diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/backports/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/__init__.py diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py new file mode 100644 index 00000000..8ab122f8 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/backports/makefile.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: MIT +""" +backports.makefile +~~~~~~~~~~~~~~~~~~ + +Backports the Python 3 ``socket.makefile`` method for use with anything that +wants to create a "fake" socket object. +""" +import io + +from socket import SocketIO + + +def backport_makefile(self, mode="r", buffering=None, encoding=None, + errors=None, newline=None): + """ + Backport of ``socket.makefile`` from Python 3.5. + """ + if not set(mode) <= set(["r", "w", "b"]): + raise ValueError( + "invalid mode %r (only r, w, b allowed)" % (mode,) + ) + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = SocketIO(self, rawmode) + self._makefile_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py b/collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py new file mode 100644 index 00000000..9f7c0e6b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/ordered_dict.py @@ -0,0 +1,260 @@ +# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. +# Passes Python2.7's test suite and incorporates all the latest updates. +# Copyright 2009 Raymond Hettinger, released under the MIT License. +# http://code.activestate.com/recipes/576693/ +# SPDX-License-Identifier: MIT +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/six.py b/collectors/python.d.plugin/python_modules/urllib3/packages/six.py new file mode 100644 index 00000000..31df5012 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/six.py @@ -0,0 +1,852 @@ +"""Utilities for writing code that runs on Python 2 and 3""" + +# Copyright (c) 2010-2015 Benjamin Peterson +# +# SPDX-License-Identifier: MIT + +from __future__ import absolute_import + +import functools +import itertools +import operator +import sys +import types + +__author__ = "Benjamin Peterson <benjamin@python.org>" +__version__ = "1.10.0" + + +# Useful for very coarse version differentiation. +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 +PY34 = sys.version_info[0:2] >= (3, 4) + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + if sys.platform.startswith("java"): + # Jython always uses 32 bits. + MAXSIZE = int((1 << 31) - 1) + else: + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X + + +def _add_doc(func, doc): + """Add documentation to a function.""" + func.__doc__ = doc + + +def _import_module(name): + """Import module, returning the module after the last dot.""" + __import__(name) + return sys.modules[name] + + +class _LazyDescr(object): + + def __init__(self, name): + self.name = name + + def __get__(self, obj, tp): + result = self._resolve() + setattr(obj, self.name, result) # Invokes __set__. + try: + # This is a bit ugly, but it avoids running this again by + # removing this descriptor. + delattr(obj.__class__, self.name) + except AttributeError: + pass + return result + + +class MovedModule(_LazyDescr): + + def __init__(self, name, old, new=None): + super(MovedModule, self).__init__(name) + if PY3: + if new is None: + new = name + self.mod = new + else: + self.mod = old + + def _resolve(self): + return _import_module(self.mod) + + def __getattr__(self, attr): + _module = self._resolve() + value = getattr(_module, attr) + setattr(self, attr, value) + return value + + +class _LazyModule(types.ModuleType): + + def __init__(self, name): + super(_LazyModule, self).__init__(name) + self.__doc__ = self.__class__.__doc__ + + def __dir__(self): + attrs = ["__doc__", "__name__"] + attrs += [attr.name for attr in self._moved_attributes] + return attrs + + # Subclasses should override this + _moved_attributes = [] + + +class MovedAttribute(_LazyDescr): + + def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): + super(MovedAttribute, self).__init__(name) + if PY3: + if new_mod is None: + new_mod = name + self.mod = new_mod + if new_attr is None: + if old_attr is None: + new_attr = name + else: + new_attr = old_attr + self.attr = new_attr + else: + self.mod = old_mod + if old_attr is None: + old_attr = name + self.attr = old_attr + + def _resolve(self): + module = _import_module(self.mod) + return getattr(module, self.attr) + + +class _SixMetaPathImporter(object): + + """ + A meta path importer to import six.moves and its submodules. + + This class implements a PEP302 finder and loader. It should be compatible + with Python 2.5 and all existing versions of Python3 + """ + + def __init__(self, six_module_name): + self.name = six_module_name + self.known_modules = {} + + def _add_module(self, mod, *fullnames): + for fullname in fullnames: + self.known_modules[self.name + "." + fullname] = mod + + def _get_module(self, fullname): + return self.known_modules[self.name + "." + fullname] + + def find_module(self, fullname, path=None): + if fullname in self.known_modules: + return self + return None + + def __get_module(self, fullname): + try: + return self.known_modules[fullname] + except KeyError: + raise ImportError("This loader does not know module " + fullname) + + def load_module(self, fullname): + try: + # in case of a reload + return sys.modules[fullname] + except KeyError: + pass + mod = self.__get_module(fullname) + if isinstance(mod, MovedModule): + mod = mod._resolve() + else: + mod.__loader__ = self + sys.modules[fullname] = mod + return mod + + def is_package(self, fullname): + """ + Return true, if the named module is a package. + + We need this method to get correct spec objects with + Python 3.4 (see PEP451) + """ + return hasattr(self.__get_module(fullname), "__path__") + + def get_code(self, fullname): + """Return None + + Required, if is_package is implemented""" + self.__get_module(fullname) # eventually raises ImportError + return None + get_source = get_code # same as get_code + +_importer = _SixMetaPathImporter(__name__) + + +class _MovedItems(_LazyModule): + + """Lazy loading of moved objects""" + __path__ = [] # mark as package + + +_moved_attributes = [ + MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), + MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), + MovedAttribute("intern", "__builtin__", "sys"), + MovedAttribute("map", "itertools", "builtins", "imap", "map"), + MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"), + MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"), + MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"), + MovedAttribute("reduce", "__builtin__", "functools"), + MovedAttribute("shlex_quote", "pipes", "shlex", "quote"), + MovedAttribute("StringIO", "StringIO", "io"), + MovedAttribute("UserDict", "UserDict", "collections"), + MovedAttribute("UserList", "UserList", "collections"), + MovedAttribute("UserString", "UserString", "collections"), + MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), + MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), + MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"), + MovedModule("builtins", "__builtin__"), + MovedModule("configparser", "ConfigParser"), + MovedModule("copyreg", "copy_reg"), + MovedModule("dbm_gnu", "gdbm", "dbm.gnu"), + MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"), + MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), + MovedModule("http_cookies", "Cookie", "http.cookies"), + MovedModule("html_entities", "htmlentitydefs", "html.entities"), + MovedModule("html_parser", "HTMLParser", "html.parser"), + MovedModule("http_client", "httplib", "http.client"), + MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"), + MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"), + MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"), + MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"), + MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), + MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), + MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), + MovedModule("cPickle", "cPickle", "pickle"), + MovedModule("queue", "Queue"), + MovedModule("reprlib", "repr"), + MovedModule("socketserver", "SocketServer"), + MovedModule("_thread", "thread", "_thread"), + MovedModule("tkinter", "Tkinter"), + MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), + MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), + MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), + MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), + MovedModule("tkinter_tix", "Tix", "tkinter.tix"), + MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"), + MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), + MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), + MovedModule("tkinter_colorchooser", "tkColorChooser", + "tkinter.colorchooser"), + MovedModule("tkinter_commondialog", "tkCommonDialog", + "tkinter.commondialog"), + MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), + MovedModule("tkinter_font", "tkFont", "tkinter.font"), + MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), + MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", + "tkinter.simpledialog"), + MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"), + MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"), + MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"), + MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), + MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"), + MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"), +] +# Add windows specific modules. +if sys.platform == "win32": + _moved_attributes += [ + MovedModule("winreg", "_winreg"), + ] + +for attr in _moved_attributes: + setattr(_MovedItems, attr.name, attr) + if isinstance(attr, MovedModule): + _importer._add_module(attr, "moves." + attr.name) +del attr + +_MovedItems._moved_attributes = _moved_attributes + +moves = _MovedItems(__name__ + ".moves") +_importer._add_module(moves, "moves") + + +class Module_six_moves_urllib_parse(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_parse""" + + +_urllib_parse_moved_attributes = [ + MovedAttribute("ParseResult", "urlparse", "urllib.parse"), + MovedAttribute("SplitResult", "urlparse", "urllib.parse"), + MovedAttribute("parse_qs", "urlparse", "urllib.parse"), + MovedAttribute("parse_qsl", "urlparse", "urllib.parse"), + MovedAttribute("urldefrag", "urlparse", "urllib.parse"), + MovedAttribute("urljoin", "urlparse", "urllib.parse"), + MovedAttribute("urlparse", "urlparse", "urllib.parse"), + MovedAttribute("urlsplit", "urlparse", "urllib.parse"), + MovedAttribute("urlunparse", "urlparse", "urllib.parse"), + MovedAttribute("urlunsplit", "urlparse", "urllib.parse"), + MovedAttribute("quote", "urllib", "urllib.parse"), + MovedAttribute("quote_plus", "urllib", "urllib.parse"), + MovedAttribute("unquote", "urllib", "urllib.parse"), + MovedAttribute("unquote_plus", "urllib", "urllib.parse"), + MovedAttribute("urlencode", "urllib", "urllib.parse"), + MovedAttribute("splitquery", "urllib", "urllib.parse"), + MovedAttribute("splittag", "urllib", "urllib.parse"), + MovedAttribute("splituser", "urllib", "urllib.parse"), + MovedAttribute("uses_fragment", "urlparse", "urllib.parse"), + MovedAttribute("uses_netloc", "urlparse", "urllib.parse"), + MovedAttribute("uses_params", "urlparse", "urllib.parse"), + MovedAttribute("uses_query", "urlparse", "urllib.parse"), + MovedAttribute("uses_relative", "urlparse", "urllib.parse"), +] +for attr in _urllib_parse_moved_attributes: + setattr(Module_six_moves_urllib_parse, attr.name, attr) +del attr + +Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes + +_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"), + "moves.urllib_parse", "moves.urllib.parse") + + +class Module_six_moves_urllib_error(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_error""" + + +_urllib_error_moved_attributes = [ + MovedAttribute("URLError", "urllib2", "urllib.error"), + MovedAttribute("HTTPError", "urllib2", "urllib.error"), + MovedAttribute("ContentTooShortError", "urllib", "urllib.error"), +] +for attr in _urllib_error_moved_attributes: + setattr(Module_six_moves_urllib_error, attr.name, attr) +del attr + +Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes + +_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"), + "moves.urllib_error", "moves.urllib.error") + + +class Module_six_moves_urllib_request(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_request""" + + +_urllib_request_moved_attributes = [ + MovedAttribute("urlopen", "urllib2", "urllib.request"), + MovedAttribute("install_opener", "urllib2", "urllib.request"), + MovedAttribute("build_opener", "urllib2", "urllib.request"), + MovedAttribute("pathname2url", "urllib", "urllib.request"), + MovedAttribute("url2pathname", "urllib", "urllib.request"), + MovedAttribute("getproxies", "urllib", "urllib.request"), + MovedAttribute("Request", "urllib2", "urllib.request"), + MovedAttribute("OpenerDirector", "urllib2", "urllib.request"), + MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"), + MovedAttribute("ProxyHandler", "urllib2", "urllib.request"), + MovedAttribute("BaseHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"), + MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"), + MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"), + MovedAttribute("FileHandler", "urllib2", "urllib.request"), + MovedAttribute("FTPHandler", "urllib2", "urllib.request"), + MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"), + MovedAttribute("UnknownHandler", "urllib2", "urllib.request"), + MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"), + MovedAttribute("urlretrieve", "urllib", "urllib.request"), + MovedAttribute("urlcleanup", "urllib", "urllib.request"), + MovedAttribute("URLopener", "urllib", "urllib.request"), + MovedAttribute("FancyURLopener", "urllib", "urllib.request"), + MovedAttribute("proxy_bypass", "urllib", "urllib.request"), +] +for attr in _urllib_request_moved_attributes: + setattr(Module_six_moves_urllib_request, attr.name, attr) +del attr + +Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes + +_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"), + "moves.urllib_request", "moves.urllib.request") + + +class Module_six_moves_urllib_response(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_response""" + + +_urllib_response_moved_attributes = [ + MovedAttribute("addbase", "urllib", "urllib.response"), + MovedAttribute("addclosehook", "urllib", "urllib.response"), + MovedAttribute("addinfo", "urllib", "urllib.response"), + MovedAttribute("addinfourl", "urllib", "urllib.response"), +] +for attr in _urllib_response_moved_attributes: + setattr(Module_six_moves_urllib_response, attr.name, attr) +del attr + +Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes + +_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"), + "moves.urllib_response", "moves.urllib.response") + + +class Module_six_moves_urllib_robotparser(_LazyModule): + + """Lazy loading of moved objects in six.moves.urllib_robotparser""" + + +_urllib_robotparser_moved_attributes = [ + MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"), +] +for attr in _urllib_robotparser_moved_attributes: + setattr(Module_six_moves_urllib_robotparser, attr.name, attr) +del attr + +Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes + +_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"), + "moves.urllib_robotparser", "moves.urllib.robotparser") + + +class Module_six_moves_urllib(types.ModuleType): + + """Create a six.moves.urllib namespace that resembles the Python 3 namespace""" + __path__ = [] # mark as package + parse = _importer._get_module("moves.urllib_parse") + error = _importer._get_module("moves.urllib_error") + request = _importer._get_module("moves.urllib_request") + response = _importer._get_module("moves.urllib_response") + robotparser = _importer._get_module("moves.urllib_robotparser") + + def __dir__(self): + return ['parse', 'error', 'request', 'response', 'robotparser'] + +_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"), + "moves.urllib") + + +def add_move(move): + """Add an item to six.moves.""" + setattr(_MovedItems, move.name, move) + + +def remove_move(name): + """Remove item from six.moves.""" + try: + delattr(_MovedItems, name) + except AttributeError: + try: + del moves.__dict__[name] + except KeyError: + raise AttributeError("no such move, %r" % (name,)) + + +if PY3: + _meth_func = "__func__" + _meth_self = "__self__" + + _func_closure = "__closure__" + _func_code = "__code__" + _func_defaults = "__defaults__" + _func_globals = "__globals__" +else: + _meth_func = "im_func" + _meth_self = "im_self" + + _func_closure = "func_closure" + _func_code = "func_code" + _func_defaults = "func_defaults" + _func_globals = "func_globals" + + +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + +try: + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + + +if PY3: + def get_unbound_function(unbound): + return unbound + + create_bound_method = types.MethodType + + def create_unbound_method(func, cls): + return func + + Iterator = object +else: + def get_unbound_function(unbound): + return unbound.im_func + + def create_bound_method(func, obj): + return types.MethodType(func, obj, obj.__class__) + + def create_unbound_method(func, cls): + return types.MethodType(func, None, cls) + + class Iterator(object): + + def next(self): + return type(self).__next__(self) + + callable = callable +_add_doc(get_unbound_function, + """Get the function out of a possibly unbound function""") + + +get_method_function = operator.attrgetter(_meth_func) +get_method_self = operator.attrgetter(_meth_self) +get_function_closure = operator.attrgetter(_func_closure) +get_function_code = operator.attrgetter(_func_code) +get_function_defaults = operator.attrgetter(_func_defaults) +get_function_globals = operator.attrgetter(_func_globals) + + +if PY3: + def iterkeys(d, **kw): + return iter(d.keys(**kw)) + + def itervalues(d, **kw): + return iter(d.values(**kw)) + + def iteritems(d, **kw): + return iter(d.items(**kw)) + + def iterlists(d, **kw): + return iter(d.lists(**kw)) + + viewkeys = operator.methodcaller("keys") + + viewvalues = operator.methodcaller("values") + + viewitems = operator.methodcaller("items") +else: + def iterkeys(d, **kw): + return d.iterkeys(**kw) + + def itervalues(d, **kw): + return d.itervalues(**kw) + + def iteritems(d, **kw): + return d.iteritems(**kw) + + def iterlists(d, **kw): + return d.iterlists(**kw) + + viewkeys = operator.methodcaller("viewkeys") + + viewvalues = operator.methodcaller("viewvalues") + + viewitems = operator.methodcaller("viewitems") + +_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.") +_add_doc(itervalues, "Return an iterator over the values of a dictionary.") +_add_doc(iteritems, + "Return an iterator over the (key, value) pairs of a dictionary.") +_add_doc(iterlists, + "Return an iterator over the (key, [values]) pairs of a dictionary.") + + +if PY3: + def b(s): + return s.encode("latin-1") + + def u(s): + return s + unichr = chr + import struct + int2byte = struct.Struct(">B").pack + del struct + byte2int = operator.itemgetter(0) + indexbytes = operator.getitem + iterbytes = iter + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + _assertCountEqual = "assertCountEqual" + if sys.version_info[1] <= 1: + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" + else: + _assertRaisesRegex = "assertRaisesRegex" + _assertRegex = "assertRegex" +else: + def b(s): + return s + # Workaround for standalone backslash + + def u(s): + return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape") + unichr = unichr + int2byte = chr + + def byte2int(bs): + return ord(bs[0]) + + def indexbytes(buf, i): + return ord(buf[i]) + iterbytes = functools.partial(itertools.imap, ord) + import StringIO + StringIO = BytesIO = StringIO.StringIO + _assertCountEqual = "assertItemsEqual" + _assertRaisesRegex = "assertRaisesRegexp" + _assertRegex = "assertRegexpMatches" +_add_doc(b, """Byte literal""") +_add_doc(u, """Text literal""") + + +def assertCountEqual(self, *args, **kwargs): + return getattr(self, _assertCountEqual)(*args, **kwargs) + + +def assertRaisesRegex(self, *args, **kwargs): + return getattr(self, _assertRaisesRegex)(*args, **kwargs) + + +def assertRegex(self, *args, **kwargs): + return getattr(self, _assertRegex)(*args, **kwargs) + + +if PY3: + exec_ = getattr(moves.builtins, "exec") + + def reraise(tp, value, tb=None): + if value is None: + value = tp() + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + +else: + def exec_(_code_, _globs_=None, _locs_=None): + """Execute code in a namespace.""" + if _globs_ is None: + frame = sys._getframe(1) + _globs_ = frame.f_globals + if _locs_ is None: + _locs_ = frame.f_locals + del frame + elif _locs_ is None: + _locs_ = _globs_ + exec("""exec _code_ in _globs_, _locs_""") + + exec_("""def reraise(tp, value, tb=None): + raise tp, value, tb +""") + + +if sys.version_info[:2] == (3, 2): + exec_("""def raise_from(value, from_value): + if from_value is None: + raise value + raise value from from_value +""") +elif sys.version_info[:2] > (3, 2): + exec_("""def raise_from(value, from_value): + raise value from from_value +""") +else: + def raise_from(value, from_value): + raise value + + +print_ = getattr(moves.builtins, "print", None) +if print_ is None: + def print_(*args, **kwargs): + """The new-style print function for Python 2.4 and 2.5.""" + fp = kwargs.pop("file", sys.stdout) + if fp is None: + return + + def write(data): + if not isinstance(data, basestring): + data = str(data) + # If the file has an encoding, encode unicode with it. + if (isinstance(fp, file) and + isinstance(data, unicode) and + fp.encoding is not None): + errors = getattr(fp, "errors", None) + if errors is None: + errors = "strict" + data = data.encode(fp.encoding, errors) + fp.write(data) + want_unicode = False + sep = kwargs.pop("sep", None) + if sep is not None: + if isinstance(sep, unicode): + want_unicode = True + elif not isinstance(sep, str): + raise TypeError("sep must be None or a string") + end = kwargs.pop("end", None) + if end is not None: + if isinstance(end, unicode): + want_unicode = True + elif not isinstance(end, str): + raise TypeError("end must be None or a string") + if kwargs: + raise TypeError("invalid keyword arguments to print()") + if not want_unicode: + for arg in args: + if isinstance(arg, unicode): + want_unicode = True + break + if want_unicode: + newline = unicode("\n") + space = unicode(" ") + else: + newline = "\n" + space = " " + if sep is None: + sep = space + if end is None: + end = newline + for i, arg in enumerate(args): + if i: + write(sep) + write(arg) + write(end) +if sys.version_info[:2] < (3, 3): + _print = print_ + + def print_(*args, **kwargs): + fp = kwargs.get("file", sys.stdout) + flush = kwargs.pop("flush", False) + _print(*args, **kwargs) + if flush and fp is not None: + fp.flush() + +_add_doc(reraise, """Reraise an exception.""") + +if sys.version_info[0:2] < (3, 4): + def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS, + updated=functools.WRAPPER_UPDATES): + def wrapper(f): + f = functools.wraps(wrapped, assigned, updated)(f) + f.__wrapped__ = wrapped + return f + return wrapper +else: + wraps = functools.wraps + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(meta): + + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) + + +def add_metaclass(metaclass): + """Class decorator for creating a class with a metaclass.""" + def wrapper(cls): + orig_vars = cls.__dict__.copy() + slots = orig_vars.get('__slots__') + if slots is not None: + if isinstance(slots, str): + slots = [slots] + for slots_var in slots: + orig_vars.pop(slots_var) + orig_vars.pop('__dict__', None) + orig_vars.pop('__weakref__', None) + return metaclass(cls.__name__, cls.__bases__, orig_vars) + return wrapper + + +def python_2_unicode_compatible(klass): + """ + A decorator that defines __unicode__ and __str__ methods under Python 2. + Under Python 3 it does nothing. + + To support Python 2 and 3 with a single code base, define a __str__ method + returning text and apply this decorator to the class. + """ + if PY2: + if '__str__' not in klass.__dict__: + raise ValueError("@python_2_unicode_compatible cannot be applied " + "to %s because it doesn't define __str__()." % + klass.__name__) + klass.__unicode__ = klass.__str__ + klass.__str__ = lambda self: self.__unicode__().encode('utf-8') + return klass + + +# Complete the moves implementation. +# This code is at the end of this module to speed up module loading. +# Turn this module into a package. +__path__ = [] # required for PEP 302 and PEP 451 +__package__ = __name__ # see PEP 366 @ReservedAssignment +if globals().get("__spec__") is not None: + __spec__.submodule_search_locations = [] # PEP 451 @UndefinedVariable +# Remove other six meta path importers, since they cause problems. This can +# happen if six is removed from sys.modules and then reloaded. (Setuptools does +# this for some reason.) +if sys.meta_path: + for i, importer in enumerate(sys.meta_path): + # Here's some real nastiness: Another "instance" of the six module might + # be floating around. Therefore, we can't use isinstance() to check for + # the six meta path importer, since the other six instance will have + # inserted an importer with different class. + if (type(importer).__name__ == "_SixMetaPathImporter" and + importer.name == __name__): + del sys.meta_path[i] + break + del i, importer +# Finally, add the importer to the meta path import hook. +sys.meta_path.append(_importer) diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py new file mode 100644 index 00000000..2aeeeff9 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/__init__.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: MIT +import sys + +try: + # Our match_hostname function is the same as 3.5's, so we only want to + # import the match_hostname function if it's at least that good. + if sys.version_info < (3, 5): + raise ImportError("Fallback to vendored code") + + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 00000000..647e081d --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,156 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# SPDX-License-Identifier: Python-2.0 + +import re +import sys + +# ipaddress has been backported to 2.6+ in pypi. If it is installed on the +# system, use it to handle IPAddress ServerAltnames (this was added in +# python-3.5) otherwise only do DNS matching. This allows +# backports.ssl_match_hostname to continue to be used all the way back to +# python-2.4. +try: + import ipaddress +except ImportError: + ipaddress = None + +__version__ = '3.5.0.1' + + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def _to_unicode(obj): + if isinstance(obj, str) and sys.version_info < (3,): + obj = unicode(obj, encoding='ascii', errors='strict') + return obj + +def _ipaddress_match(ipname, host_ip): + """Exact matching of IP addresses. + + RFC 6125 explicitly doesn't define an algorithm for this + (section 1.7.2 - "Out of Scope"). + """ + # OpenSSL may add a trailing newline to a subjectAltName's IP address + # Divergence from upstream: ipaddress can't handle byte str + ip = ipaddress.ip_address(_to_unicode(ipname).rstrip()) + return ip == host_ip + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED") + try: + # Divergence from upstream: ipaddress can't handle byte str + host_ip = ipaddress.ip_address(_to_unicode(hostname)) + except ValueError: + # Not an IP address (common case) + host_ip = None + except UnicodeError: + # Divergence from upstream: Have to deal with ipaddress not taking + # byte strings. addresses should be all ascii, so we consider it not + # an ipaddress in this case + host_ip = None + except AttributeError: + # Divergence from upstream: Make ipaddress library optional + if ipaddress is None: + host_ip = None + else: + raise + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if host_ip is None and _dnsname_match(value, hostname): + return + dnsnames.append(value) + elif key == 'IP Address': + if host_ip is not None and _ipaddress_match(value, host_ip): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/collectors/python.d.plugin/python_modules/urllib3/poolmanager.py b/collectors/python.d.plugin/python_modules/urllib3/poolmanager.py new file mode 100644 index 00000000..adea9bc0 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/poolmanager.py @@ -0,0 +1,441 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import collections +import functools +import logging + +from ._collections import RecentlyUsedContainer +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool +from .connectionpool import port_by_scheme +from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown +from .packages.six.moves.urllib.parse import urljoin +from .request import RequestMethods +from .util.url import parse_url +from .util.retry import Retry + + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] + + +log = logging.getLogger(__name__) + +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version', 'ca_cert_dir', 'ssl_context') + +# All known keyword arguments that could be provided to the pool manager, its +# pools, or the underlying connections. This is used to construct a pool key. +_key_fields = ( + 'key_scheme', # str + 'key_host', # str + 'key_port', # int + 'key_timeout', # int or float or Timeout + 'key_retries', # int or Retry + 'key_strict', # bool + 'key_block', # bool + 'key_source_address', # str + 'key_key_file', # str + 'key_cert_file', # str + 'key_cert_reqs', # str + 'key_ca_certs', # str + 'key_ssl_version', # str + 'key_ca_cert_dir', # str + 'key_ssl_context', # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext + 'key_maxsize', # int + 'key_headers', # dict + 'key__proxy', # parsed proxy url + 'key__proxy_headers', # dict + 'key_socket_options', # list of (level (int), optname (int), value (int or str)) tuples + 'key__socks_options', # dict + 'key_assert_hostname', # bool or string + 'key_assert_fingerprint', # str +) + +#: The namedtuple class used to construct keys for the connection pool. +#: All custom key schemes should include the fields in this key at a minimum. +PoolKey = collections.namedtuple('PoolKey', _key_fields) + + +def _default_key_normalizer(key_class, request_context): + """ + Create a pool key out of a request context dictionary. + + According to RFC 3986, both the scheme and host are case-insensitive. + Therefore, this function normalizes both before constructing the pool + key for an HTTPS request. If you wish to change this behaviour, provide + alternate callables to ``key_fn_by_scheme``. + + :param key_class: + The class to use when constructing the key. This should be a namedtuple + with the ``scheme`` and ``host`` keys at a minimum. + :type key_class: namedtuple + :param request_context: + A dictionary-like object that contain the context for a request. + :type request_context: dict + + :return: A namedtuple that can be used as a connection pool key. + :rtype: PoolKey + """ + # Since we mutate the dictionary, make a copy first + context = request_context.copy() + context['scheme'] = context['scheme'].lower() + context['host'] = context['host'].lower() + + # These are both dictionaries and need to be transformed into frozensets + for key in ('headers', '_proxy_headers', '_socks_options'): + if key in context and context[key] is not None: + context[key] = frozenset(context[key].items()) + + # The socket_options key may be a list and needs to be transformed into a + # tuple. + socket_opts = context.get('socket_options') + if socket_opts is not None: + context['socket_options'] = tuple(socket_opts) + + # Map the kwargs to the names in the namedtuple - this is necessary since + # namedtuples can't have fields starting with '_'. + for key in list(context.keys()): + context['key_' + key] = context.pop(key) + + # Default to ``None`` for keys missing from the context + for field in key_class._fields: + if field not in context: + context[field] = None + + return key_class(**context) + + +#: A dictionary that maps a scheme to a callable that creates a pool key. +#: This can be used to alter the way pool keys are constructed, if desired. +#: Each PoolManager makes a copy of this dictionary so they can be configured +#: globally here, or individually on the instance. +key_fn_by_scheme = { + 'http': functools.partial(_default_key_normalizer, PoolKey), + 'https': functools.partial(_default_key_normalizer, PoolKey), +} + +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, + 'https': HTTPSConnectionPool, +} + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param \\**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example:: + + >>> manager = PoolManager(num_pools=2) + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') + >>> len(manager.pools) + 2 + + """ + + proxy = None + + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer(num_pools, + dispose_func=lambda p: p.close()) + + # Locally set the pool classes and keys so other PoolManagers can + # override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + + def _new_pool(self, scheme, host, port, request_context=None): + """ + Create a new :class:`ConnectionPool` based on host, port, scheme, and + any additional pool keyword arguments. + + If ``request_context`` is provided, it is provided as keyword arguments + to the pool class used. This method is used to actually create the + connection pools handed out by :meth:`connection_from_url` and + companion methods. It is intended to be overridden for customization. + """ + pool_cls = self.pool_classes_by_scheme[scheme] + if request_context is None: + request_context = self.connection_pool_kw.copy() + + # Although the context has everything necessary to create the pool, + # this function has historically only used the scheme, host, and port + # in the positional args. When an API change is acceptable these can + # be removed. + for key in ('scheme', 'host', 'port'): + request_context.pop(key, None) + + if scheme == 'http': + for kw in SSL_KEYWORDS: + request_context.pop(kw, None) + + return pool_cls(host, port, **request_context) + + def clear(self): + """ + Empty our store of pools and direct them all to close. + + This will not affect in-flight connections, but they will not be + re-used after completion. + """ + self.pools.clear() + + def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + If ``port`` isn't given, it will be derived from the ``scheme`` using + ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is + provided, it is merged with the instance's ``connection_pool_kw`` + variable and used to create the new connection pool, if one is + needed. + """ + + if not host: + raise LocationValueError("No host specified.") + + request_context = self._merge_pool_kwargs(pool_kwargs) + request_context['scheme'] = scheme or 'http' + if not port: + port = port_by_scheme.get(request_context['scheme'].lower(), 80) + request_context['port'] = port + request_context['host'] = host + + return self.connection_from_context(request_context) + + def connection_from_context(self, request_context): + """ + Get a :class:`ConnectionPool` based on the request context. + + ``request_context`` must at least contain the ``scheme`` key and its + value must be a key in ``key_fn_by_scheme`` instance variable. + """ + scheme = request_context['scheme'].lower() + pool_key_constructor = self.key_fn_by_scheme[scheme] + pool_key = pool_key_constructor(request_context) + + return self.connection_from_pool_key(pool_key, request_context=request_context) + + def connection_from_pool_key(self, pool_key, request_context=None): + """ + Get a :class:`ConnectionPool` based on the provided pool key. + + ``pool_key`` should be a namedtuple that only contains immutable + objects. At a minimum it must have the ``scheme``, ``host``, and + ``port`` fields. + """ + with self.pools.lock: + # If the scheme, host, or port doesn't match existing open + # connections, open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + scheme = request_context['scheme'] + host = request_context['host'] + port = request_context['port'] + pool = self._new_pool(scheme, host, port, request_context=request_context) + self.pools[pool_key] = pool + + return pool + + def connection_from_url(self, url, pool_kwargs=None): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url`. + + If ``pool_kwargs`` is not provided and a new pool needs to be + constructed, ``self.connection_pool_kw`` is used to initialize + the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` + is provided, it is used instead. Note that if a new pool does not + need to be created for the request, the provided ``pool_kwargs`` are + not used. + """ + u = parse_url(url) + return self.connection_from_host(u.host, port=u.port, scheme=u.scheme, + pool_kwargs=pool_kwargs) + + def _merge_pool_kwargs(self, override): + """ + Merge a dictionary of override values for self.connection_pool_kw. + + This does not modify self.connection_pool_kw and returns a new dict. + Any keys in the override dictionary with a value of ``None`` are + removed from the merged dictionary. + """ + base_pool_kwargs = self.connection_pool_kw.copy() + if override: + for key, value in override.items(): + if value is None: + try: + del base_pool_kwargs[key] + except KeyError: + pass + else: + base_pool_kwargs[key] = value + return base_pool_kwargs + + def urlopen(self, method, url, redirect=True, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` + with custom cross-host redirect logic and only sends the request-uri + portion of the ``url``. + + The given ``url`` parameter must be absolute, such that an appropriate + :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + """ + u = parse_url(url) + conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) + + kw['assert_same_host'] = False + kw['redirect'] = False + if 'headers' not in kw: + kw['headers'] = self.headers + + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, **kw) + else: + response = conn.urlopen(method, u.request_uri, **kw) + + redirect_location = redirect and response.get_redirect_location() + if not redirect_location: + return response + + # Support relative URLs for redirecting. + redirect_location = urljoin(url, redirect_location) + + # RFC 7231, Section 6.4.4 + if response.status == 303: + method = 'GET' + + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + + kw['retries'] = retries + kw['redirect'] = redirect + + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method, redirect_location, **kw) + + +class ProxyManager(PoolManager): + """ + Behaves just like :class:`PoolManager`, but sends all requests through + the defined proxy, using the CONNECT method for HTTPS URLs. + + :param proxy_url: + The URL of the proxy to be used. + + :param proxy_headers: + A dictionary contaning headers that will be sent to the proxy. In case + of HTTP they are being sent with each request, while in the + HTTPS/CONNECT case they are sent only once. Could be used for proxy + authentication. + + Example: + >>> proxy = urllib3.ProxyManager('http://localhost:3128/') + >>> r1 = proxy.request('GET', 'http://google.com/') + >>> r2 = proxy.request('GET', 'http://httpbin.org/') + >>> len(proxy.pools) + 1 + >>> r3 = proxy.request('GET', 'https://httpbin.org/') + >>> r4 = proxy.request('GET', 'https://twitter.com/') + >>> len(proxy.pools) + 3 + + """ + + def __init__(self, proxy_url, num_pools=10, headers=None, + proxy_headers=None, **connection_pool_kw): + + if isinstance(proxy_url, HTTPConnectionPool): + proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, + proxy_url.port) + proxy = parse_url(proxy_url) + if not proxy.port: + port = port_by_scheme.get(proxy.scheme, 80) + proxy = proxy._replace(port=port) + + if proxy.scheme not in ("http", "https"): + raise ProxySchemeUnknown(proxy.scheme) + + self.proxy = proxy + self.proxy_headers = proxy_headers or {} + + connection_pool_kw['_proxy'] = self.proxy + connection_pool_kw['_proxy_headers'] = self.proxy_headers + + super(ProxyManager, self).__init__( + num_pools, headers, **connection_pool_kw) + + def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None): + if scheme == "https": + return super(ProxyManager, self).connection_from_host( + host, port, scheme, pool_kwargs=pool_kwargs) + + return super(ProxyManager, self).connection_from_host( + self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs) + + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {'Accept': '*/*'} + + netloc = parse_url(url).netloc + if netloc: + headers_['Host'] = netloc + + if headers: + headers_.update(headers) + return headers_ + + def urlopen(self, method, url, redirect=True, **kw): + "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." + u = parse_url(url) + + if u.scheme == "http": + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) + + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) + + +def proxy_from_url(url, **kw): + return ProxyManager(proxy_url=url, **kw) diff --git a/collectors/python.d.plugin/python_modules/urllib3/request.py b/collectors/python.d.plugin/python_modules/urllib3/request.py new file mode 100644 index 00000000..f7833197 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/request.py @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import + +from .filepost import encode_multipart_formdata +from .packages.six.moves.urllib.parse import urlencode + + +__all__ = ['RequestMethods'] + + +class RequestMethods(object): + """ + Convenience mixin for classes who implement a :meth:`urlopen` method, such + as :class:`~urllib3.connectionpool.HTTPConnectionPool` and + :class:`~urllib3.poolmanager.PoolManager`. + + Provides behavior for making common types of HTTP request methods and + decides which type of request field encoding to use. + + Specifically, + + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). + + :meth:`.request_encode_body` is for sending requests whose fields are + encoded in the *body* of the request using multipart or www-form-urlencoded + (such as for POST, PUT, PATCH). + + :meth:`.request` is for making any kind of request, it will look up the + appropriate encoding format and use one of the above two methods to make + the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + """ + + _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) + + def __init__(self, headers=None): + self.headers = headers or {} + + def urlopen(self, method, url, body=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **kw): # Abstract + raise NotImplemented("Classes extending RequestMethods must implement " + "their own ``urlopen`` method.") + + def request(self, method, url, fields=None, headers=None, **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the appropriate encoding of + ``fields`` based on the ``method`` used. + + This is a convenience method that requires the least amount of manual + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as + :meth:`request_encode_url`, :meth:`request_encode_body`, + or even the lowest level :meth:`urlopen`. + """ + method = method.upper() + + if method in self._encode_url_methods: + return self.request_encode_url(method, url, fields=fields, + headers=headers, + **urlopen_kw) + else: + return self.request_encode_body(method, url, fields=fields, + headers=headers, + **urlopen_kw) + + def request_encode_url(self, method, url, fields=None, headers=None, + **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the url. This is useful for request methods like GET, HEAD, DELETE, etc. + """ + if headers is None: + headers = self.headers + + extra_kw = {'headers': headers} + extra_kw.update(urlopen_kw) + + if fields: + url += '?' + urlencode(fields) + + return self.urlopen(method, url, **extra_kw) + + def request_encode_body(self, method, url, fields=None, headers=None, + encode_multipart=True, multipart_boundary=None, + **urlopen_kw): + """ + Make a request using :meth:`urlopen` with the ``fields`` encoded in + the body. This is useful for request methods like POST, PUT, PATCH, etc. + + When ``encode_multipart=True`` (default), then + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise + :meth:`urllib.urlencode` is used with the + 'application/x-www-form-urlencoded' content type. + + Multipart encoding must be used when posting files, and it's reasonably + safe to use it in other times too. However, it may break request + signing, such as with OAuth. + + Supports an optional ``fields`` parameter of key/value strings AND + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example:: + + fields = { + 'foo': 'bar', + 'fakefile': ('foofile.txt', 'contents of foofile'), + 'realfile': ('barfile.txt', open('realfile').read()), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', + } + + When uploading a file, providing a filename (the first parameter of the + tuple) is optional but recommended to best mimick behavior of browsers. + + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string + which is used to compose the body of the request. The random boundary + string can be explicitly set with the ``multipart_boundary`` parameter. + """ + if headers is None: + headers = self.headers + + extra_kw = {'headers': {}} + + if fields: + if 'body' in urlopen_kw: + raise TypeError( + "request got values for both 'fields' and 'body', can only specify one.") + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) + + return self.urlopen(method, url, **extra_kw) diff --git a/collectors/python.d.plugin/python_modules/urllib3/response.py b/collectors/python.d.plugin/python_modules/urllib3/response.py new file mode 100644 index 00000000..cf14a307 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/response.py @@ -0,0 +1,623 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from contextlib import contextmanager +import zlib +import io +import logging +from socket import timeout as SocketTimeout +from socket import error as SocketError + +from ._collections import HTTPHeaderDict +from .exceptions import ( + BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError, + ResponseNotChunked, IncompleteRead, InvalidHeader +) +from .packages.six import string_types as basestring, binary_type, PY3 +from .packages.six.moves import http_client as httplib +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed, is_response_to_head + +log = logging.getLogger(__name__) + + +class DeflateDecoder(object): + + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + decompressed = self._obj.decompress(data) + if decompressed: + self._first_try = False + self._data = None + return decompressed + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +class GzipDecoder(object): + + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + return self._obj.decompress(data) + + +def _get_decoder(mode): + if mode == 'gzip': + return GzipDecoder() + + return DeflateDecoder() + + +class HTTPResponse(io.IOBase): + """ + HTTP Response container. + + Backwards-compatible to httplib's HTTPResponse but the response ``body`` is + loaded and decoded on-demand when the ``data`` property is accessed. This + class is also compatible with the Python standard library's :mod:`io` + module, and can hence be treated as a readable object in the context of that + framework. + + Extra parameters for behaviour not present in httplib.HTTPResponse: + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If True, attempts to decode specific content-encoding's based on headers + (like 'gzip' and 'deflate') will be skipped and raw data will be used + instead. + + :param original_response: + When this HTTPResponse wrapper is generated from an httplib.HTTPResponse + object, it's convenient to include the original for debug purposes. It's + otherwise unused. + + :param retries: + The retries contains the last :class:`~urllib3.util.retry.Retry` that + was used during the request. + + :param enforce_content_length: + Enforce content length checking. Body returned by server must match + value of Content-Length header, if present. Otherwise, raise error. + """ + + CONTENT_DECODERS = ['gzip', 'deflate'] + REDIRECT_STATUSES = [301, 302, 303, 307, 308] + + def __init__(self, body='', headers=None, status=0, version=0, reason=None, + strict=0, preload_content=True, decode_content=True, + original_response=None, pool=None, connection=None, + retries=None, enforce_content_length=False, request_method=None): + + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) + self.status = status + self.version = version + self.reason = reason + self.strict = strict + self.decode_content = decode_content + self.retries = retries + self.enforce_content_length = enforce_content_length + + self._decoder = None + self._body = None + self._fp = None + self._original_response = original_response + self._fp_bytes_read = 0 + + if body and isinstance(body, (basestring, binary_type)): + self._body = body + + self._pool = pool + self._connection = connection + + if hasattr(body, 'read'): + self._fp = body + + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get('transfer-encoding', '').lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + # Determine length of response + self.length_remaining = self._init_length(request_method) + + # If requested, preload the body. + if preload_content and not self._body: + self._body = self.read(decode_content=decode_content) + + def get_redirect_location(self): + """ + Should we redirect and where to? + + :returns: Truthy redirect location string if we got a redirect status + code and valid location. ``None`` if redirect status and no + location. ``False`` if not a redirect status code. + """ + if self.status in self.REDIRECT_STATUSES: + return self.headers.get('location') + + return False + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + @property + def data(self): + # For backwords-compat with earlier urllib3 0.4 and earlier. + if self._body: + return self._body + + if self._fp: + return self.read(cache_content=True) + + @property + def connection(self): + return self._connection + + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + + def _init_length(self, request_method): + """ + Set initial length value for Response content if available. + """ + length = self.headers.get('content-length') + + if length is not None and self.chunked: + # This Response will fail with an IncompleteRead if it can't be + # received as chunked. This method falls back to attempt reading + # the response before raising an exception. + log.warning("Received response with both Content-Length and " + "Transfer-Encoding set. This is expressly forbidden " + "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " + "attempting to process response as Transfer-Encoding: " + "chunked.") + return None + + elif length is not None: + try: + # RFC 7230 section 3.3.2 specifies multiple content lengths can + # be sent in a single Content-Length header + # (e.g. Content-Length: 42, 42). This line ensures the values + # are all valid ints and that as long as the `set` length is 1, + # all values are the same. Otherwise, the header is invalid. + lengths = set([int(val) for val in length.split(',')]) + if len(lengths) > 1: + raise InvalidHeader("Content-Length contained multiple " + "unmatching values (%s)" % length) + length = lengths.pop() + except ValueError: + length = None + else: + if length < 0: + length = None + + # Convert status to int for comparison + # In some cases, httplib returns a status of "_UNKNOWN" + try: + status = int(self.status) + except ValueError: + status = 0 + + # Check for responses that shouldn't include a body + if status in (204, 304) or 100 <= status < 200 or request_method == 'HEAD': + length = 0 + + return length + + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessary. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content: + data += self._flush_decoder() + + return data + + def _flush_decoder(self): + """ + Flushes the decoder. Should only be called if the decoder is actually + being used. + """ + if self._decoder: + buf = self._decoder.decompress(b'') + return buf + self._decoder.flush() + + return b'' + + @contextmanager + def _error_catcher(self): + """ + Catch low-level python exceptions, instead re-raising urllib3 + variants, so that low-level exceptions are not leaked in the + high-level api. + + On exit, release the connection back to the pool. + """ + clean_exit = False + + try: + try: + yield + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if 'read operation timed out' not in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except (HTTPException, SocketError) as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + + # If no exception is thrown, we should avoid cleaning up + # unnecessarily. + clean_exit = True + finally: + # If we didn't terminate cleanly, we need to throw away our + # connection. + if not clean_exit: + # The response may not be closed but we're not going to use it + # anymore so close it now to ensure that the connection is + # released back to the pool. + if self._original_response: + self._original_response.close() + + # Closing the response may not actually be sufficient to close + # everything, so if we have a hold of the connection close that + # too. + if self._connection: + self._connection.close() + + # If we hold the original response but it's closed now, we should + # return the connection back to the pool. + if self._original_response and self._original_response.isclosed(): + self.release_conn() + + def read(self, amt=None, decode_content=None, cache_content=False): + """ + Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + self._init_decoder() + if decode_content is None: + decode_content = self.decode_content + + if self._fp is None: + return + + flush_decoder = False + data = None + + with self._error_catcher(): + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() + flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + if self.enforce_content_length and self.length_remaining not in (0, None): + # This is an edge case that httplib failed to cover due + # to concerns of backward compatibility. We're + # addressing it here to make sure IncompleteRead is + # raised during streaming, so all calls with incorrect + # Content-Length are caught. + raise IncompleteRead(self._fp_bytes_read, self.length_remaining) + + if data: + self._fp_bytes_read += len(data) + if self.length_remaining is not None: + self.length_remaining -= len(data) + + data = self._decode(data, decode_content, flush_decoder) + + if cache_content: + self._body = data + + return data + + def stream(self, amt=2**16, decode_content=None): + """ + A generator wrapper for the read() method. A call will block until + ``amt`` bytes have been read from the connection or until the + connection is closed. + + :param amt: + How much of the content to read. The generator will return up to + much data per iteration, but may return less. This is particularly + likely when using compressed data. However, the empty string will + never be returned. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + if self.chunked and self.supports_chunked_reads(): + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) + + if data: + yield data + + @classmethod + def from_httplib(ResponseCls, r, **response_kw): + """ + Given an :class:`httplib.HTTPResponse` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. + """ + headers = r.msg + + if not isinstance(headers, HTTPHeaderDict): + if PY3: # Python 3 + headers = HTTPHeaderDict(headers.items()) + else: # Python 2 + headers = HTTPHeaderDict.from_httplib(headers) + + # HTTPResponse objects in Python 3 don't have a .strict attribute + strict = getattr(r, 'strict', 0) + resp = ResponseCls(body=r, + headers=headers, + status=r.status, + version=r.version, + reason=r.reason, + strict=strict, + original_response=r, + **response_kw) + return resp + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) + + # Overrides from io.IOBase + def close(self): + if not self.closed: + self._fp.close() + + if self._connection: + self._connection.close() + + @property + def closed(self): + if self._fp is None: + return True + elif hasattr(self._fp, 'isclosed'): + return self._fp.isclosed() + elif hasattr(self._fp, 'closed'): + return self._fp.closed + else: + return True + + def fileno(self): + if self._fp is None: + raise IOError("HTTPResponse has no file to get a fileno from") + elif hasattr(self._fp, "fileno"): + return self._fp.fileno() + else: + raise IOError("The file-like object this HTTPResponse is wrapped " + "around has no file descriptor") + + def flush(self): + if self._fp is not None and hasattr(self._fp, 'flush'): + return self._fp.flush() + + def readable(self): + # This method is required for `io` module compatibility. + return True + + def readinto(self, b): + # This method is required for `io` module compatibility. + temp = self.read(len(b)) + if len(temp) == 0: + return 0 + else: + b[:len(temp)] = temp + return len(temp) + + def supports_chunked_reads(self): + """ + Checks if the underlying file-like object looks like a + httplib.HTTPResponse object. We do this by testing for the fp + attribute. If it is present we assume it returns raw chunks as + processed by read_chunked(). + """ + return hasattr(self._fp, 'fp') + + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b';', 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() + # FIXME: Rewrite this method and make it a class with a better structured logic. + if not self.chunked: + raise ResponseNotChunked( + "Response is not chunked. " + "Header 'transfer-encoding: chunked' is missing.") + if not self.supports_chunked_reads(): + raise BodyNotHttplibCompatible( + "Body should be httplib.HTTPResponse like. " + "It should have have an fp attribute which returns raw chunks.") + + # Don't bother reading the body of a HEAD request. + if self._original_response and is_response_to_head(self._original_response): + self._original_response.close() + return + + with self._error_catcher(): + while True: + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + decoded = self._decode(chunk, decode_content=decode_content, + flush_decoder=False) + if decoded: + yield decoded + + if decode_content: + # On CPython and PyPy, we should never need to flush the + # decoder. However, on Jython we *might* need to, so + # lets defensively do it anyway. + decoded = self._flush_decoder() + if decoded: # Platform-specific: Jython. + yield decoded + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b'\r\n': + break + + # We read everything; close the "file". + if self._original_response: + self._original_response.close() diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/__init__.py b/collectors/python.d.plugin/python_modules/urllib3/util/__init__.py new file mode 100644 index 00000000..bba628d9 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/__init__.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +# For backwards compatibility, provide imports that used to be here. +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + IS_PYOPENSSL, + IS_SECURETRANSPORT, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) + +from .retry import Retry +from .url import ( + get_host, + parse_url, + split_first, + Url, +) +from .wait import ( + wait_for_read, + wait_for_write +) + +__all__ = ( + 'HAS_SNI', + 'IS_PYOPENSSL', + 'IS_SECURETRANSPORT', + 'SSLContext', + 'Retry', + 'Timeout', + 'Url', + 'assert_fingerprint', + 'current_time', + 'is_connection_dropped', + 'is_fp_closed', + 'get_host', + 'parse_url', + 'make_headers', + 'resolve_cert_reqs', + 'resolve_ssl_version', + 'split_first', + 'ssl_wrap_socket', + 'wait_for_read', + 'wait_for_write' +) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/connection.py b/collectors/python.d.plugin/python_modules/urllib3/util/connection.py new file mode 100644 index 00000000..3bd69e8f --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/connection.py @@ -0,0 +1,131 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import socket +from .wait import wait_for_read +from .selectors import HAS_SELECT, SelectorError + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return True + + if not HAS_SELECT: + return False + + try: + return bool(wait_for_read(sock, timeout=0.0)) + except SelectorError: + return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +# One additional modification is that we avoid binding to IPv6 servers +# discovered in DNS if the system doesn't have IPv6 functionality. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + if host.startswith('['): + host = host.strip('[]') + err = None + + # Using the value from allowed_gai_family() in the context of getaddrinfo lets + # us select whether to work with IPv4 DNS records, IPv6 records, or both. + # The original create_connection function always returns all records. + family = allowed_gai_family() + + for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as e: + err = e + if sock is not None: + sock.close() + sock = None + + if err is not None: + raise err + + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) + + +def allowed_gai_family(): + """This function is designed to work in the context of + getaddrinfo, where family=socket.AF_UNSPEC is the default and + will perform a DNS search for both IPv6 and IPv4 records.""" + + family = socket.AF_INET + if HAS_IPV6: + family = socket.AF_UNSPEC + return family + + +def _has_ipv6(host): + """ Returns True if the system can bind an IPv6 address. """ + sock = None + has_ipv6 = False + + if socket.has_ipv6: + # has_ipv6 returns true if cPython was compiled with IPv6 support. + # It does not tell us if the system has IPv6 support enabled. To + # determine that we must bind to an IPv6 address. + # https://github.com/shazow/urllib3/pull/611 + # https://bugs.python.org/issue658327 + try: + sock = socket.socket(socket.AF_INET6) + sock.bind((host, 0)) + has_ipv6 = True + except Exception: + pass + + if sock: + sock.close() + return has_ipv6 + + +HAS_IPV6 = _has_ipv6('::1') diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/request.py b/collectors/python.d.plugin/python_modules/urllib3/util/request.py new file mode 100644 index 00000000..18f27b03 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/request.py @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from base64 import b64encode + +from ..packages.six import b, integer_types +from ..exceptions import UnrewindableBodyError + +ACCEPT_ENCODING = 'gzip,deflate' +_FAILEDTELL = object() + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None, disable_cache=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(b(proxy_basic_auth)).decode('utf-8') + + if disable_cache: + headers['cache-control'] = 'no-cache' + + return headers + + +def set_file_position(body, pos): + """ + If a position is provided, move file to that point. + Otherwise, we'll attempt to record a position for future use. + """ + if pos is not None: + rewind_body(body, pos) + elif getattr(body, 'tell', None) is not None: + try: + pos = body.tell() + except (IOError, OSError): + # This differentiates from None, allowing us to catch + # a failed `tell()` later when trying to rewind the body. + pos = _FAILEDTELL + + return pos + + +def rewind_body(body, body_pos): + """ + Attempt to rewind body to a certain position. + Primarily used for request redirects and retries. + + :param body: + File-like object that supports seek. + + :param int pos: + Position to seek to in file. + """ + body_seek = getattr(body, 'seek', None) + if body_seek is not None and isinstance(body_pos, integer_types): + try: + body_seek(body_pos) + except (IOError, OSError): + raise UnrewindableBodyError("An error occurred when rewinding request " + "body for redirect/retry.") + elif body_pos is _FAILEDTELL: + raise UnrewindableBodyError("Unable to record file position for rewinding " + "request body during a redirect/retry.") + else: + raise ValueError("body_pos must be of type integer, " + "instead it was %s." % type(body_pos)) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/response.py b/collectors/python.d.plugin/python_modules/urllib3/util/response.py new file mode 100644 index 00000000..e4cda93d --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/response.py @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from ..packages.six.moves import http_client as httplib + +from ..exceptions import HeaderParsingError + + +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + + try: + # Check `isclosed()` first, in case Python3 doesn't set `closed`. + # GH Issue #928 + return obj.isclosed() + except AttributeError: + pass + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). + return obj.fp is None + except AttributeError: + pass + + raise ValueError("Unable to determine whether fp is closed.") + + +def assert_header_parsing(headers): + """ + Asserts whether all headers have been successfully parsed. + Extracts encountered errors from the result of parsing headers. + + Only works on Python 3. + + :param headers: Headers to verify. + :type headers: `httplib.HTTPMessage`. + + :raises urllib3.exceptions.HeaderParsingError: + If parsing errors are found. + """ + + # This will fail silently if we pass in the wrong kind of parameter. + # To make debugging easier add an explicit check. + if not isinstance(headers, httplib.HTTPMessage): + raise TypeError('expected httplib.Message, got {0}.'.format( + type(headers))) + + defects = getattr(headers, 'defects', None) + get_payload = getattr(headers, 'get_payload', None) + + unparsed_data = None + if get_payload: # Platform-specific: Python 3. + unparsed_data = get_payload() + + if defects or unparsed_data: + raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) + + +def is_response_to_head(response): + """ + Checks whether the request of a response has been a HEAD-request. + Handles the quirks of AppEngine. + + :param conn: + :type conn: :class:`httplib.HTTPResponse` + """ + # FIXME: Can we do this somehow without accessing private httplib _method? + method = response._method + if isinstance(method, int): # Platform-specific: Appengine + return method == 3 + return method.upper() == 'HEAD' diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/retry.py b/collectors/python.d.plugin/python_modules/urllib3/util/retry.py new file mode 100644 index 00000000..61e63afe --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/retry.py @@ -0,0 +1,402 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import time +import logging +from collections import namedtuple +from itertools import takewhile +import email +import re + +from ..exceptions import ( + ConnectTimeoutError, + MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, + InvalidHeader, +) +from ..packages import six + + +log = logging.getLogger(__name__) + +# Data structure for representing the metadata of requests that result in a retry. +RequestHistory = namedtuple('RequestHistory', ["method", "url", "error", + "status", "redirect_location"]) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int status: + How many times to retry on bad status codes. + + These are retries made on responses, where status code matches + ``status_forcelist``. + + Set to ``0`` to fail on the first retry of this type. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + idempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + Set to a ``False`` value to retry on any verb. + + :param iterable status_forcelist: + A set of integer HTTP status codes that we should force a retry on. + A retry is initiated if the request method is in ``method_whitelist`` + and the response status code is in ``status_forcelist``. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts after the second try + (most errors are resolved immediately by a second try without a + delay). urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.BACKOFF_MAX`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + + :param bool raise_on_status: Similar meaning to ``raise_on_redirect``: + whether we should raise an exception, or return a response, + if status falls in ``status_forcelist`` range and retries have + been exhausted. + + :param tuple history: The history of the request encountered during + each call to :meth:`~Retry.increment`. The list is in the order + the requests occurred. Each list item is of class :class:`RequestHistory`. + + :param bool respect_retry_after_header: + Whether to respect Retry-After header on status codes defined as + :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not. + + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503]) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, status=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, raise_on_status=True, + history=None, respect_retry_after_header=True): + + self.total = total + self.connect = connect + self.read = read + self.status = status + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self.raise_on_status = raise_on_status + self.history = history or tuple() + self.respect_retry_after_header = respect_retry_after_header + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, status=self.status, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + raise_on_status=self.raise_on_status, + history=self.history, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r", retries, new_retries) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + # We want to consider only the last consecutive errors sequence (Ignore redirects). + consecutive_errors_len = len(list(takewhile(lambda x: x.redirect_location is None, + reversed(self.history)))) + if consecutive_errors_len <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def parse_retry_after(self, retry_after): + # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4 + if re.match(r"^\s*[0-9]+\s*$", retry_after): + seconds = int(retry_after) + else: + retry_date_tuple = email.utils.parsedate(retry_after) + if retry_date_tuple is None: + raise InvalidHeader("Invalid Retry-After header: %s" % retry_after) + retry_date = time.mktime(retry_date_tuple) + seconds = retry_date - time.time() + + if seconds < 0: + seconds = 0 + + return seconds + + def get_retry_after(self, response): + """ Get the value of Retry-After in seconds. """ + + retry_after = response.getheader("Retry-After") + + if retry_after is None: + return None + + return self.parse_retry_after(retry_after) + + def sleep_for_retry(self, response=None): + retry_after = self.get_retry_after(response) + if retry_after: + time.sleep(retry_after) + return True + + return False + + def _sleep_backoff(self): + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def sleep(self, response=None): + """ Sleep between retry attempts. + + This method will respect a server's ``Retry-After`` response header + and sleep the duration of the time requested. If that is not present, it + will use an exponential backoff. By default, the backoff factor is 0 and + this method will return immediately. + """ + + if response: + slept = self.sleep_for_retry(response) + if slept: + return + + self._sleep_backoff() + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def _is_method_retryable(self, method): + """ Checks if a given HTTP method should be retried upon, depending if + it is included on the method whitelist. + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return True + + def is_retry(self, method, status_code, has_retry_after=False): + """ Is this method/status code retryable? (Based on whitelists and control + variables such as the number of total retries to allow, whether to + respect the Retry-After header, whether this header is present, and + whether the returned status code is on the list of status codes to + be retried upon on the presence of the aforementioned header) + """ + if not self._is_method_retryable(method): + return False + + if self.status_forcelist and status_code in self.status_forcelist: + return True + + return (self.total and self.respect_retry_after_header and + has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES)) + + def is_exhausted(self): + """ Are we out of retries? """ + retry_counts = (self.total, self.connect, self.read, self.redirect, self.status) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, + _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + connect = self.connect + read = self.read + redirect = self.redirect + status_count = self.status + cause = 'unknown' + status = None + redirect_location = None + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False or not self._is_method_retryable(method): + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + cause = 'too many redirects' + redirect_location = response.get_redirect_location() + status = response.status + + else: + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist + cause = ResponseError.GENERIC_ERROR + if response and response.status: + if status_count is not None: + status_count -= 1 + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) + status = response.status + + history = self.history + (RequestHistory(method, url, error, status, redirect_location),) + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, status=status_count, + history=history) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error or ResponseError(cause)) + + log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) + + return new_retry + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect}, status={self.status})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py b/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py new file mode 100644 index 00000000..de5e4983 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/selectors.py @@ -0,0 +1,588 @@ +# SPDX-License-Identifier: MIT +# Backport of selectors.py from Python 3.5+ to support Python < 3.4 +# Also has the behavior specified in PEP 475 which is to retry syscalls +# in the case of an EINTR error. This module is required because selectors34 +# does not follow this behavior and instead returns that no dile descriptor +# events have occurred rather than retry the syscall. The decision to drop +# support for select.devpoll is made to maintain 100% test coverage. + +import errno +import math +import select +import socket +import sys +import time + +from collections import namedtuple + +try: + from collections import Mapping +except ImportError: + from collections.abc import Mapping + +try: + monotonic = time.monotonic +except (AttributeError, ImportError): # Python 3.3< + monotonic = time.time + +EVENT_READ = (1 << 0) +EVENT_WRITE = (1 << 1) + +HAS_SELECT = True # Variable that shows whether the platform has a selector. +_SYSCALL_SENTINEL = object() # Sentinel in case a system call returns None. +_DEFAULT_SELECTOR = None + + +class SelectorError(Exception): + def __init__(self, errcode): + super(SelectorError, self).__init__() + self.errno = errcode + + def __repr__(self): + return "<SelectorError errno={0}>".format(self.errno) + + def __str__(self): + return self.__repr__() + + +def _fileobj_to_fd(fileobj): + """ Return a file descriptor from a file object. If + given an integer will simply return that integer back. """ + if isinstance(fileobj, int): + fd = fileobj + else: + try: + fd = int(fileobj.fileno()) + except (AttributeError, TypeError, ValueError): + raise ValueError("Invalid file object: {0!r}".format(fileobj)) + if fd < 0: + raise ValueError("Invalid file descriptor: {0}".format(fd)) + return fd + + +# Determine which function to use to wrap system calls because Python 3.5+ +# already handles the case when system calls are interrupted. +if sys.version_info >= (3, 5): + def _syscall_wrapper(func, _, *args, **kwargs): + """ This is the short-circuit version of the below logic + because in Python 3.5+ all system calls automatically restart + and recalculate their timeouts. """ + try: + return func(*args, **kwargs) + except (OSError, IOError, select.error) as e: + errcode = None + if hasattr(e, "errno"): + errcode = e.errno + raise SelectorError(errcode) +else: + def _syscall_wrapper(func, recalc_timeout, *args, **kwargs): + """ Wrapper function for syscalls that could fail due to EINTR. + All functions should be retried if there is time left in the timeout + in accordance with PEP 475. """ + timeout = kwargs.get("timeout", None) + if timeout is None: + expires = None + recalc_timeout = False + else: + timeout = float(timeout) + if timeout < 0.0: # Timeout less than 0 treated as no timeout. + expires = None + else: + expires = monotonic() + timeout + + args = list(args) + if recalc_timeout and "timeout" not in kwargs: + raise ValueError( + "Timeout must be in args or kwargs to be recalculated") + + result = _SYSCALL_SENTINEL + while result is _SYSCALL_SENTINEL: + try: + result = func(*args, **kwargs) + # OSError is thrown by select.select + # IOError is thrown by select.epoll.poll + # select.error is thrown by select.poll.poll + # Aren't we thankful for Python 3.x rework for exceptions? + except (OSError, IOError, select.error) as e: + # select.error wasn't a subclass of OSError in the past. + errcode = None + if hasattr(e, "errno"): + errcode = e.errno + elif hasattr(e, "args"): + errcode = e.args[0] + + # Also test for the Windows equivalent of EINTR. + is_interrupt = (errcode == errno.EINTR or (hasattr(errno, "WSAEINTR") and + errcode == errno.WSAEINTR)) + + if is_interrupt: + if expires is not None: + current_time = monotonic() + if current_time > expires: + raise OSError(errno=errno.ETIMEDOUT) + if recalc_timeout: + if "timeout" in kwargs: + kwargs["timeout"] = expires - current_time + continue + if errcode: + raise SelectorError(errcode) + else: + raise + return result + + +SelectorKey = namedtuple('SelectorKey', ['fileobj', 'fd', 'events', 'data']) + + +class _SelectorMapping(Mapping): + """ Mapping of file objects to selector keys """ + + def __init__(self, selector): + self._selector = selector + + def __len__(self): + return len(self._selector._fd_to_key) + + def __getitem__(self, fileobj): + try: + fd = self._selector._fileobj_lookup(fileobj) + return self._selector._fd_to_key[fd] + except KeyError: + raise KeyError("{0!r} is not registered.".format(fileobj)) + + def __iter__(self): + return iter(self._selector._fd_to_key) + + +class BaseSelector(object): + """ Abstract Selector class + + A selector supports registering file objects to be monitored + for specific I/O events. + + A file object is a file descriptor or any object with a + `fileno()` method. An arbitrary object can be attached to the + file object which can be used for example to store context info, + a callback, etc. + + A selector can use various implementations (select(), poll(), epoll(), + and kqueue()) depending on the platform. The 'DefaultSelector' class uses + the most efficient implementation for the current platform. + """ + def __init__(self): + # Maps file descriptors to keys. + self._fd_to_key = {} + + # Read-only mapping returned by get_map() + self._map = _SelectorMapping(self) + + def _fileobj_lookup(self, fileobj): + """ Return a file descriptor from a file object. + This wraps _fileobj_to_fd() to do an exhaustive + search in case the object is invalid but we still + have it in our map. Used by unregister() so we can + unregister an object that was previously registered + even if it is closed. It is also used by _SelectorMapping + """ + try: + return _fileobj_to_fd(fileobj) + except ValueError: + + # Search through all our mapped keys. + for key in self._fd_to_key.values(): + if key.fileobj is fileobj: + return key.fd + + # Raise ValueError after all. + raise + + def register(self, fileobj, events, data=None): + """ Register a file object for a set of events to monitor. """ + if (not events) or (events & ~(EVENT_READ | EVENT_WRITE)): + raise ValueError("Invalid events: {0!r}".format(events)) + + key = SelectorKey(fileobj, self._fileobj_lookup(fileobj), events, data) + + if key.fd in self._fd_to_key: + raise KeyError("{0!r} (FD {1}) is already registered" + .format(fileobj, key.fd)) + + self._fd_to_key[key.fd] = key + return key + + def unregister(self, fileobj): + """ Unregister a file object from being monitored. """ + try: + key = self._fd_to_key.pop(self._fileobj_lookup(fileobj)) + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + # Getting the fileno of a closed socket on Windows errors with EBADF. + except socket.error as e: # Platform-specific: Windows. + if e.errno != errno.EBADF: + raise + else: + for key in self._fd_to_key.values(): + if key.fileobj is fileobj: + self._fd_to_key.pop(key.fd) + break + else: + raise KeyError("{0!r} is not registered".format(fileobj)) + return key + + def modify(self, fileobj, events, data=None): + """ Change a registered file object monitored events and data. """ + # NOTE: Some subclasses optimize this operation even further. + try: + key = self._fd_to_key[self._fileobj_lookup(fileobj)] + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + if events != key.events: + self.unregister(fileobj) + key = self.register(fileobj, events, data) + + elif data != key.data: + # Use a shortcut to update the data. + key = key._replace(data=data) + self._fd_to_key[key.fd] = key + + return key + + def select(self, timeout=None): + """ Perform the actual selection until some monitored file objects + are ready or the timeout expires. """ + raise NotImplementedError() + + def close(self): + """ Close the selector. This must be called to ensure that all + underlying resources are freed. """ + self._fd_to_key.clear() + self._map = None + + def get_key(self, fileobj): + """ Return the key associated with a registered file object. """ + mapping = self.get_map() + if mapping is None: + raise RuntimeError("Selector is closed") + try: + return mapping[fileobj] + except KeyError: + raise KeyError("{0!r} is not registered".format(fileobj)) + + def get_map(self): + """ Return a mapping of file objects to selector keys """ + return self._map + + def _key_from_fd(self, fd): + """ Return the key associated to a given file descriptor + Return None if it is not found. """ + try: + return self._fd_to_key[fd] + except KeyError: + return None + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +# Almost all platforms have select.select() +if hasattr(select, "select"): + class SelectSelector(BaseSelector): + """ Select-based selector. """ + def __init__(self): + super(SelectSelector, self).__init__() + self._readers = set() + self._writers = set() + + def register(self, fileobj, events, data=None): + key = super(SelectSelector, self).register(fileobj, events, data) + if events & EVENT_READ: + self._readers.add(key.fd) + if events & EVENT_WRITE: + self._writers.add(key.fd) + return key + + def unregister(self, fileobj): + key = super(SelectSelector, self).unregister(fileobj) + self._readers.discard(key.fd) + self._writers.discard(key.fd) + return key + + def _select(self, r, w, timeout=None): + """ Wrapper for select.select because timeout is a positional arg """ + return select.select(r, w, [], timeout) + + def select(self, timeout=None): + # Selecting on empty lists on Windows errors out. + if not len(self._readers) and not len(self._writers): + return [] + + timeout = None if timeout is None else max(timeout, 0.0) + ready = [] + r, w, _ = _syscall_wrapper(self._select, True, self._readers, + self._writers, timeout) + r = set(r) + w = set(w) + for fd in r | w: + events = 0 + if fd in r: + events |= EVENT_READ + if fd in w: + events |= EVENT_WRITE + + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + +if hasattr(select, "poll"): + class PollSelector(BaseSelector): + """ Poll-based selector """ + def __init__(self): + super(PollSelector, self).__init__() + self._poll = select.poll() + + def register(self, fileobj, events, data=None): + key = super(PollSelector, self).register(fileobj, events, data) + event_mask = 0 + if events & EVENT_READ: + event_mask |= select.POLLIN + if events & EVENT_WRITE: + event_mask |= select.POLLOUT + self._poll.register(key.fd, event_mask) + return key + + def unregister(self, fileobj): + key = super(PollSelector, self).unregister(fileobj) + self._poll.unregister(key.fd) + return key + + def _wrap_poll(self, timeout=None): + """ Wrapper function for select.poll.poll() so that + _syscall_wrapper can work with only seconds. """ + if timeout is not None: + if timeout <= 0: + timeout = 0 + else: + # select.poll.poll() has a resolution of 1 millisecond, + # round away from zero to wait *at least* timeout seconds. + timeout = math.ceil(timeout * 1e3) + + result = self._poll.poll(timeout) + return result + + def select(self, timeout=None): + ready = [] + fd_events = _syscall_wrapper(self._wrap_poll, True, timeout=timeout) + for fd, event_mask in fd_events: + events = 0 + if event_mask & ~select.POLLIN: + events |= EVENT_WRITE + if event_mask & ~select.POLLOUT: + events |= EVENT_READ + + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + + return ready + + +if hasattr(select, "epoll"): + class EpollSelector(BaseSelector): + """ Epoll-based selector """ + def __init__(self): + super(EpollSelector, self).__init__() + self._epoll = select.epoll() + + def fileno(self): + return self._epoll.fileno() + + def register(self, fileobj, events, data=None): + key = super(EpollSelector, self).register(fileobj, events, data) + events_mask = 0 + if events & EVENT_READ: + events_mask |= select.EPOLLIN + if events & EVENT_WRITE: + events_mask |= select.EPOLLOUT + _syscall_wrapper(self._epoll.register, False, key.fd, events_mask) + return key + + def unregister(self, fileobj): + key = super(EpollSelector, self).unregister(fileobj) + try: + _syscall_wrapper(self._epoll.unregister, False, key.fd) + except SelectorError: + # This can occur when the fd was closed since registry. + pass + return key + + def select(self, timeout=None): + if timeout is not None: + if timeout <= 0: + timeout = 0.0 + else: + # select.epoll.poll() has a resolution of 1 millisecond + # but luckily takes seconds so we don't need a wrapper + # like PollSelector. Just for better rounding. + timeout = math.ceil(timeout * 1e3) * 1e-3 + timeout = float(timeout) + else: + timeout = -1.0 # epoll.poll() must have a float. + + # We always want at least 1 to ensure that select can be called + # with no file descriptors registered. Otherwise will fail. + max_events = max(len(self._fd_to_key), 1) + + ready = [] + fd_events = _syscall_wrapper(self._epoll.poll, True, + timeout=timeout, + maxevents=max_events) + for fd, event_mask in fd_events: + events = 0 + if event_mask & ~select.EPOLLIN: + events |= EVENT_WRITE + if event_mask & ~select.EPOLLOUT: + events |= EVENT_READ + + key = self._key_from_fd(fd) + if key: + ready.append((key, events & key.events)) + return ready + + def close(self): + self._epoll.close() + super(EpollSelector, self).close() + + +if hasattr(select, "kqueue"): + class KqueueSelector(BaseSelector): + """ Kqueue / Kevent-based selector """ + def __init__(self): + super(KqueueSelector, self).__init__() + self._kqueue = select.kqueue() + + def fileno(self): + return self._kqueue.fileno() + + def register(self, fileobj, events, data=None): + key = super(KqueueSelector, self).register(fileobj, events, data) + if events & EVENT_READ: + kevent = select.kevent(key.fd, + select.KQ_FILTER_READ, + select.KQ_EV_ADD) + + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + + if events & EVENT_WRITE: + kevent = select.kevent(key.fd, + select.KQ_FILTER_WRITE, + select.KQ_EV_ADD) + + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + + return key + + def unregister(self, fileobj): + key = super(KqueueSelector, self).unregister(fileobj) + if key.events & EVENT_READ: + kevent = select.kevent(key.fd, + select.KQ_FILTER_READ, + select.KQ_EV_DELETE) + try: + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + except SelectorError: + pass + if key.events & EVENT_WRITE: + kevent = select.kevent(key.fd, + select.KQ_FILTER_WRITE, + select.KQ_EV_DELETE) + try: + _syscall_wrapper(self._kqueue.control, False, [kevent], 0, 0) + except SelectorError: + pass + + return key + + def select(self, timeout=None): + if timeout is not None: + timeout = max(timeout, 0) + + max_events = len(self._fd_to_key) * 2 + ready_fds = {} + + kevent_list = _syscall_wrapper(self._kqueue.control, True, + None, max_events, timeout) + + for kevent in kevent_list: + fd = kevent.ident + event_mask = kevent.filter + events = 0 + if event_mask == select.KQ_FILTER_READ: + events |= EVENT_READ + if event_mask == select.KQ_FILTER_WRITE: + events |= EVENT_WRITE + + key = self._key_from_fd(fd) + if key: + if key.fd not in ready_fds: + ready_fds[key.fd] = (key, events & key.events) + else: + old_events = ready_fds[key.fd][1] + ready_fds[key.fd] = (key, (events | old_events) & key.events) + + return list(ready_fds.values()) + + def close(self): + self._kqueue.close() + super(KqueueSelector, self).close() + + +if not hasattr(select, 'select'): # Platform-specific: AppEngine + HAS_SELECT = False + + +def _can_allocate(struct): + """ Checks that select structs can be allocated by the underlying + operating system, not just advertised by the select module. We don't + check select() because we'll be hopeful that most platforms that + don't have it available will not advertise it. (ie: GAE) """ + try: + # select.poll() objects won't fail until used. + if struct == 'poll': + p = select.poll() + p.poll(0) + + # All others will fail on allocation. + else: + getattr(select, struct)().close() + return True + except (OSError, AttributeError) as e: + return False + + +# Choose the best implementation, roughly: +# kqueue == epoll > poll > select. Devpoll not supported. (See above) +# select() also can't accept a FD > FD_SETSIZE (usually around 1024) +def DefaultSelector(): + """ This function serves as a first call for DefaultSelector to + detect if the select module is being monkey-patched incorrectly + by eventlet, greenlet, and preserve proper behavior. """ + global _DEFAULT_SELECTOR + if _DEFAULT_SELECTOR is None: + if _can_allocate('kqueue'): + _DEFAULT_SELECTOR = KqueueSelector + elif _can_allocate('epoll'): + _DEFAULT_SELECTOR = EpollSelector + elif _can_allocate('poll'): + _DEFAULT_SELECTOR = PollSelector + elif hasattr(select, 'select'): + _DEFAULT_SELECTOR = SelectSelector + else: # Platform-specific: AppEngine + raise ValueError('Platform does not have a selector') + return _DEFAULT_SELECTOR() diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py b/collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py new file mode 100644 index 00000000..ece3ec39 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/ssl_.py @@ -0,0 +1,338 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +import errno +import warnings +import hmac + +from binascii import hexlify, unhexlify +from hashlib import md5, sha1, sha256 + +from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning + + +SSLContext = None +HAS_SNI = False +IS_PYOPENSSL = False +IS_SECURETRANSPORT = False + +# Maps the length of a digest to a possible hash function producing this digest +HASHFUNC_MAP = { + 32: md5, + 40: sha1, + 64: sha256, +} + + +def _const_compare_digest_backport(a, b): + """ + Compare two digests of equal length in constant time. + + The digests must be of type str/bytes. + Returns True if the digests match, and False otherwise. + """ + result = abs(len(a) - len(b)) + for l, r in zip(bytearray(a), bytearray(b)): + result |= l ^ r + return result == 0 + + +_const_compare_digest = getattr(hmac, 'compare_digest', + _const_compare_digest_backport) + + +try: # Test for SSL features + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and +# security, +# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ':'.join([ + 'ECDH+AESGCM', + 'ECDH+CHACHA20', + 'DH+AESGCM', + 'DH+CHACHA20', + 'ECDH+AES256', + 'DH+AES256', + 'ECDH+AES128', + 'DH+AES', + 'RSA+AESGCM', + 'RSA+AES', + '!aNULL', + '!eNULL', + '!MD5', +]) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or + (3, 2) <= sys.version_info) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, cafile=None, capath=None): + self.ca_certs = cafile + + if capath is not None: + raise SSLError("CA directories not supported in older Pythons") + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None, server_side=False): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. You can upgrade to a newer ' + 'version of Python to solve this. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings', + InsecurePlatformWarning + ) + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + 'server_side': server_side, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + fingerprint = fingerprint.replace(':', '').lower() + digest_length = len(fingerprint) + hashfunc = HASHFUNC_MAP.get(digest_length) + if not hashfunc: + raise SSLError( + 'Fingerprint of invalid length: {0}'.format(fingerprint)) + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + cert_digest = hashfunc(cert).digest() + + if not _const_compare_digest(cert_digest, fingerprint_bytes): + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(fingerprint, hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def create_urllib3_context(ssl_version=None, cert_reqs=None, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None, + ca_cert_dir=None): + """ + All arguments except for server_hostname, ssl_context, and ca_cert_dir have + the same meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + :param ca_cert_dir: + A directory containing CA certificates in multiple separate files, as + supported by OpenSSL's -CApath flag or the capath argument to + SSLContext.load_verify_locations(). + """ + context = ssl_context + if context is None: + # Note: This branch of code and all the variables in it are no longer + # used by urllib3 itself. We should consider deprecating and removing + # this code. + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs or ca_cert_dir: + try: + context.load_verify_locations(ca_certs, ca_cert_dir) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: + raise SSLError(e) + raise + elif getattr(context, 'load_default_certs', None) is not None: + # try to load OS default certs; works well on Windows (require Python3.4+) + context.load_default_certs() + + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + + warnings.warn( + 'An HTTPS request has been made, but the SNI (Subject Name ' + 'Indication) extension to TLS is not available on this platform. ' + 'This may cause the server to present an incorrect TLS ' + 'certificate, which can cause validation failures. You can upgrade to ' + 'a newer version of Python to solve this. For more information, see ' + 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html' + '#ssl-warnings', + SNIMissingWarning + ) + return context.wrap_socket(sock) diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/timeout.py b/collectors/python.d.plugin/python_modules/urllib3/util/timeout.py new file mode 100644 index 00000000..4041cf9b --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/timeout.py @@ -0,0 +1,243 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() + + +# Use time.monotonic if available. +current_time = getattr(time, "monotonic", time.time) + + +class Timeout(object): + """ Timeout configuration. + + Timeouts can be defined as a default for a pool:: + + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. + None will set an infinite timeout. + + :type read: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not trigger, even though the request will take + several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid. + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If it is a numeric value less than or equal to + zero, or the type is not an integer, float, or None. + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + if isinstance(value, bool): + raise ValueError("Timeout cannot be a boolean value. It must " + "be an int, float or None.") + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int, float or None." % (name, value)) + + try: + if value <= 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than or equal to 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int, float or None." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value + passed to this function. + + :param timeout: The legacy timeout value. + :type timeout: integer, float, sentinel default object, or None + :return: Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: Elapsed time. + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: Connect timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: Value to use for the read timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # In case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/url.py b/collectors/python.d.plugin/python_modules/urllib3/util/url.py new file mode 100644 index 00000000..99fd6534 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/url.py @@ -0,0 +1,231 @@ +# SPDX-License-Identifier: MIT +from __future__ import absolute_import +from collections import namedtuple + +from ..exceptions import LocationParseError + + +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] + +# We only want to normalize urls with an HTTP(S) scheme. +# urllib3 infers URLs without a scheme (None) to be http. +NORMALIZABLE_SCHEMES = ('http', 'https', None) + + +class Url(namedtuple('Url', url_attrs)): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. Both the scheme and host are normalized as they are + both case-insensitive according to RFC 3986. + """ + __slots__ = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + if path and not path.startswith('/'): + path = '/' + path + if scheme: + scheme = scheme.lower() + if host and scheme in NORMALIZABLE_SCHEMES: + host = host.lower() + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example:: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx + 1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example:: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + if not url: + # Empty + return Url() + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. No whitespace, no plus or + # minus prefixes, no non-integer digits such as ^2 (superscript). + if not port.isdigit(): + raise LocationParseError(url) + try: + port = int(port) + except ValueError: + raise LocationParseError(url) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port diff --git a/collectors/python.d.plugin/python_modules/urllib3/util/wait.py b/collectors/python.d.plugin/python_modules/urllib3/util/wait.py new file mode 100644 index 00000000..21e72979 --- /dev/null +++ b/collectors/python.d.plugin/python_modules/urllib3/util/wait.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: MIT +from .selectors import ( + HAS_SELECT, + DefaultSelector, + EVENT_READ, + EVENT_WRITE +) + + +def _wait_for_io_events(socks, events, timeout=None): + """ Waits for IO events to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be interacted with immediately. """ + if not HAS_SELECT: + raise ValueError('Platform does not have a selector') + if not isinstance(socks, list): + # Probably just a single socket. + if hasattr(socks, "fileno"): + socks = [socks] + # Otherwise it might be a non-list iterable. + else: + socks = list(socks) + with DefaultSelector() as selector: + for sock in socks: + selector.register(sock, events) + return [key[0].fileobj for key in + selector.select(timeout) if key[1] & events] + + +def wait_for_read(socks, timeout=None): + """ Waits for reading to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be read from immediately. """ + return _wait_for_io_events(socks, EVENT_READ, timeout) + + +def wait_for_write(socks, timeout=None): + """ Waits for writing to be available from a list of sockets + or optionally a single socket if passed in. Returns a list of + sockets that can be written to immediately. """ + return _wait_for_io_events(socks, EVENT_WRITE, timeout) |