diff options
Diffstat (limited to 'src/jaegertracing/thrift/test/crossrunner')
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/__init__.py | 23 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/collect.py | 164 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/compat.py | 24 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/report.py | 441 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/run.py | 425 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/setup.cfg | 2 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/test.py | 149 | ||||
-rw-r--r-- | src/jaegertracing/thrift/test/crossrunner/util.py | 35 |
8 files changed, 1263 insertions, 0 deletions
diff --git a/src/jaegertracing/thrift/test/crossrunner/__init__.py b/src/jaegertracing/thrift/test/crossrunner/__init__.py new file mode 100644 index 000000000..9d0b83acb --- /dev/null +++ b/src/jaegertracing/thrift/test/crossrunner/__init__.py @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from .test import test_name # noqa +from .collect import collect_cross_tests, collect_feature_tests # noqa +from .run import TestDispatcher # noqa +from .report import generate_known_failures, load_known_failures # noqa diff --git a/src/jaegertracing/thrift/test/crossrunner/collect.py b/src/jaegertracing/thrift/test/crossrunner/collect.py new file mode 100644 index 000000000..e2d897828 --- /dev/null +++ b/src/jaegertracing/thrift/test/crossrunner/collect.py @@ -0,0 +1,164 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

import platform
import re
from itertools import product

from .util import merge_dict
from .test import TestEntry

# Those keys are passed to execution as is.
# Note that there are keys other than these, namely:
# delay: After server is started, client start is delayed for the value
# (seconds).
# timeout: Test timeout after client is started (seconds).
# platforms: Supported platforms. Should match platform.system() value.
# protocols: list of supported protocols
# transports: list of supported transports
# sockets: list of supported sockets
#
# protocols and transports entries can be colon separated "spec:impl" pair
# (e.g. binary:accel) where test is run for any matching "spec" while actual
# argument passed to test executable is "impl".
# Otherwise "spec" is equivalent to "spec:spec" pair.
# (e.g. "binary" is equivalent to "binary:binary" in tests.json)
#
VALID_JSON_KEYS = [
    'name',  # name of the library, typically a language name
    'workdir',  # work directory where command is executed
    'command',  # test command
    'extra_args',  # args appended to command after other args are appended
    'remote_args',  # args added to the other side of the program
    'join_args',  # whether args should be passed as single concatenated string
    'env',  # additional environmental variable
]

DEFAULT_MAX_DELAY = 5
DEFAULT_SIGNAL = 1
DEFAULT_TIMEOUT = 5


def _collect_testlibs(config, server_match, client_match=(None,)):
    """Collect server/client configurations from library configurations.

    Each entry of ``config`` is a library dict that may carry a 'server' and
    a 'client' sub-dict.  A sub-dict is merged over its library entry (via
    ``merge_dict``) when the library name passes the corresponding match
    filter and the current ``platform.system()`` is listed in 'platforms'
    (or no platforms are listed).

    ``server_match`` / ``client_match`` are collections of library names; an
    empty/None value accepts every library.  The default ``client_match``
    only accepts a library whose name is ``None``.

    Returns a ``(servers, clients)`` pair of lists.
    """
    def expand_libs(libs):
        for lib in libs:
            # Work on a shallow copy: popping from the caller's dict would
            # strip its 'server'/'client' entries, so a second collection
            # over the same config would silently find nothing.
            lib = dict(lib)
            sv = lib.pop('server', None)
            cl = lib.pop('client', None)
            yield lib, sv, cl

    def yield_testlibs(base_configs, configs, match):
        for base, conf in zip(base_configs, configs):
            if not conf:
                continue
            if match and base['name'] not in match:
                continue
            platforms = conf.get('platforms') or base.get('platforms')
            if not platforms or platform.system() in platforms:
                yield merge_dict(base, conf)

    expanded = list(expand_libs(config))
    if not expanded:
        # zip(*[]) cannot be unpacked into three names.
        return [], []
    libs, svs, cls = zip(*expanded)
    servers = list(yield_testlibs(libs, svs, server_match))
    clients = list(yield_testlibs(libs, cls, client_match))
    return servers, clients


def collect_features(config, match):
    """Return the entries of ``config`` whose 'name' matches any regex in ``match``."""
    patterns = [re.compile(m) for m in match]
    return [c for c in config if any(p.search(c['name']) for p in patterns)]


def _do_collect_tests(servers, clients):
    """Yield one test dict per compatible server/client/protocol/transport/socket.

    Note: the 'protocols', 'transports' and 'sockets' values of the given
    server/client dicts are replaced in place by sets the first time they are
    intersected.
    """
    def intersection(key, o1, o2):
        """intersection of two collections.
        collections are replaced with sets the first time"""
        def cached_set(o, key):
            v = o[key]
            if not isinstance(v, set):
                v = set(v)
                o[key] = v
            return v
        return cached_set(o1, key) & cached_set(o2, key)

    def intersect_with_spec(key, o1, o2):
        # store as set of (spec, impl) tuple
        def cached_set(o):
            def to_spec_impl_tuples(values):
                for v in values:
                    spec, _, impl = v.partition(':')
                    yield spec, impl or spec
            v = o[key]
            if not isinstance(v, set):
                v = set(to_spec_impl_tuples(set(v)))
                o[key] = v
            return v
        for spec1, impl1 in cached_set(o1):
            for spec2, impl2 in cached_set(o2):
                if spec1 == spec2:
                    name = impl1 if impl1 == impl2 else '%s-%s' % (impl1, impl2)
                    yield name, impl1, impl2

    def maybe_max(key, o1, o2, default):
        """maximum of two if present, otherwise default value"""
        v1 = o1.get(key)
        v2 = o2.get(key)
        return max(v1, v2) if v1 and v2 else v1 or v2 or default

    def merge_metadata(o, **ret):
        # Copy only the keys that are passed through to execution.
        for key in VALID_JSON_KEYS:
            if key in o:
                ret[key] = o[key]
        return ret

    for sv, cl in product(servers, clients):
        for proto, proto1, proto2 in intersect_with_spec('protocols', sv, cl):
            for trans, trans1, trans2 in intersect_with_spec('transports', sv, cl):
                for sock in intersection('sockets', sv, cl):
                    yield {
                        'server': merge_metadata(sv, **{'protocol': proto1, 'transport': trans1}),
                        'client': merge_metadata(cl, **{'protocol': proto2, 'transport': trans2}),
                        'delay': maybe_max('delay', sv, cl, DEFAULT_MAX_DELAY),
                        'stop_signal': maybe_max('stop_signal', sv, cl, DEFAULT_SIGNAL),
                        'timeout': maybe_max('timeout', sv, cl, DEFAULT_TIMEOUT),
                        'protocol': proto,
                        'transport': trans,
                        'socket': sock
                    }


def _filter_entries(tests, regex):
    """Keep only the tests whose ``TestEntry.get_name`` matches ``regex`` (if given)."""
    if regex:
        return filter(lambda t: re.search(regex, TestEntry.get_name(**t)), tests)
    return tests


def collect_cross_tests(tests_dict, server_match, client_match, regex):
    """Return the list of server x client cross tests selected by the filters."""
    sv, cl = _collect_testlibs(tests_dict, server_match, client_match)
    return list(_filter_entries(_do_collect_tests(sv, cl), regex))


def collect_feature_tests(tests_dict, features_dict, server_match, feature_match, regex):
    """Return the list of server x feature tests selected by the filters."""
    sv, _ = _collect_testlibs(tests_dict, server_match)
    ft = collect_features(features_dict, feature_match)
    return list(_filter_entries(_do_collect_tests(sv, ft), regex))


# diff --git a/src/jaegertracing/thrift/test/crossrunner/compat.py b/src/jaegertracing/thrift/test/crossrunner/compat.py
# new file mode 100644
# index 000000000..f1ca91bb3
# --- /dev/null
# +++ b/src/jaegertracing/thrift/test/crossrunner/compat.py
# @@ -0,0 +1,24 @@
import os
import sys

if sys.version_info[0] == 2:
    _ENCODE = sys.getfilesystemencoding()

    def path_join(*args):
        """os.path.join for byte strings: decode, join, then re-encode."""
        bin_args = map(lambda a: a.decode(_ENCODE), args)
        return os.path.join(*bin_args).encode(_ENCODE)

    def str_join(s, l):
        """``s.join(l)`` for byte strings: decode, join, then re-encode."""
        bin_args = map(lambda a: a.decode(_ENCODE), l)
        b = s.decode(_ENCODE)
        return b.join(bin_args).encode(_ENCODE)

    logfile_open = open

else:

    path_join = os.path.join
    str_join = str.join

    def logfile_open(*args):
        """open() that replaces undecodable/unencodable characters instead of raising."""
        return open(*args, errors='replace')


# diff --git a/src/jaegertracing/thrift/test/crossrunner/report.py b/src/jaegertracing/thrift/test/crossrunner/report.py
# new file mode 100644
# index 000000000..5baf16195
# --- /dev/null
# +++ b/src/jaegertracing/thrift/test/crossrunner/report.py
# @@ -0,0 +1,441 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from __future__ import print_function +import datetime +import json +import multiprocessing +import os +import platform +import re +import subprocess +import sys +import time +import traceback + +from .compat import logfile_open, path_join, str_join +from .test import TestEntry + +LOG_DIR = 'log' +RESULT_HTML = 'index.html' +RESULT_JSON = 'results.json' +FAIL_JSON = 'known_failures_%s.json' + + +def generate_known_failures(testdir, overwrite, save, out): + def collect_failures(results): + success_index = 5 + for r in results: + if not r[success_index]: + yield TestEntry.get_name(*r) + try: + with logfile_open(path_join(testdir, RESULT_JSON), 'r') as fp: + results = json.load(fp) + except IOError: + sys.stderr.write('Unable to load last result. 
Did you run tests ?\n') + return False + fails = collect_failures(results['results']) + if not overwrite: + known = load_known_failures(testdir) + known.extend(fails) + fails = known + fails_json = json.dumps(sorted(set(fails)), indent=2, separators=(',', ': ')) + if save: + with logfile_open(os.path.join(testdir, FAIL_JSON % platform.system()), 'w+') as fp: + fp.write(fails_json) + sys.stdout.write('Successfully updated known failures.\n') + if out: + sys.stdout.write(fails_json) + sys.stdout.write('\n') + return True + + +def load_known_failures(testdir): + try: + with logfile_open(path_join(testdir, FAIL_JSON % platform.system()), 'r') as fp: + return json.load(fp) + except IOError: + return [] + + +class TestReporter(object): + # Unfortunately, standard library doesn't handle timezone well + # DATETIME_FORMAT = '%a %b %d %H:%M:%S %Z %Y' + DATETIME_FORMAT = '%a %b %d %H:%M:%S %Y' + + def __init__(self): + self._log = multiprocessing.get_logger() + self._lock = multiprocessing.Lock() + + @classmethod + def test_logfile(cls, test_name, prog_kind, dir=None): + relpath = path_join('log', '%s_%s.log' % (test_name, prog_kind)) + return relpath if not dir else os.path.realpath(path_join(dir, relpath)) + + def _start(self): + self._start_time = time.time() + + @property + def _elapsed(self): + return time.time() - self._start_time + + @classmethod + def _format_date(cls): + return '%s' % datetime.datetime.now().strftime(cls.DATETIME_FORMAT) + + def _print_date(self): + print(self._format_date(), file=self.out) + + def _print_bar(self, out=None): + print( + '===============================================================================', + file=(out or self.out)) + + def _print_exec_time(self): + print('Test execution took {:.1f} seconds.'.format(self._elapsed), file=self.out) + + +class ExecReporter(TestReporter): + def __init__(self, testdir, test, prog): + super(ExecReporter, self).__init__() + self._test = test + self._prog = prog + self.logpath = 
self.test_logfile(test.name, prog.kind, testdir) + self.out = None + + def begin(self): + self._start() + self._open() + if self.out and not self.out.closed: + self._print_header() + else: + self._log.debug('Output stream is not available.') + + def end(self, returncode): + self._lock.acquire() + try: + if self.out and not self.out.closed: + self._print_footer(returncode) + self._close() + self.out = None + else: + self._log.debug('Output stream is not available.') + finally: + self._lock.release() + + def killed(self): + print(file=self.out) + print('Server process is successfully killed.', file=self.out) + self.end(None) + + def died(self): + print(file=self.out) + print('*** Server process has died unexpectedly ***', file=self.out) + self.end(None) + + _init_failure_exprs = { + 'server': list(map(re.compile, [ + '[Aa]ddress already in use', + 'Could not bind', + 'EADDRINUSE', + ])), + 'client': list(map(re.compile, [ + '[Cc]onnection refused', + 'Could not connect to', + 'Could not open UNIX ', # domain socket (rb) + 'ECONNREFUSED', + 'econnrefused', # erl + 'CONNECTION-REFUSED-ERROR', # cl + 'connect ENOENT', # nodejs domain socket + 'No such file or directory', # domain socket + 'Sockets.TcpClient.Connect', # csharp + ])), + } + + def maybe_false_positive(self): + """Searches through log file for socket bind error. 
+ Returns True if suspicious expression is found, otherwise False""" + try: + if self.out and not self.out.closed: + self.out.flush() + exprs = self._init_failure_exprs[self._prog.kind] + + def match(line): + for expr in exprs: + if expr.search(line): + self._log.info("maybe false positive: %s" % line) + return True + + with logfile_open(self.logpath, 'r') as fp: + if any(map(match, fp)): + return True + except (KeyboardInterrupt, SystemExit): + raise + except Exception as ex: + self._log.warn('[%s]: Error while detecting false positive: %s' % (self._test.name, str(ex))) + self._log.info(traceback.print_exc()) + return False + + def _open(self): + self.out = logfile_open(self.logpath, 'w+') + + def _close(self): + self.out.close() + + def _print_header(self): + self._print_date() + print('Executing: %s' % str_join(' ', self._prog.command), file=self.out) + print('Directory: %s' % self._prog.workdir, file=self.out) + print('config:delay: %s' % self._test.delay, file=self.out) + print('config:timeout: %s' % self._test.timeout, file=self.out) + self._print_bar() + self.out.flush() + + def _print_footer(self, returncode=None): + self._print_bar() + if returncode is not None: + print('Return code: %d (negative values indicate kill by signal)' % returncode, file=self.out) + else: + print('Process is killed.', file=self.out) + self._print_exec_time() + self._print_date() + + +class SummaryReporter(TestReporter): + def __init__(self, basedir, testdir_relative, concurrent=True): + super(SummaryReporter, self).__init__() + self._basedir = basedir + self._testdir_rel = testdir_relative + self.logdir = path_join(self.testdir, LOG_DIR) + self.out_path = path_join(self.testdir, RESULT_JSON) + self.concurrent = concurrent + self.out = sys.stdout + self._platform = platform.system() + self._revision = self._get_revision() + self._tests = [] + if not os.path.exists(self.logdir): + os.mkdir(self.logdir) + self._known_failures = load_known_failures(self.testdir) + 
self._unexpected_success = [] + self._flaky_success = [] + self._unexpected_failure = [] + self._expected_failure = [] + self._print_header() + + @property + def testdir(self): + return path_join(self._basedir, self._testdir_rel) + + def _result_string(self, test): + if test.success: + if test.retry_count == 0: + return 'success' + elif test.retry_count == 1: + return 'flaky(1 retry)' + else: + return 'flaky(%d retries)' % test.retry_count + elif test.expired: + return 'failure(timeout)' + else: + return 'failure(%d)' % test.returncode + + def _get_revision(self): + p = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], + cwd=self.testdir, stdout=subprocess.PIPE) + out, _ = p.communicate() + return out.strip() + + def _format_test(self, test, with_result=True): + name = '%s-%s' % (test.server.name, test.client.name) + trans = '%s-%s' % (test.transport, test.socket) + if not with_result: + return '{:24s}{:18s}{:25s}'.format(name[:23], test.protocol[:17], trans[:24]) + else: + return '{:24s}{:18s}{:25s}{:s}\n'.format(name[:23], test.protocol[:17], + trans[:24], self._result_string(test)) + + def _print_test_header(self): + self._print_bar() + print( + '{:24s}{:18s}{:25s}{:s}'.format('server-client:', 'protocol:', 'transport:', 'result:'), + file=self.out) + + def _print_header(self): + self._start() + print('Apache Thrift - Integration Test Suite', file=self.out) + self._print_date() + self._print_test_header() + + def _print_unexpected_failure(self): + if len(self._unexpected_failure) > 0: + self.out.writelines([ + '*** Following %d failures were unexpected ***:\n' % len(self._unexpected_failure), + 'If it is introduced by you, please fix it before submitting the code.\n', + # 'If not, please report at https://issues.apache.org/jira/browse/THRIFT\n', + ]) + self._print_test_header() + for i in self._unexpected_failure: + self.out.write(self._format_test(self._tests[i])) + self._print_bar() + else: + print('No unexpected failures.', file=self.out) + + def 
_print_flaky_success(self): + if len(self._flaky_success) > 0: + print( + 'Following %d tests were expected to cleanly succeed but needed retry:' % len(self._flaky_success), + file=self.out) + self._print_test_header() + for i in self._flaky_success: + self.out.write(self._format_test(self._tests[i])) + self._print_bar() + + def _print_unexpected_success(self): + if len(self._unexpected_success) > 0: + print( + 'Following %d tests were known to fail but succeeded (maybe flaky):' % len(self._unexpected_success), + file=self.out) + self._print_test_header() + for i in self._unexpected_success: + self.out.write(self._format_test(self._tests[i])) + self._print_bar() + + def _http_server_command(self, port): + if sys.version_info[0] < 3: + return 'python -m SimpleHTTPServer %d' % port + else: + return 'python -m http.server %d' % port + + def _print_footer(self): + fail_count = len(self._expected_failure) + len(self._unexpected_failure) + self._print_bar() + self._print_unexpected_success() + self._print_flaky_success() + self._print_unexpected_failure() + self._write_html_data() + self._assemble_log('unexpected failures', self._unexpected_failure) + self._assemble_log('known failures', self._expected_failure) + self.out.writelines([ + 'You can browse results at:\n', + '\tfile://%s/%s\n' % (self.testdir, RESULT_HTML), + '# If you use Chrome, run:\n', + '# \tcd %s\n#\t%s\n' % (self._basedir, self._http_server_command(8001)), + '# then browse:\n', + '# \thttp://localhost:%d/%s/\n' % (8001, self._testdir_rel), + 'Full log for each test is here:\n', + '\ttest/log/server_client_protocol_transport_client.log\n', + '\ttest/log/server_client_protocol_transport_server.log\n', + '%d failed of %d tests in total.\n' % (fail_count, len(self._tests)), + ]) + self._print_exec_time() + self._print_date() + + def _render_result(self, test): + return [ + test.server.name, + test.client.name, + test.protocol, + test.transport, + test.socket, + test.success, + test.as_expected, + 
test.returncode, + { + 'server': self.test_logfile(test.name, test.server.kind), + 'client': self.test_logfile(test.name, test.client.kind), + }, + ] + + def _write_html_data(self): + """Writes JSON data to be read by result html""" + results = [self._render_result(r) for r in self._tests] + with logfile_open(self.out_path, 'w+') as fp: + fp.write(json.dumps({ + 'date': self._format_date(), + 'revision': str(self._revision), + 'platform': self._platform, + 'duration': '{:.1f}'.format(self._elapsed), + 'results': results, + }, indent=2)) + + def _assemble_log(self, title, indexes): + if len(indexes) > 0: + def add_prog_log(fp, test, prog_kind): + print('*************************** %s message ***************************' % prog_kind, + file=fp) + path = self.test_logfile(test.name, prog_kind, self.testdir) + if os.path.exists(path): + with logfile_open(path, 'r') as prog_fp: + print(prog_fp.read(), file=fp) + filename = title.replace(' ', '_') + '.log' + with logfile_open(os.path.join(self.logdir, filename), 'w+') as fp: + for test in map(self._tests.__getitem__, indexes): + fp.write('TEST: [%s]\n' % test.name) + add_prog_log(fp, test, test.server.kind) + add_prog_log(fp, test, test.client.kind) + fp.write('**********************************************************************\n\n') + print('%s are logged to %s/%s/%s' % (title.capitalize(), self._testdir_rel, LOG_DIR, filename)) + + def end(self): + self._print_footer() + return len(self._unexpected_failure) == 0 + + def add_test(self, test_dict): + test = TestEntry(self.testdir, **test_dict) + self._lock.acquire() + try: + if not self.concurrent: + self.out.write(self._format_test(test, False)) + self.out.flush() + self._tests.append(test) + return len(self._tests) - 1 + finally: + self._lock.release() + + def add_result(self, index, returncode, expired, retry_count): + self._lock.acquire() + try: + failed = returncode is None or returncode != 0 + flaky = not failed and retry_count != 0 + test = self._tests[index] + 
known = test.name in self._known_failures + if failed: + if known: + self._log.debug('%s failed as expected' % test.name) + self._expected_failure.append(index) + else: + self._log.info('unexpected failure: %s' % test.name) + self._unexpected_failure.append(index) + elif flaky and not known: + self._log.info('unexpected flaky success: %s' % test.name) + self._flaky_success.append(index) + elif not flaky and known: + self._log.info('unexpected success: %s' % test.name) + self._unexpected_success.append(index) + test.success = not failed + test.returncode = returncode + test.retry_count = retry_count + test.expired = expired + test.as_expected = known == failed + if not self.concurrent: + self.out.write(self._result_string(test) + '\n') + else: + self.out.write(self._format_test(test)) + finally: + self._lock.release() diff --git a/src/jaegertracing/thrift/test/crossrunner/run.py b/src/jaegertracing/thrift/test/crossrunner/run.py new file mode 100644 index 000000000..bb06d25ef --- /dev/null +++ b/src/jaegertracing/thrift/test/crossrunner/run.py @@ -0,0 +1,425 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +import contextlib +import multiprocessing +import multiprocessing.managers +import os +import platform +import random +import socket +import subprocess +import sys +import time + +from .compat import str_join +from .report import ExecReporter, SummaryReporter +from .test import TestEntry +from .util import domain_socket_path + +RESULT_ERROR = 64 +RESULT_TIMEOUT = 128 +SIGNONE = 0 +SIGKILL = 15 + +# globals +ports = None +stop = None + + +class ExecutionContext(object): + def __init__(self, cmd, cwd, env, stop_signal, is_server, report): + self._log = multiprocessing.get_logger() + self.cmd = cmd + self.cwd = cwd + self.env = env + self.stop_signal = stop_signal + self.is_server = is_server + self.report = report + self.expired = False + self.killed = False + self.proc = None + + def _popen_args(self): + args = { + 'cwd': self.cwd, + 'env': self.env, + 'stdout': self.report.out, + 'stderr': subprocess.STDOUT, + } + # make sure child processes doesn't remain after killing + if platform.system() == 'Windows': + DETACHED_PROCESS = 0x00000008 + args.update(creationflags=DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP) + else: + args.update(preexec_fn=os.setsid) + return args + + def start(self): + joined = str_join(' ', self.cmd) + self._log.debug('COMMAND: %s', joined) + self._log.debug('WORKDIR: %s', self.cwd) + self._log.debug('LOGFILE: %s', self.report.logpath) + self.report.begin() + self.proc = subprocess.Popen(self.cmd, **self._popen_args()) + self._log.debug(' PID: %d', self.proc.pid) + self._log.debug(' PGID: %d', os.getpgid(self.proc.pid)) + return self._scoped() + + @contextlib.contextmanager + def _scoped(self): + yield self + if self.is_server: + # the server is supposed to run until we stop it + if self.returncode is not None: + self.report.died() + else: + if self.stop_signal != SIGNONE: + if self.sigwait(self.stop_signal): + self.report.end(self.returncode) + else: + self.report.killed() + else: + self.sigwait(SIGKILL) + else: + # the client 
is supposed to exit normally + if self.returncode is not None: + self.report.end(self.returncode) + else: + self.sigwait(SIGKILL) + self.report.killed() + self._log.debug('[{0}] exited with return code {1}'.format(self.proc.pid, self.returncode)) + + # Send a signal to the process and then wait for it to end + # If the signal requested is SIGNONE, no signal is sent, and + # instead we just wait for the process to end; further if it + # does not end normally with SIGNONE, we mark it as expired. + # If the process fails to end and the signal is not SIGKILL, + # it re-runs with SIGKILL so that a real process kill occurs + # returns True if the process ended, False if it may not have + def sigwait(self, sig=SIGKILL, timeout=2): + try: + if sig != SIGNONE: + self._log.debug('[{0}] send signal {1}'.format(self.proc.pid, sig)) + if sig == SIGKILL: + self.killed = True + try: + if platform.system() != 'Windows': + os.killpg(os.getpgid(self.proc.pid), sig) + else: + self.proc.send_signal(sig) + except Exception: + self._log.info('[{0}] Failed to kill process'.format(self.proc.pid), exc_info=sys.exc_info()) + self._log.debug('[{0}] wait begin, timeout {1} sec(s)'.format(self.proc.pid, timeout)) + self.proc.communicate(timeout=timeout) + self._log.debug('[{0}] process ended with return code {1}'.format(self.proc.pid, self.returncode)) + self.report.end(self.returncode) + return True + except subprocess.TimeoutExpired: + self._log.info('[{0}] timeout waiting for process to end'.format(self.proc.pid)) + if sig == SIGNONE: + self.expired = True + return False if sig == SIGKILL else self.sigwait(SIGKILL, 1) + + # called on the client process to wait for it to end naturally + def wait(self, timeout): + self.sigwait(SIGNONE, timeout) + + @property + def returncode(self): + return self.proc.returncode if self.proc else None + + +def exec_context(port, logdir, test, prog, is_server): + report = ExecReporter(logdir, test, prog) + prog.build_command(port) + return 
ExecutionContext(prog.command, prog.workdir, prog.env, prog.stop_signal, is_server, report) + + +def run_test(testdir, logdir, test_dict, max_retry, async_mode=True): + logger = multiprocessing.get_logger() + + def ensure_socket_open(sv, port, test): + slept = 0.1 + time.sleep(slept) + sleep_step = 0.1 + while True: + if slept > test.delay: + logger.warn('[{0}] slept for {1} seconds but server is not open'.format(sv.proc.pid, slept)) + return False + if test.socket == 'domain': + if not os.path.exists(domain_socket_path(port)): + logger.debug('[{0}] domain(unix) socket not available yet. slept for {1} seconds so far'.format(sv.proc.pid, slept)) + time.sleep(sleep_step) + slept += sleep_step + elif test.socket == 'abstract': + return True + else: + # Create sockets every iteration because refused sockets cannot be + # reused on some systems. + sock4 = socket.socket() + sock6 = socket.socket(family=socket.AF_INET6) + try: + if sock4.connect_ex(('127.0.0.1', port)) == 0 \ + or sock6.connect_ex(('::1', port)) == 0: + return True + if sv.proc.poll() is not None: + logger.warn('[{0}] server process is exited'.format(sv.proc.pid)) + return False + logger.debug('[{0}] socket not available yet. 
slept for {1} seconds so far'.format(sv.proc.pid, slept)) + time.sleep(sleep_step) + slept += sleep_step + finally: + sock4.close() + sock6.close() + logger.debug('[{0}] server ready - waited for {1} seconds'.format(sv.proc.pid, slept)) + return True + + try: + max_bind_retry = 3 + retry_count = 0 + bind_retry_count = 0 + test = TestEntry(testdir, **test_dict) + while True: + if stop.is_set(): + logger.debug('Skipping because shutting down') + return (retry_count, None) + logger.debug('Start') + with PortAllocator.alloc_port_scoped(ports, test.socket) as port: + logger.debug('Start with port %d' % port) + sv = exec_context(port, logdir, test, test.server, True) + cl = exec_context(port, logdir, test, test.client, False) + + logger.debug('Starting server') + with sv.start(): + port_ok = ensure_socket_open(sv, port, test) + if port_ok: + connect_retry_count = 0 + max_connect_retry = 12 + connect_retry_wait = 0.25 + while True: + if sv.proc.poll() is not None: + logger.info('not starting client because server process is absent') + break + logger.debug('Starting client') + cl.start() + logger.debug('Waiting client (up to %d secs)' % test.timeout) + cl.wait(test.timeout) + if not cl.report.maybe_false_positive() or connect_retry_count >= max_connect_retry: + if connect_retry_count > 0 and connect_retry_count < max_connect_retry: + logger.info('[%s]: Connected after %d retry (%.2f sec each)' % (test.server.name, connect_retry_count, connect_retry_wait)) + # Wait for 50ms to see if server does not die at the end. + time.sleep(0.05) + break + logger.debug('Server may not be ready, waiting %.2f second...' 
class PortAllocator(object):
    """Hand out port numbers that no other running test currently holds.

    TCP ports and the pseudo port numbers used for domain/abstract sockets
    are tracked in two separate sets guarded by a lock.  A number stays
    reserved until ``free_port`` is called for it.
    """

    # Inclusive bounds for domain/abstract pseudo ports.  random.randint()
    # includes both end points, so the upper bound must be the largest valid
    # port number.
    _DOMAIN_PORT_MIN = 1024
    _DOMAIN_PORT_MAX = 65535

    def __init__(self):
        self._log = multiprocessing.get_logger()
        self._lock = multiprocessing.Lock()
        self._ports = set()
        self._dom_ports = set()
        self._last_alloc = 0

    def _get_tcp_port(self):
        """Reserve and return a TCP port chosen by the operating system."""
        while True:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                # Binding to port 0 lets the OS pick a currently free port.
                sock.bind(('', 0))
                port = sock.getsockname()[1]
                with self._lock:
                    if port not in self._ports:
                        self._ports.add(port)
                        self._last_alloc = time.time()
                        return port
            finally:
                # The probe socket is closed only after the reservation has
                # been recorded, and also when bind() fails.
                sock.close()

    def _get_domain_port(self):
        """Reserve and return a random, unused domain-socket pseudo port."""
        while True:
            port = random.randint(self._DOMAIN_PORT_MIN, self._DOMAIN_PORT_MAX)
            with self._lock:
                if port not in self._dom_ports:
                    self._dom_ports.add(port)
                    return port

    def alloc_port(self, socket_type):
        """Reserve a port for ``socket_type``.

        ``'domain'`` and ``'abstract'`` get a pseudo port number; every other
        socket type gets a TCP port.
        """
        if socket_type in ('domain', 'abstract'):
            return self._get_domain_port()
        return self._get_tcp_port()

    # static method for inter-process invocation
    @staticmethod
    @contextlib.contextmanager
    def alloc_port_scoped(allocator, socket_type):
        """Context manager yielding a reserved port and freeing it on exit.

        The port is released even when the ``with`` body raises.
        """
        port = allocator.alloc_port(socket_type)
        try:
            yield port
        finally:
            allocator.free_port(socket_type, port)

    def free_port(self, socket_type, port):
        """Release ``port``; for ``'domain'`` also delete the socket file.

        Freeing is best effort: an unknown port or a failing file removal is
        logged instead of raised, so cleanup never masks the real test error.
        """
        self._log.debug('free_port')
        with self._lock:
            try:
                if socket_type == 'domain':
                    self._dom_ports.remove(port)
                    path = domain_socket_path(port)
                    if os.path.exists(path):
                        os.remove(path)
                elif socket_type == 'abstract':
                    self._dom_ports.remove(port)
                else:
                    self._ports.remove(port)
            except (KeyError, IOError, OSError):
                self._log.info('Error while freeing port', exc_info=True)


class NonAsyncResult(object):
    """Already-completed stand-in for ``multiprocessing.pool.AsyncResult``."""

    def __init__(self, value):
        self._value = value

    def get(self, timeout=None):
        """Return the stored value; ``timeout`` is ignored."""
        return self._value

    def wait(self, timeout=None):
        """Return immediately, the value is already available."""
        pass

    def ready(self):
        return True

    def successful(self):
        # NOTE(review): _dispatch_sync stores the tuple returned by run_test
        # here, which never equals 0 -- confirm what callers expect.
        return self._value == 0


class TestDispatcher(object):
    """Run tests inline or on a process pool and feed results to the report.

    With ``concurrency`` > 1 the tests run in a ``multiprocessing.Pool`` whose
    workers share one ``PortAllocator`` through a ``BaseManager``; otherwise
    they run synchronously in the calling process.
    """

    __test__ = False  # not a test case; keep pytest/nose from collecting it

    def __init__(self, testdir, basedir, logdir_rel, concurrency):
        global stop
        global ports
        self._log = multiprocessing.get_logger()
        self.testdir = testdir
        self._report = SummaryReporter(basedir, logdir_rel, concurrency > 1)
        self.logdir = self._report.testdir
        # seems needed for python 2.x to handle keyboard interrupt
        self._stop = multiprocessing.Event()
        self._async = concurrency > 1
        if not self._async:
            self._pool = None
            # run_test reads these module-level globals.
            stop = self._stop
            ports = PortAllocator()
        else:
            self._m = multiprocessing.managers.BaseManager()
            self._m.register('ports', PortAllocator)
            self._m.start()
            self._pool = multiprocessing.Pool(concurrency, self._pool_init, (self._m.address,))
        self._log.debug(
            'TestDispatcher started with %d concurrent jobs' % concurrency)

    def _pool_init(self, address):
        """Pool initializer: publish the stop event and shared allocator as globals."""
        global stop
        global m
        global ports
        stop = self._stop
        m = multiprocessing.managers.BaseManager(address)
        m.connect()
        ports = m.ports()

    def _dispatch_sync(self, test, cont, max_retry):
        r = run_test(self.testdir, self.logdir, test, max_retry, async_mode=False)
        cont(r)
        return NonAsyncResult(r)

    def _dispatch_async(self, test, cont, max_retry):
        self._log.debug('_dispatch_async')
        return self._pool.apply_async(
            func=run_test, args=(self.testdir, self.logdir, test, max_retry), callback=cont)

    def dispatch(self, test, max_retry):
        """Schedule ``test`` and return a result object with the AsyncResult API."""
        index = self._report.add_test(test)

        def cont(result):
            # Results arriving after a stop request are dropped.
            if not self._stop.is_set():
                if result and len(result) == 2:
                    retry_count, returncode = result
                else:
                    retry_count = 0
                    returncode = RESULT_ERROR
                self._log.debug('adding result')
                self._report.add_result(
                    index, returncode, returncode == RESULT_TIMEOUT, retry_count)
                self._log.debug('finish continuation')

        fn = self._dispatch_async if self._async else self._dispatch_sync
        return fn(test, cont, max_retry)

    def wait(self):
        """Wait for all dispatched tests and return the value of the report's ``end()``."""
        if self._async:
            self._pool.close()
            self._pool.join()
            self._m.shutdown()
        return self._report.end()

    def terminate(self):
        """Signal running tests to stop and tear down the pool, if any."""
        self._stop.set()
        if self._async:
            self._pool.terminate()
            self._pool.join()
            self._m.shutdown()
class TestProgram(object):
    """One side (server or client) of a cross test and how to launch it."""

    __test__ = False  # not a test case; keep pytest/nose from collecting it

    def __init__(self, kind, name, protocol, transport, socket, workdir, stop_signal, command,
                 env=None, extra_args=None, extra_args2=None, join_args=False, **kwargs):
        """Store the program description.

        ``command`` is the base argv list; it is copied, never modified.
        ``extra_args2`` is placed before and ``extra_args`` after the generated
        options.  With ``join_args`` the generated options are passed as one
        space-joined argument.  Unknown keyword arguments are ignored.
        """
        self.kind = kind
        self.name = name
        self.protocol = protocol
        self.transport = transport
        self.socket = socket
        self.workdir = workdir
        self.stop_signal = stop_signal
        self.command = None
        self._base_command = self._fix_cmd_path(command)
        if env:
            # copy.copy(os.environ) would share the live environment's storage,
            # so updating the "copy" would change this process' environment.
            self.env = dict(os.environ)
            self.env.update(env)
        else:
            self.env = os.environ
        self._extra_args = list(extra_args) if extra_args else []
        self._extra_args2 = list(extra_args2) if extra_args2 else []
        self._join_args = join_args

    def _fix_cmd_path(self, cmd):
        """Return a copy of ``cmd`` with the executable resolved.

        ``'python'`` becomes the running interpreter; any other first element
        becomes a path inside ``workdir`` when such a file exists.
        """
        # if the arg is a file in the current directory, make it path
        def abs_if_exists(arg):
            p = path_join(self.workdir, arg)
            return p if os.path.exists(p) else arg

        cmd = list(cmd)  # do not modify the caller's (shared) test definition
        if cmd[0] == 'python':
            cmd[0] = sys.executable
        else:
            cmd[0] = abs_if_exists(cmd[0])
        return cmd

    def _socket_args(self, socket, port):
        """Return the extra options for ``socket``, or None when there are none."""
        if socket == 'ip-ssl':
            return ['--ssl']
        if socket == 'domain':
            return ['--domain-socket=%s' % domain_socket_path(port)]
        if socket == 'abstract':
            return ['--abstract-namespace', '--domain-socket=%s' % domain_socket_path(port)]
        return None

    def _transport_args(self, transport):
        """Return the extra options for ``transport``, or None when there are none."""
        return {
            'zlib': ['--zlib'],
        }.get(transport, None)

    def build_command(self, port):
        """Build, store in ``self.command`` and return the argv list for ``port``."""
        cmd = list(self._base_command)
        args = list(self._extra_args2)
        args.append('--protocol=' + self.protocol)
        args.append('--transport=' + self.transport)
        transport_args = self._transport_args(self.transport)
        if transport_args:
            args += transport_args
        socket_args = self._socket_args(self.socket, port)
        if socket_args:
            args += socket_args
        args.append('--port=%d' % port)
        if self._join_args:
            cmd.append(" ".join(args))
        else:
            cmd.extend(args)
        if self._extra_args:
            cmd.extend(self._extra_args)
        self.command = cmd
        return self.command


class TestEntry(object):
    """A server/client pair plus the protocol, transport and socket under test."""

    __test__ = False  # not a test case; keep pytest/nose from collecting it

    def __init__(self, testdir, server, client, delay, timeout, **kwargs):
        """Build both programs from the shared config merged with each side's own.

        ``kwargs`` must contain ``protocol``, ``transport`` and ``socket``.
        Each side's ``remote_args`` are handed to the opposite side as its
        ``extra_args2``.
        """
        self.testdir = testdir
        self._log = multiprocessing.get_logger()
        self._config = kwargs
        self.protocol = kwargs['protocol']
        self.transport = kwargs['transport']
        self.socket = kwargs['socket']
        srv_dict = self._fix_workdir(merge_dict(self._config, server))
        cli_dict = self._fix_workdir(merge_dict(self._config, client))
        cli_dict['extra_args2'] = srv_dict.pop('remote_args', [])
        srv_dict['extra_args2'] = cli_dict.pop('remote_args', [])
        self.server = TestProgram('server', **srv_dict)
        self.client = TestProgram('client', **cli_dict)
        self.delay = delay
        self.timeout = timeout
        self._name = None
        # results
        self.success = None
        self.as_expected = None
        self.returncode = None
        self.expired = False
        self.retry_count = 0

    def _fix_workdir(self, config):
        """Set ``config['workdir']`` to a real path (relative ones are under testdir)."""
        key = 'workdir'
        path = config.get(key, None)
        if not path:
            path = self.testdir
        if os.path.isabs(path):
            path = os.path.realpath(path)
        else:
            path = os.path.realpath(path_join(self.testdir, path))
        config[key] = path
        return config

    @classmethod
    def get_name(cls, server, client, protocol, transport, socket, *args, **kwargs):
        """Return the test name ``<server>-<client>_<protocol>_<transport>-<socket>``."""
        return '%s-%s_%s_%s-%s' % (server, client, protocol, transport, socket)

    @property
    def name(self):
        """Test name, computed on first access and then cached."""
        if not self._name:
            self._name = self.get_name(
                self.server.name, self.client.name, self.protocol, self.transport, self.socket)
        return self._name

    @property
    def transport_name(self):
        return '%s-%s' % (self.transport, self.socket)


def test_name(server, client, protocol, transport, socket, **kwargs):
    """Return the name of the test described by the given config entries.

    ``server`` and ``client`` are mappings with a ``'name'`` key.
    """
    return TestEntry.get_name(server['name'], client['name'], protocol, transport, socket)


# The name matches the test-function pattern of pytest/nose; it is a helper.
test_name.__test__ = False
import copy


def domain_socket_path(port):
    """Return the path of the domain socket file used for ``port``."""
    return '/tmp/ThriftTest.thrift.%d' % port


def merge_dict(base, update):
    """Update dict concatenating list values

    Returns a new dict; neither ``base`` nor ``update`` is modified and the
    result shares no mutable values with them.  When a key is present on both
    sides with list values, the ``update`` list is appended to the ``base``
    list; in every other case the ``update`` value wins.
    """
    res = copy.deepcopy(base)
    for k, v in update.items():
        # Copy so that later changes to the result cannot leak into `update`.
        v = copy.deepcopy(v)
        if isinstance(v, list) and isinstance(res.get(k), list):
            res[k].extend(v)
        else:
            res[k] = v
    return res