diff options
Diffstat (limited to '')
226 files changed, 71052 insertions, 0 deletions
diff --git a/qa/tasks/__init__.py b/qa/tasks/__init__.py new file mode 100644 index 000000000..9a7949a00 --- /dev/null +++ b/qa/tasks/__init__.py @@ -0,0 +1,6 @@ +import logging + +# Inherit teuthology's log level +teuthology_log = logging.getLogger('teuthology') +log = logging.getLogger(__name__) +log.setLevel(teuthology_log.level) diff --git a/qa/tasks/admin_socket.py b/qa/tasks/admin_socket.py new file mode 100644 index 000000000..c454d3d0c --- /dev/null +++ b/qa/tasks/admin_socket.py @@ -0,0 +1,194 @@ +""" +Admin Socket task -- used in rados, powercycle, and smoke testing +""" + +import json +import logging +import os +import time + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Run an admin socket command, make sure the output is json, and run + a test program on it. The test program should read json from + stdin. This task succeeds if the test program exits with status 0. + + To run the same test on all clients:: + + tasks: + - ceph: + - rados: + - admin_socket: + all: + dump_requests: + test: http://example.com/script + + To restrict it to certain clients:: + + tasks: + - ceph: + - rados: [client.1] + - admin_socket: + client.1: + dump_requests: + test: http://example.com/script + + If an admin socket command has arguments, they can be specified as + a list:: + + tasks: + - ceph: + - rados: [client.0] + - admin_socket: + client.0: + dump_requests: + test: http://example.com/script + help: + test: http://example.com/test_help_version + args: [version] + + Note that there must be a ceph client with an admin socket running + before this task is run. The tests are parallelized at the client + level. Tests for a single client are run serially. 
+ + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict), \ + 'admin_socket task requires a dict for configuration' + teuthology.replace_all_with_clients(ctx.cluster, config) + + with parallel() as ptask: + for client, tests in config.items(): + ptask.spawn(_run_tests, ctx, client, tests) + + +def _socket_command(ctx, remote, socket_path, command, args): + """ + Run an admin socket command and return the result as a string. + + :param ctx: Context + :param remote: Remote site + :param socket_path: path to socket + :param command: command to be run remotely + :param args: command arguments + + :returns: output of command in json format + """ + testdir = teuthology.get_testdir(ctx) + max_tries = 120 + while True: + try: + out = remote.sh([ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph', + '--admin-daemon', socket_path, + ] + command.split(' ') + args) + except CommandFailedError: + assert max_tries > 0 + max_tries -= 1 + log.info('ceph cli returned an error, command not registered yet?') + log.info('sleeping and retrying ...') + time.sleep(1) + continue + break + log.debug('admin socket command %s returned %s', command, out) + return json.loads(out) + +def _run_tests(ctx, client, tests): + """ + Create a temp directory and wait for a client socket to be created. + For each test, copy the executable locally and run the test. + Remove temp directory when finished. 
+ + :param ctx: Context + :param client: client machine to run the test + :param tests: list of tests to run + """ + testdir = teuthology.get_testdir(ctx) + log.debug('Running admin socket tests on %s', client) + (remote,) = ctx.cluster.only(client).remotes.keys() + socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client) + overrides = ctx.config.get('overrides', {}).get('admin_socket', {}) + + try: + tmp_dir = os.path.join( + testdir, + 'admin_socket_{client}'.format(client=client), + ) + remote.run( + args=[ + 'mkdir', + '--', + tmp_dir, + run.Raw('&&'), + # wait for client process to create the socket + 'while', 'test', '!', '-e', socket_path, run.Raw(';'), + 'do', 'sleep', '1', run.Raw(';'), 'done', + ], + ) + + for command, config in tests.items(): + if config is None: + config = {} + teuthology.deep_merge(config, overrides) + log.debug('Testing %s with config %s', command, str(config)) + + test_path = None + if 'test' in config: + # hack: the git_url is always ceph-ci or ceph + git_url = teuth_config.get_ceph_git_url() + repo_name = 'ceph.git' + if git_url.count('ceph-ci'): + repo_name = 'ceph-ci.git' + url = config['test'].format( + branch=config.get('branch', 'master'), + repo=repo_name, + ) + test_path = os.path.join(tmp_dir, command) + remote.run( + args=[ + 'wget', + '-q', + '-O', + test_path, + '--', + url, + run.Raw('&&'), + 'chmod', + 'u=rx', + '--', + test_path, + ], + ) + + args = config.get('args', []) + assert isinstance(args, list), \ + 'admin socket command args must be a list' + sock_out = _socket_command(ctx, remote, socket_path, command, args) + if test_path is not None: + remote.run( + args=[ + test_path, + ], + stdin=json.dumps(sock_out), + ) + + finally: + remote.run( + args=[ + 'rm', '-rf', '--', tmp_dir, + ], + ) diff --git a/qa/tasks/autotest.py b/qa/tasks/autotest.py new file mode 100644 index 000000000..80c3fc9d2 --- /dev/null +++ b/qa/tasks/autotest.py @@ -0,0 +1,165 @@ +""" +Run an autotest test on the ceph cluster. 
+""" +import json +import logging +import os + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run an autotest test on the ceph cluster. + + Only autotest client tests are supported. + + The config is a mapping from role name to list of tests to run on + that client. + + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - autotest: + client.0: [dbench] + client.1: [bonnie] + + You can also specify a list of tests to run on all clients:: + + tasks: + - ceph: + - ceph-fuse: + - autotest: + all: [dbench] + """ + assert isinstance(config, dict) + config = teuthology.replace_all_with_clients(ctx.cluster, config) + log.info('Setting up autotest...') + testdir = teuthology.get_testdir(ctx) + with parallel() as p: + for role in config.keys(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_download, testdir, remote) + + log.info('Making a separate scratch dir for every client...') + for role in config.keys(): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + remote.run( + args=[ + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user='ubuntu'), #TODO + '--', + scratch, + ], + ) + + with parallel() as p: + for role, tests in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_run_tests, testdir, remote, role, tests) + +def _download(testdir, remote): + """ + Download. Does not explicitly support muliple tasks in a single run. 
+ """ + remote.run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + '--no-check-certificate', + 'https://github.com/ceph/autotest/tarball/ceph', + '-O-', + run.Raw('|'), + 'tar', + '-C', '{tdir}/autotest'.format(tdir=testdir), + '-x', + '-z', + '-f-', + '--strip-components=1', + ], + ) + +def _run_tests(testdir, remote, role, tests): + """ + Spawned to run test on remote site + """ + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + assert isinstance(tests, list) + for idx, testname in enumerate(tests): + log.info('Running autotest client test #%d: %s...', idx, testname) + + tag = 'client.{id}.num{idx}.{testname}'.format( + idx=idx, + testname=testname, + id=id_, + ) + control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag) + remote.write_file( + path=control, + data='import json; data=json.loads({data!r}); job.run_test(**data)'.format( + data=json.dumps(dict( + url=testname, + dir=scratch, + # TODO perhaps tag + # results will be in {testdir}/autotest/client/results/dbench + # or {testdir}/autotest/client/results/dbench.{tag} + )), + ), + ) + remote.run( + args=[ + '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir), + '--verbose', + '--harness=simple', + '--tag={tag}'.format(tag=tag), + control, + run.Raw('3>&1'), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', control, + ], + ) + + remote.run( + args=[ + 'mv', + '--', + '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag), + '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', 
'{tdir}/autotest'.format(tdir=testdir), + ], + ) diff --git a/qa/tasks/aver.py b/qa/tasks/aver.py new file mode 100644 index 000000000..79ee18c5c --- /dev/null +++ b/qa/tasks/aver.py @@ -0,0 +1,67 @@ +""" +Aver wrapper task +""" +import contextlib +import logging +from subprocess import check_call, Popen, PIPE + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute an aver assertion + + Parameters: + + input: file containing data referred to by the assertions. File name is + relative to the job's archive path + validations: list of validations in the Aver language + + Example: + - aver: + input: bench_output.csv + validations: + - expect performance(alg='ceph') > performance(alg='raw') + - for size > 3 expect avg_throughput > 2000 + """ + log.info('Beginning aver...') + assert isinstance(config, dict), 'expecting dictionary for configuration' + + if 'input' not in config: + raise Exception("Expecting 'input' option") + if len(config.get('validations', [])) < 1: + raise Exception("Expecting at least one entry in 'validations'") + + url = ('https://github.com/ivotron/aver/releases/download/' + 'v0.3.0/aver-linux-amd64.tar.bz2') + + aver_path = ctx.archive + '/aver' + + # download binary + check_call(['wget', '-O', aver_path + '.tbz', url]) + check_call(['tar', 'xfj', aver_path + '.tbz', '-C', ctx.archive]) + + # print version + process = Popen([aver_path, '-v'], stdout=PIPE) + log.info(process.communicate()[0]) + + # validate + for validation in config['validations']: + cmd = (aver_path + ' -s -i ' + (ctx.archive + '/' + config['input']) + + ' "' + validation + '"') + log.info("executing: " + cmd) + process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + (stdout, stderr) = process.communicate() + if stderr: + log.info('aver stderr: ' + stderr) + log.info('aver result: ' + stdout) + if stdout.strip(' \t\n\r') != 'true': + raise Exception('Failed validation: ' + validation) + + try: + yield + finally: + 
log.info('Removing aver binary...') + check_call(['rm', aver_path, aver_path + '.tbz']) diff --git a/qa/tasks/backfill_toofull.py b/qa/tasks/backfill_toofull.py new file mode 100644 index 000000000..f4ff90a46 --- /dev/null +++ b/qa/tasks/backfill_toofull.py @@ -0,0 +1,193 @@ +""" +Backfill_toofull +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_pg_state(manager, pgid, state, to_osd): + log.debug("waiting for pg %s state is %s" % (pgid, state)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if state not in status: + log.debug('not %s' % state) + continue + assert to_osd in pg['up'] + return + assert False, '%s not in %s' % (pgid, state) + + +def task(ctx, config): + """ + Test backfill reservation calculates "toofull" condition correctly. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'backfill_toofull task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_toofull') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.debug("acting=%s" % acting) + assert acting + primary = acting[0] + target = acting[1] + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup']) + df = manager.get_osd_df(target) + log.debug("target osd df: %s" % df) + + total_kb = df['kb'] + used_kb = df['kb_used'] + + log.debug("pause recovery") + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nobackfill') + manager.raw_cluster_cmd('osd', 'set', 'norecover') + + log.debug("stop tartget osd %s" % target) + manager.kill_osd(target) + manager.wait_till_active() + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + + log.debug("re-write data") + rados(ctx, mon, ['-p', pool, 'cleanup']) + time.sleep(10) + rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup']) + + df = 
manager.get_osd_df(primary) + log.debug("primary osd df: %s" % df) + + primary_used_kb = df['kb_used'] + + log.info("test backfill reservation rejected with toofull") + + # We set backfillfull ratio less than new data size and expect the pg + # entering backfill_toofull state. + # + # We also need to update nearfull ratio to prevent "full ratio(s) out of order". + + backfillfull = 0.9 * primary_used_kb / total_kb + nearfull = backfillfull * 0.9 + + log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % + (nearfull, backfillfull)) + manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', + '{:.3f}'.format(nearfull + 0.001)) + manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', + '{:.3f}'.format(backfillfull + 0.001)) + + log.debug("start tartget osd %s" % target) + + manager.revive_osd(target) + manager.wait_for_active() + manager.wait_till_osd_is_up(target) + + wait_for_pg_state(manager, pgid, 'backfill_toofull', target) + + log.info("test pg not enter backfill_toofull after restarting backfill") + + # We want to set backfillfull ratio to be big enough for the target to + # successfully backfill new data but smaller than the sum of old and new + # data, so if the osd backfill reservation incorrectly calculates "toofull" + # the test will detect this (fail). + # + # Note, we need to operate with "uncompressed" bytes because currently + # osd backfill reservation does not take compression into account. + # + # We also need to update nearfull ratio to prevent "full ratio(s) out of order". 
+ + pdf = manager.get_pool_df(pool) + log.debug("pool %s df: %s" % (pool, pdf)) + assert pdf + compress_ratio = 1.0 * pdf['compress_under_bytes'] / pdf['compress_bytes_used'] \ + if pdf['compress_bytes_used'] > 0 else 1.0 + log.debug("compress_ratio: %s" % compress_ratio) + + backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb + assert backfillfull < 0.9 + nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb + assert nearfull_min < backfillfull + delta = backfillfull - nearfull_min + nearfull = nearfull_min + delta * 0.1 + backfillfull = nearfull_min + delta * 0.2 + + log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % + (nearfull, backfillfull)) + manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', + '{:.3f}'.format(nearfull + 0.001)) + manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', + '{:.3f}'.format(backfillfull + 0.001)) + + wait_for_pg_state(manager, pgid, 'backfilling', target) + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + + log.debug("interrupt %s backfill" % target) + manager.mark_down_osd(target) + # after marking the target osd down it will automatically be + # up soon again + + log.debug("resume recovery") + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'nobackfill') + manager.raw_cluster_cmd('osd', 'unset', 'norecover') + + # wait for everything to peer, backfill and recover + manager.wait_for_clean() + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') diff --git a/qa/tasks/barbican.py b/qa/tasks/barbican.py new file mode 100644 index 000000000..d43568c61 --- /dev/null +++ b/qa/tasks/barbican.py @@ -0,0 +1,524 @@ +""" +Deploy and configure Barbican for Teuthology +""" +import argparse +import contextlib +import logging +import http 
+import json +import time +import math + +from urllib.parse import urlparse + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Barbican from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading barbican...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', 'master') + log.info("Using branch '%s' for barbican", branch) + + sha1 = cconf.get('sha1') + log.info('sha1=%s', sha1) + + ctx.cluster.only(client).run( + args=[ + 'bash', '-l' + ], + ) + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + 'https://github.com/openstack/barbican.git', + '{tdir}/barbican'.format(tdir=testdir), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/barbican'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing barbican...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/barbican'.format(tdir=testdir), + ], + ) + +def get_barbican_dir(ctx): + return '{tdir}/barbican'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_barbican_dir(ctx, client, args): + ctx.cluster.only(client).run( + args=['cd', get_barbican_dir(ctx), run.Raw('&&'), ] + args, + ) + +def run_in_barbican_venv(ctx, client, args): + run_in_barbican_dir(ctx, client, + ['.', + '.barbicanenv/bin/activate', + run.Raw('&&') + ] + args) + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Barbican using pip. 
+ """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for barbican...') + for (client, _) in config.items(): + run_in_barbican_dir(ctx, client, + ['python3', '-m', 'venv', '.barbicanenv']) + run_in_barbican_venv(ctx, client, + ['pip', 'install', '--upgrade', 'pip']) + run_in_barbican_venv(ctx, client, + ['pip', 'install', 'pytz', + '-e', get_barbican_dir(ctx)]) + yield + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + +def set_authtoken_params(ctx, cclient, cconfig): + section_config_list = cconfig['keystone_authtoken'].items() + for config in section_config_list: + (name, val) = config + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'{} = {}'.format(name, val)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + + keystone_role = cconfig.get('use-keystone-role', None) + public_host, public_port = ctx.keystone.public_endpoints[keystone_role] + url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'auth_uri = {}'.format(url)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + admin_host, admin_port = ctx.keystone.admin_endpoints[keystone_role] + admin_url = 'http://{host}:{port}/v3'.format(host=admin_host, + port=admin_port) + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'auth_url = {}'.format(admin_url)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + +def fix_barbican_api_paste(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', '-n', + '/\\[pipeline:barbican_api]/ {p;n; /^pipeline =/ '+ + '{ s/.*/pipeline = unauthenticated-context 
apiapp/;p;d } } ; p', + './etc/barbican/barbican-api-paste.ini']) + +def fix_barbican_api(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/prop_dir =/ s#etc/barbican#{}/etc/barbican#'.format(get_barbican_dir(ctx)), + 'bin/barbican-api']) + +def copy_policy_json(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['cp', + get_barbican_dir(ctx)+'/etc/barbican/policy.json', + get_barbican_dir(ctx)]) + +def create_barbican_conf(ctx, cclient): + barbican_host, barbican_port = ctx.barbican.endpoints[cclient] + barbican_url = 'http://{host}:{port}'.format(host=barbican_host, + port=barbican_port) + log.info("barbican url=%s", barbican_url) + + run_in_barbican_dir(ctx, cclient, + ['bash', '-c', + 'echo -n -e "[DEFAULT]\nhost_href=' + barbican_url + '\n" ' + \ + '>barbican.conf']) + +@contextlib.contextmanager +def configure_barbican(ctx, config): + """ + Configure barbican paste-api and barbican-api. + """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + keystone_role = cconfig.get('use-keystone-role', None) + if keystone_role is None: + raise ConfigError('use-keystone-role not defined in barbican task') + + set_authtoken_params(ctx, cclient, cconfig) + fix_barbican_api(ctx, cclient) + fix_barbican_api_paste(ctx, cclient) + copy_policy_json(ctx, cclient) + create_barbican_conf(ctx, cclient) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_barbican(ctx, config): + assert isinstance(config, dict) + log.info('Running barbican...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'barbican.public' + '.' 
+ client_id + + run_cmd = ['cd', get_barbican_dir(ctx), run.Raw('&&'), + '.', '.barbicanenv/bin/activate', run.Raw('&&'), + 'HOME={}'.format(get_barbican_dir(ctx)), run.Raw('&&'), + 'bin/barbican-api', + run.Raw('& { read; kill %1; }')] + #run.Raw('1>/dev/null') + + run_cmd = 'cd ' + get_barbican_dir(ctx) + ' && ' + \ + '. .barbicanenv/bin/activate && ' + \ + 'HOME={}'.format(get_barbican_dir(ctx)) + ' && ' + \ + 'exec bin/barbican-api & { read; kill %1; }' + + ctx.daemons.add_daemon( + remote, 'barbican', client_public_with_id, + cluster=cluster_name, + args=['bash', '-c', run_cmd], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=get_barbican_dir(ctx), + wait=False, + check_status=False, + ) + + # sleep driven synchronization + run_in_barbican_venv(ctx, client, ['sleep', '15']) + try: + yield + finally: + log.info('Stopping Barbican instance') + ctx.daemons.get_daemon('barbican', client_public_with_id, + cluster_name).stop() + + +@contextlib.contextmanager +def create_secrets(ctx, config): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + rgw_user = cconfig['rgw_user'] + + keystone_role = cconfig.get('use-keystone-role', None) + keystone_host, keystone_port = ctx.keystone.public_endpoints[keystone_role] + barbican_host, barbican_port = ctx.barbican.endpoints[cclient] + barbican_url = 'http://{host}:{port}'.format(host=barbican_host, + port=barbican_port) + log.info("barbican_url=%s", barbican_url) + #fetching user_id of user that gets secrets for radosgw + token_req = http.client.HTTPConnection(keystone_host, keystone_port, timeout=30) + token_req.request( + 'POST', + '/v3/auth/tokens', + headers={'Content-Type':'application/json'}, + body=json.dumps({ + "auth": { + "identity": { + "methods": ["password"], + "password": { + "user": { + "domain": {"id": "default"}, + "name": rgw_user["username"], + "password": rgw_user["password"] + } + } + }, + "scope": { + "project": { + "domain": {"id": "default"}, + "name": rgw_user["tenantName"] + } + } + } + })) + rgw_access_user_resp = token_req.getresponse() + if not (rgw_access_user_resp.status >= 200 and + rgw_access_user_resp.status < 300): + raise Exception("Cannot authenticate user "+rgw_user["username"]+" for secret creation") + # baru_resp = json.loads(baru_req.data) + rgw_access_user_data = json.loads(rgw_access_user_resp.read().decode()) + rgw_user_id = rgw_access_user_data['token']['user']['id'] + if 'secrets' in cconfig: + for secret in cconfig['secrets']: + if 'name' not in secret: + raise ConfigError('barbican.secrets must have "name" field') + if 'base64' not in secret: + raise ConfigError('barbican.secrets must have "base64" field') + if 'tenantName' not in secret: + raise ConfigError('barbican.secrets must have "tenantName" field') + if 'username' not in secret: + raise ConfigError('barbican.secrets must have "username" field') + if 'password' not in secret: + raise ConfigError('barbican.secrets must have "password" field') + + token_req = 
http.client.HTTPConnection(keystone_host, keystone_port, timeout=30) + token_req.request( + 'POST', + '/v3/auth/tokens', + headers={'Content-Type':'application/json'}, + body=json.dumps({ + "auth": { + "identity": { + "methods": ["password"], + "password": { + "user": { + "domain": {"id": "default"}, + "name": secret["username"], + "password": secret["password"] + } + } + }, + "scope": { + "project": { + "domain": {"id": "default"}, + "name": secret["tenantName"] + } + } + } + })) + token_resp = token_req.getresponse() + if not (token_resp.status >= 200 and + token_resp.status < 300): + raise Exception("Cannot authenticate user "+secret["username"]+" for secret creation") + + expire = time.time() + 5400 # now + 90m + (expire_fract,dummy) = math.modf(expire) + expire_format = "%%FT%%T.%06d" % (round(expire_fract*1000000)) + expiration = time.strftime(expire_format, time.gmtime(expire)) + token_id = token_resp.getheader('x-subject-token') + + key1_json = json.dumps( + { + "name": secret['name'], + "expiration": expiration, + "algorithm": "aes", + "bit_length": 256, + "mode": "cbc", + "payload": secret['base64'], + "payload_content_type": "application/octet-stream", + "payload_content_encoding": "base64" + }) + + sec_req = http.client.HTTPConnection(barbican_host, barbican_port, timeout=30) + try: + sec_req.request( + 'POST', + '/v1/secrets', + headers={'Content-Type': 'application/json', + 'Accept': '*/*', + 'X-Auth-Token': token_id}, + body=key1_json + ) + except: + log.info("catched exception!") + run_in_barbican_venv(ctx, cclient, ['sleep', '900']) + + barbican_sec_resp = sec_req.getresponse() + if not (barbican_sec_resp.status >= 200 and + barbican_sec_resp.status < 300): + raise Exception("Cannot create secret") + barbican_data = json.loads(barbican_sec_resp.read().decode()) + if 'secret_ref' not in barbican_data: + raise ValueError("Malformed secret creation response") + secret_ref = barbican_data["secret_ref"] + log.info("secret_ref=%s", secret_ref) + 
secret_url_parsed = urlparse(secret_ref) + acl_json = json.dumps( + { + "read": { + "users": [rgw_user_id], + "project-access": True + } + }) + acl_req = http.client.HTTPConnection(secret_url_parsed.netloc, timeout=30) + acl_req.request( + 'PUT', + secret_url_parsed.path+'/acl', + headers={'Content-Type': 'application/json', + 'Accept': '*/*', + 'X-Auth-Token': token_id}, + body=acl_json + ) + barbican_acl_resp = acl_req.getresponse() + if not (barbican_acl_resp.status >= 200 and + barbican_acl_resp.status < 300): + raise Exception("Cannot set ACL for secret") + + key = {'id': secret_ref.split('secrets/')[1], 'payload': secret['base64']} + ctx.barbican.keys[secret['name']] = key + + run_in_barbican_venv(ctx, cclient, ['sleep', '3']) + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Keystone + + Example of configuration: + + tasks: + - local_cluster: + cluster_path: /home/adam/ceph-1/build + - local_rgw: + - tox: [ client.0 ] + - keystone: + client.0: + sha1: 17.0.0.0rc2 + force-branch: master + projects: + - name: rgwcrypt + description: Encryption Tenant + - name: barbican + description: Barbican + - name: s3 + description: S3 project + users: + - name: rgwcrypt-user + password: rgwcrypt-pass + project: rgwcrypt + - name: barbican-user + password: barbican-pass + project: barbican + - name: s3-user + password: s3-pass + project: s3 + roles: [ name: Member, name: creator ] + role-mappings: + - name: Member + user: rgwcrypt-user + project: rgwcrypt + - name: admin + user: barbican-user + project: barbican + - name: creator + user: s3-user + project: s3 + services: + - name: keystone + type: identity + description: Keystone Identity Service + - barbican: + client.0: + force-branch: master + use-keystone-role: client.0 + keystone_authtoken: + auth_plugin: password + username: barbican-user + password: barbican-pass + user_domain_name: Default + rgw_user: + tenantName: rgwcrypt + username: 
rgwcrypt-user + password: rgwcrypt-pass + secrets: + - name: my-key-1 + base64: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + tenantName: s3 + username: s3-user + password: s3-pass + - name: my-key-2 + base64: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= + tenantName: s3 + username: s3-user + password: s3-pass + - s3tests: + client.0: + force-branch: master + kms_key: my-key-1 + - rgw: + client.0: + use-keystone-role: client.0 + use-barbican-role: client.0 + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task keystone only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('barbican', {})) + + log.debug('Barbican config is %s', config) + + if not hasattr(ctx, 'keystone'): + raise ConfigError('barbican must run after the keystone task') + + + ctx.barbican = argparse.Namespace() + ctx.barbican.endpoints = assign_ports(ctx, config, 9311) + ctx.barbican.keys = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_barbican(ctx=ctx, config=config), + lambda: run_barbican(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/blktrace.py b/qa/tasks/blktrace.py new file mode 100644 index 000000000..10b1da0c0 --- /dev/null +++ b/qa/tasks/blktrace.py @@ -0,0 +1,96 @@ +""" +Run blktrace program through teuthology +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from 
@contextlib.contextmanager
def execute(ctx, config):
    """
    Run the blktrace program on remote machines.

    One blktrace process is started (in the background, under
    daemon-helper) for every osd role of ``config['cluster']`` that has a
    device recorded in ``ctx.disk_config.remote_to_roles_to_dev``.  On exit
    the stdin of every started process is closed.

    :param ctx: Context
    :param config: Configuration; ``config['cluster']`` names the cluster
    """
    procs = []
    testdir = teuthology.get_testdir(ctx)
    log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)

    # Restrict to the remotes of the requested cluster, like setup() does;
    # remotes holding only other clusters' osds contribute no roles below.
    osds = ctx.cluster.only(teuthology.is_type('osd', config['cluster']))
    try:
        # Launching happens inside the try block so that processes which
        # were already started get their stdin closed if a later launch
        # fails.
        for remote, roles_for_host in osds.remotes.items():
            roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
            for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
                                                         config['cluster']):
                if roles_to_devs.get(role):
                    dev = roles_to_devs[role]
                    log.info("running blktrace on %s: %s" % (remote.name, dev))

                    proc = remote.run(
                        args=[
                            'cd',
                            log_dir,
                            run.Raw(';'),
                            'daemon-helper',
                            daemon_signal,
                            'sudo',
                            blktrace,
                            '-o',
                            # output files are named after the device node
                            dev.rsplit("/", 1)[1],
                            '-d',
                            dev,
                        ],
                        wait=False,
                        stdin=run.PIPE,
                    )
                    procs.append(proc)
        yield
    finally:
        log.info('stopping blktrace processs')
        for proc in procs:
            proc.stdin.close()
def generate_cbt_config(self):
    """
    Assemble the configuration dictionary handed to CBT.

    The ``cluster`` section is built from the hosts of each role type and
    the optional ``cluster`` section of the task config; the ``benchmarks``
    section is the task's own ``benchmarks`` mapping, extended in place
    with paths for fio-based benchmarks or with directories, controller
    and auth string for cosbench.

    :returns: dict with the keys ``cluster``, ``benchmarks`` and
              ``client_endpoints``
    """
    mon_hosts = self.hosts_of_type('mon')
    osd_hosts = self.hosts_of_type('osd')
    client_hosts = self.hosts_of_type('client')

    # Unless the task config names rgws explicitly, the first client host
    # is used.
    default_rgws = {client_hosts[0]: None}
    cluster_opts = self.config.get('cluster', {})
    cluster_config = {
        'user': cluster_opts.get('user', 'ubuntu'),
        'head': mon_hosts[0],
        'osds': osd_hosts,
        'mons': mon_hosts,
        'clients': client_hosts,
        'rgws': cluster_opts.get('rgws', default_rgws),
        'osds_per_node': cluster_opts.get('osds_per_node', 1),
        'rebuild_every_test': False,
        'use_existing': True,
        'is_teuthology': cluster_opts.get('is_teuthology', True),
        'iterations': cluster_opts.get('iterations', 1),
        'tmp_dir': '/tmp/cbt',
        'pool_profiles': cluster_opts.get('pool_profiles'),
    }

    benchmarks = self.config.get('benchmarks')
    # only the first benchmark listed decides which extras are filled in
    benchmark_type = next(iter(benchmarks.keys()))
    if benchmark_type in ('librbdfio', 'fio'):
        testdir = misc.get_testdir(self.ctx)
        benchmarks[benchmark_type]['cmd_path'] = os.path.join(testdir, 'fio/fio')
    elif benchmark_type == 'cosbench':
        # create cosbench_dir and cosbench_xml_dir
        testdir = misc.get_testdir(self.ctx)
        cosbench = benchmarks['cosbench']
        cosbench['cosbench_dir'] = os.path.join(testdir, 'cos')
        cosbench['cosbench_xml_dir'] = os.path.join(testdir, 'xml')
        self.ctx.cluster.run(
            args=['mkdir', '-p', '-m0755', '--', cosbench['cosbench_xml_dir']])
        cosbench['controller'] = osd_hosts[0]

        # set auth details
        peer_ips = []
        for remote, _role_list in self.ctx.cluster.remotes.items():
            host, _port = remote.ssh.get_transport().getpeername()
            peer_ips.append(host)
        cosbench['auth'] = "username=cosbench:operator;password=intel2012;url=http://%s:80/auth/v1.0;retry=9" % (peer_ips[0])

    return {
        'cluster': cluster_config,
        'benchmarks': benchmarks,
        'client_endpoints': self.config.get('client_endpoints', None),
    }
'{tdir}/fio'.format(tdir=testdir) + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'fio'), run.Raw('&&'), + './configure', run.Raw('&&'), + 'make' + ] + ) + + if benchmark_type == 'cosbench': + # install cosbench + self.log.info('install dependencies for cosbench') + if system_type == 'rpm': + cosbench_depends = ['wget', 'unzip', 'java-1.7.0-openjdk', 'curl'] + else: + cosbench_depends = ['wget', 'unzip', 'openjdk-8-jre', 'curl'] + self.first_mon.run(args=install_cmd + cosbench_depends) + testdir = misc.get_testdir(self.ctx) + cosbench_version = '0.4.2.c3' + cosbench_location = 'https://github.com/intel-cloud/cosbench/releases/download/v0.4.2.c3/0.4.2.c3.zip' + os_version = misc.get_system_type(self.first_mon, False, True) + + # additional requirements for bionic + if os_version == '18.04': + self.first_mon.run( + args=['sudo', 'apt-get', '-y', 'purge', 'openjdk-11*']) + # use our own version of cosbench + cosbench_version = 'cosbench-0.4.2.c3.1' + # contains additional parameter "-N" to nc + cosbench_location = 'http://drop.ceph.com/qa/cosbench-0.4.2.c3.1.zip' + cosbench_dir = os.path.join(testdir, cosbench_version) + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', cosbench_dir]) + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'wget', + cosbench_location, run.Raw('&&'), + 'unzip', '{name}.zip'.format(name=cosbench_version), '-d', cosbench_version + ] + ) + else: + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'wget', + cosbench_location, run.Raw('&&'), + 'unzip', '{name}.zip'.format(name=cosbench_version) + ] + ) + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'ln', '-s', cosbench_version, 'cos', + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'cos'), run.Raw('&&'), + 'chmod', '+x', run.Raw('*.sh'), + ] + ) + + # start cosbench and check info + self.log.info('start cosbench') + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', 
def checkout_cbt(self):
    """
    Clone the CBT repository into ``<testdir>/cbt`` on the first monitor.

    ``repo``, ``branch``/``force-branch`` and ``sha1`` are read from the
    task config.  Without a ``sha1`` a shallow clone of the branch is
    made; with one, a full clone is made and hard-reset to that commit.
    """
    testdir = misc.get_testdir(self.ctx)
    options = self.config
    repo_url = options.get('repo', 'https://github.com/ceph/cbt.git')
    # 'force-branch' wins over 'branch', which defaults to master
    branch_name = options.get('force-branch', options.get('branch', 'master'))
    pinned_sha1 = options.get('sha1')
    checkout_dir = '{tdir}/cbt'.format(tdir=testdir)

    clone_cmd = ['git', 'clone']
    if pinned_sha1 is None:
        # nothing to reset to afterwards, so history is not needed
        clone_cmd += ['--depth', '1']
    clone_cmd += ['-b', branch_name, repo_url, checkout_dir]
    self.first_mon.run(args=clone_cmd)

    if pinned_sha1 is None:
        return
    self.first_mon.run(
        args=[
            'cd', os.path.join(testdir, 'cbt'), run.Raw('&&'),
            'git', 'reset', '--hard', pinned_sha1,
        ]
    )
'{tdir}/cbt'.format(tdir=testdir), + ] + ) + benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys())) + if benchmark_type in ['librbdfio', 'fio']: + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/fio'.format(tdir=testdir), + ] + ) + + if benchmark_type == 'cosbench': + os_version = misc.get_system_type(self.first_mon, False, True) + if os_version == '18.04': + cosbench_version = 'cosbench-0.4.2.c3.1' + else: + cosbench_version = '0.4.2.c3' + # note: stop-all requires 'nc' + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'stop-all.sh', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'sudo', 'killall', '-9', 'java', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/cos'.format(tdir=testdir), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/{version}'.format(tdir=testdir, version=cosbench_version), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/{version}.zip'.format(tdir=testdir, version=cosbench_version), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/xml'.format(tdir=testdir), + ] + ) + + +task = CBT diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template new file mode 100644 index 000000000..609a124b8 --- /dev/null +++ b/qa/tasks/ceph.conf.template @@ -0,0 +1,107 @@ +[global] + chdir = "" + pid file = /var/run/ceph/$cluster-$name.pid + auth supported = cephx + + filestore xattr use omap = true + + mon clock drift allowed = 1.000 + + osd crush chooseleaf type = 0 + auth debug = true + + ms die on old message = true + ms die on bug = true + + mon max pg per osd = 10000 # >= luminous + mon pg warn max object skew = 0 + + # disable pg_autoscaler by default for new pools + osd_pool_default_pg_autoscale_mode = off + + osd pool default size = 2 + + mon 
osd allow primary affinity = true + mon osd allow pg remap = true + mon warn on legacy crush tunables = false + mon warn on crush straw calc version zero = false + mon warn on no sortbitwise = false + mon warn on osd down out interval zero = false + mon warn on too few osds = false + mon_warn_on_pool_pg_num_not_power_of_two = false + mon_warn_on_pool_no_redundancy = false + mon_allow_pool_size_one = true + + osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd crush-failure-domain=osd" + + osd default data pool replay window = 5 + + mon allow pool delete = true + + mon cluster log file level = debug + debug asserts on shutdown = true + mon health detail to clog = false + +[osd] + osd journal size = 100 + + osd scrub load threshold = 5.0 + osd scrub max interval = 600 + + osd recover clone overlap = true + osd recovery max chunk = 1048576 + + osd debug shutdown = true + osd debug op order = true + osd debug verify stray on activate = true + + osd open classes on start = true + osd debug pg log writeout = true + + osd deep scrub update digest min age = 30 + + osd map max advance = 10 + + journal zero on create = true + + filestore ondisk finisher threads = 3 + filestore apply finisher threads = 3 + + bdev debug aio = true + osd debug misdirected ops = true + +[mgr] + debug ms = 1 + debug mgr = 20 + debug mon = 20 + debug auth = 20 + mon reweight min pgs per osd = 4 + mon reweight min bytes per osd = 10 + mgr/telemetry/nag = false + +[mon] + debug ms = 1 + debug mon = 20 + debug paxos = 20 + debug auth = 20 + mon data avail warn = 5 + mon mgr mkfs grace = 240 + mon reweight min pgs per osd = 4 + mon osd reporter subtree level = osd + mon osd prime pg temp = true + mon reweight min bytes per osd = 10 + + # rotate auth tickets quickly to exercise renewal paths + auth mon ticket ttl = 660 # 11m + auth service ticket ttl = 240 # 4m + + # don't complain about insecure global_id in the test suite + 
def generate_caps(type_):
    """
    Yield the ceph-authtool capability arguments for a daemon type.

    For every subsystem the given type has capabilities on, three items
    are yielded in a row: ``'--cap'``, the subsystem name and the
    capability string.  Valid types are osd, mgr, mds and client; any
    other value raises KeyError once iteration starts.
    """
    defaults = dict(
        osd=dict(
            mon='allow profile osd',
            mgr='allow profile osd',
            osd='allow *',
        ),
        mgr=dict(
            mon='allow profile mgr',
            osd='allow *',
            mds='allow *',
        ),
        mds=dict(
            mon='allow *',
            mgr='allow *',
            osd='allow *',
            mds='allow',
        ),
        client=dict(
            mon='allow rw',
            mgr='allow r',
            osd='allow rwx',
            mds='allow',
        ),
    )
    for subsystem, capability in defaults[type_].items():
        yield '--cap'
        yield subsystem
        yield capability


def update_archive_setting(ctx, key, value):
    """
    Record ``key: value`` under the ``archive`` section of the job's
    info.yaml.

    Does nothing when the job has no archive directory
    (``ctx.archive is None``).

    :param ctx: Context
    :param key: name of the archive entry (e.g. 'log', 'crash')
    :param value: value stored for that entry (e.g. a directory path)
    """
    if ctx.archive is None:
        return
    with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
        # An empty info.yaml loads as None; treat it as an empty mapping.
        info_yaml = yaml.safe_load(info_file) or {}
        info_yaml.setdefault('archive', {})[key] = value
        info_file.seek(0)
        yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
        # The file is rewritten in place: drop whatever is left of the old
        # content in case the new dump is shorter than the previous one.
        info_file.truncate()
+ + :param ctx: Context + :param config: Configuration + """ + log.info('Making ceph log dir writeable by non-root...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'chmod', + '777', + '/var/log/ceph', + ], + wait=False, + ) + ) + log.info('Disabling ceph logrotate...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'rm', '-f', '--', + '/etc/logrotate.d/ceph', + ], + wait=False, + ) + ) + log.info('Creating extra log directories...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'install', '-d', '-m0777', '--', + '/var/log/ceph/valgrind', + '/var/log/ceph/profiling-logger', + ], + wait=False, + ) + ) + + # Add logs directory to job's info log file + update_archive_setting(ctx, 'log', '/var/log/ceph') + + class Rotater(object): + stop_event = gevent.event.Event() + + def invoke_logrotate(self): + # 1) install ceph-test.conf in /etc/logrotate.d + # 2) continuously loop over logrotate invocation with ceph-test.conf + while not self.stop_event.is_set(): + self.stop_event.wait(timeout=30) + try: + procs = ctx.cluster.run( + args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'], + wait=False, + stderr=StringIO() + ) + run.wait(procs) + except exceptions.ConnectionLostError as e: + # Some tests may power off nodes during test, in which + # case we will see connection errors that we should ignore. + log.debug("Missed logrotate, node '{0}' is offline".format( + e.node)) + except EOFError: + # Paramiko sometimes raises this when it fails to + # connect to a node during open_session. As with + # ConnectionLostError, we ignore this because nodes + # are allowed to get power cycled during tests. 
+ log.debug("Missed logrotate, EOFError") + except SSHException: + log.debug("Missed logrotate, SSHException") + except run.CommandFailedError as e: + for p in procs: + if p.finished and p.exitstatus != 0: + err = p.stderr.getvalue() + if 'error: error renaming temp state file' in err: + log.info('ignoring transient state error: %s', e) + else: + raise + except socket.error as e: + if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET): + log.debug("Missed logrotate, host unreachable") + else: + raise + + def begin(self): + self.thread = gevent.spawn(self.invoke_logrotate) + + def end(self): + self.stop_event.set() + self.thread.get() + + def write_rotate_conf(ctx, daemons): + testdir = teuthology.get_testdir(ctx) + remote_logrotate_conf = '%s/logrotate.ceph-test.conf' % testdir + rotate_conf_path = os.path.join(os.path.dirname(__file__), 'logrotate.conf') + with open(rotate_conf_path) as f: + conf = "" + for daemon, size in daemons.items(): + log.info('writing logrotate stanza for {}'.format(daemon)) + conf += f.read().format(daemon_type=daemon, + max_size=size) + f.seek(0, 0) + + for remote in ctx.cluster.remotes.keys(): + remote.write_file(remote_logrotate_conf, BytesIO(conf.encode())) + remote.sh( + f'sudo mv {remote_logrotate_conf} /etc/logrotate.d/ceph-test.conf && ' + 'sudo chmod 0644 /etc/logrotate.d/ceph-test.conf && ' + 'sudo chown root.root /etc/logrotate.d/ceph-test.conf') + remote.chcon('/etc/logrotate.d/ceph-test.conf', + 'system_u:object_r:etc_t:s0') + + if ctx.config.get('log-rotate'): + daemons = ctx.config.get('log-rotate') + log.info('Setting up log rotation with ' + str(daemons)) + write_rotate_conf(ctx, daemons) + logrotater = Rotater() + logrotater.begin() + try: + yield + + finally: + if ctx.config.get('log-rotate'): + log.info('Shutting down logrotate') + logrotater.end() + ctx.cluster.sh('sudo rm /etc/logrotate.d/ceph-test.conf') + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and 
def assign_devs(roles, devs):
    """
    Pair every role with the device at the same position.

    Pairing stops at the shorter of the two inputs, so surplus roles or
    surplus devices are left out.

    :param roles: List of roles
    :param devs: Corresponding list of devices.
    :returns: Dictionary of devs indexed by roles.
    """
    role_to_dev = {}
    for role, dev in zip(roles, devs):
        role_to_dev[role] = dev
    return role_to_dev
@contextlib.contextmanager
def crush_setup(ctx, config):
    """
    Set the crush tunables profile of the cluster, then yield.

    The profile is taken from ``config['crush_tunables']`` (``'default'``
    when absent) and applied by running ``ceph osd crush tunables`` on the
    remote that hosts the first monitor of ``config['cluster']``.

    :param ctx: Context
    :param config: Configuration
    """
    cluster_name = config['cluster']
    mon_role = teuthology.get_first_mon(ctx, config, cluster_name)
    # single-element unpacking: fails unless exactly one remote matches
    [mon_remote] = ctx.cluster.only(mon_role).remotes.keys()

    tunables = config.get('crush_tunables', 'default')
    log.info('Setting crush tunables to %s', tunables)
    command = ['sudo', 'ceph', '--cluster', cluster_name]
    command.extend(['osd', 'crush', 'tunables', tunables])
    mon_remote.run(args=command)
    yield
@contextlib.contextmanager
def create_rbd_pool(ctx, config):
    """
    Wait for the OSDs to come up and, unless disabled, create the rbd pool.

    The pool is created (and the rbd application enabled on it) on the
    remote hosting the first monitor of ``config['cluster']`` unless
    ``config['create_rbd_pool']`` is false.  Enabling the application is
    run with ``check_status=False``.

    :param ctx: Context
    :param config: Configuration
    """
    cluster_name = config['cluster']
    mon_role = teuthology.get_first_mon(ctx, config, cluster_name)
    # single-element unpacking: fails unless exactly one remote matches
    [mon_remote] = ctx.cluster.only(mon_role).remotes.keys()

    log.info('Waiting for OSDs to come up')
    teuthology.wait_until_osds_up(
        ctx,
        cluster=ctx.cluster,
        remote=mon_remote,
        ceph_cluster=cluster_name,
    )

    if config.get('create_rbd_pool', True):
        pool_cmd = ['sudo', 'ceph', '--cluster', cluster_name, 'osd', 'pool']
        log.info('Creating RBD pool')
        mon_remote.run(args=pool_cmd + ['create', 'rbd', '8'])
        mon_remote.run(
            args=pool_cmd + [
                'application', 'enable',
                'rbd', 'rbd', '--yes-i-really-mean-it',
            ],
            check_status=False,
        )
    yield
def get_mons(roles, ips, cluster_name,
             mon_bind_msgr2=False,
             mon_bind_addrvec=False):
    """
    Get monitors and their associated addresses

    ``roles`` is a list of per-host role lists and ``ips`` the host
    address at the same index.  The first monitor found on an address gets
    v1 port 6789 (and v2 port 3300 with msgr2); each further monitor on
    the same address gets the next port(s).

    :returns: Dictionary mapping each mon role of ``cluster_name`` to its
              address string; asserts that at least one was found.
    """
    is_mon = teuthology.is_type('mon', cluster_name)
    mons = {}
    v1_ports = {}
    v2_ports = {}
    for host_idx, host_roles in enumerate(roles):
        for role in filter(is_mon, host_roles):
            ip = ips[host_idx]
            v1_ports[ip] = v1_ports[ip] + 1 if ip in v1_ports else 6789
            if mon_bind_msgr2:
                if ip in v2_ports:
                    # a second monitor on the same address needs explicit
                    # ports, which only the address-vector form can carry
                    assert mon_bind_addrvec
                    v2_ports[ip] += 1
                    addr = '[v2:{ip}:{port2},v1:{ip}:{port1}]'.format(
                        ip=ip,
                        port2=v2_ports[ip],
                        port1=v1_ports[ip],
                    )
                else:
                    v2_ports[ip] = 3300
                    # bare address: no port is spelled out for the first mon
                    addr = '{ip}'.format(ip=ip)
            elif mon_bind_addrvec:
                addr = '[v1:{ip}:{port}]'.format(
                    ip=ip,
                    port=v1_ports[ip],
                )
            else:
                addr = '{ip}:{port}'.format(
                    ip=ip,
                    port=v1_ports[ip],
                )
            mons[role] = addr
    assert mons
    return mons
def _parse_generated_fsid(monmap_output):
    """Return the fsid reported in monmaptool's output, or raise RuntimeError."""
    match = re.search("generated fsid (.+)$", monmap_output, re.MULTILINE)
    if match is None:
        raise RuntimeError(
            'monmaptool did not report a generated fsid: %r' % monmap_output)
    return match.group(1)


def create_simple_monmap(ctx, remote, conf, mons,
                         path=None,
                         mon_bind_addrvec=False):
    """
    Writes a simple monmap based on current ceph.conf into path, or
    <testdir>/monmap by default.

    Assumes ceph_conf is up to date.

    Assumes mon sections are named "mon.*", with the dot.

    :param ctx: Context
    :param remote: remote on which monmaptool is run
    :param conf: config object; written to a temporary file for monmaptool
    :param mons: dictionary mapping mon roles to their addresses
    :param path: where to write the monmap (default <testdir>/monmap)
    :param mon_bind_addrvec: pass address vectors (--addv) where applicable
    :raises RuntimeError: if monmaptool's output contains no fsid
    :return the FSID (as a string) of the newly created monmap
    """

    addresses = list(mons.items())
    assert addresses, "There are no monitors in config!"
    log.debug('Ceph mon addresses: %s', addresses)

    try:
        log.debug('writing out conf {c}'.format(c=conf))
    except Exception:
        # Best-effort logging only.  Not a bare except, which would also
        # swallow KeyboardInterrupt/SystemExit and other BaseExceptions.
        log.debug('my conf logging attempt failed')
    testdir = teuthology.get_testdir(ctx)
    tmp_conf_path = '{tdir}/ceph.tmp.conf'.format(tdir=testdir)
    conf_fp = BytesIO()
    conf.write(conf_fp)
    conf_fp.seek(0)
    teuthology.write_file(remote, tmp_conf_path, conf_fp)
    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'monmaptool',
        '-c',
        '{conf}'.format(conf=tmp_conf_path),
        '--create',
        '--clobber',
    ]
    if mon_bind_addrvec:
        args.extend(['--enable-all-features'])
    for (role, addr) in addresses:
        _, _, n = teuthology.split_role(role)
        if mon_bind_addrvec and (',' in addr or 'v' in addr or ':' in addr):
            args.extend(('--addv', n, addr))
        else:
            args.extend(('--add', n, addr))
    if not path:
        path = '{tdir}/monmap'.format(tdir=testdir)
    args.extend([
        '--print',
        path
    ])

    monmap_output = remote.sh(args)
    # Remove the temporary conf before parsing, so it does not stay behind
    # when the output turns out to contain no fsid.
    teuthology.delete_file(remote, tmp_conf_path)
    return _parse_generated_fsid(monmap_output)


def maybe_redirect_stderr(config, type_, args, log_path):
    """
    Append a stderr redirection to ``args`` for crimson osds.

    :returns: ``(create_log_cmd, new_args)`` for an osd when
              ``config['flavor']`` is 'crimson'; ``(None, args)`` with the
              arguments untouched in every other case.
    """
    if type_ == 'osd' and \
       config.get('flavor', 'default') == 'crimson':
        # teuthworker uses ubuntu:ubuntu to access the test nodes
        create_log_cmd = \
            f'sudo install -b -o ubuntu -g ubuntu /dev/null {log_path}'
        return create_log_cmd, args + [run.Raw('2>>'), log_path]
    else:
        return None, args
+ Unmount all test files and temporary journaling files. + Save the monitor information and archive all ceph logs. + Cleanup the keyring setup, and remove all monitor map and data files left over. + + :param ctx: Context + :param config: Configuration + """ + if ctx.config.get('use_existing_cluster', False) is True: + log.info("'use_existing_cluster' is true; skipping cluster creation") + yield + + testdir = teuthology.get_testdir(ctx) + cluster_name = config['cluster'] + data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name) + log.info('Creating ceph cluster %s...', cluster_name) + log.info('config %s', config) + log.info('ctx.config %s', ctx.config) + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + data_dir, + ], + wait=False, + ) + ) + + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'install', '-d', '-m0777', '--', '/var/run/ceph', + ], + wait=False, + ) + ) + + devs_to_clean = {} + remote_to_roles_to_devs = {} + osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name)) + for remote, roles_for_host in osds.remotes.items(): + devs = teuthology.get_scratch_devices(remote) + roles_to_devs = assign_devs( + teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), devs + ) + devs_to_clean[remote] = [] + log.info('osd dev map: {}'.format(roles_to_devs)) + assert roles_to_devs, \ + "remote {} has osd roles, but no osd devices were specified!".format(remote.hostname) + remote_to_roles_to_devs[remote] = roles_to_devs + log.info("remote_to_roles_to_devs: {}".format(remote_to_roles_to_devs)) + for osd_role, dev_name in remote_to_roles_to_devs.items(): + assert dev_name, "{} has no associated device!".format(osd_role) + + log.info('Generating config...') + remotes_and_roles = ctx.cluster.remotes.items() + roles = [role_list for (remote, role_list) in remotes_and_roles] + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + mons = 
get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2'), + mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + conf = skeleton_config( + ctx, roles=roles, ips=ips, mons=mons, cluster=cluster_name, + ) + for section, keys in config['conf'].items(): + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].conf = conf + ctx.ceph[cluster_name].mons = mons + + default_keyring = '/etc/ceph/{cluster}.keyring'.format(cluster=cluster_name) + keyring_path = config.get('keyring_path', default_keyring) + + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + + log.info('Setting up %s...' % firstmon) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + keyring_path, + ], + ) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--gen-key', + '--name=mon.', + keyring_path, + ], + ) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'chmod', + '0644', + keyring_path, + ], + ) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir, + cluster=cluster_name) + fsid = create_simple_monmap( + ctx, + remote=mon0_remote, + conf=conf, + mons=mons, + path=monmap_path, + mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + ctx.ceph[cluster_name].fsid = fsid + if not 'global' in conf: + conf['global'] = {} + conf['global']['fsid'] = fsid + + default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name) + conf_path = config.get('conf_path', default_conf_path) + log.info('Writing %s for FSID %s...' 
% (conf_path, fsid)) + write_conf(ctx, conf_path, cluster_name) + + log.info('Creating admin key on %s...' % firstmon) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--gen-key', + '--name=client.admin', + '--cap', 'mon', 'allow *', + '--cap', 'osd', 'allow *', + '--cap', 'mds', 'allow *', + '--cap', 'mgr', 'allow *', + keyring_path, + ], + ) + + log.info('Copying monmap to all nodes...') + keyring = mon0_remote.read_file(keyring_path) + monmap = mon0_remote.read_file(monmap_path) + + for rem in ctx.cluster.remotes.keys(): + # copy mon key and initial monmap + log.info('Sending monmap to node {remote}'.format(remote=rem)) + rem.write_file(keyring_path, keyring, mode='0644', sudo=True) + rem.write_file(monmap_path, monmap) + + log.info('Setting up mon nodes...') + mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name)) + + if not config.get('skip_mgr_daemons', False): + log.info('Setting up mgr nodes...') + mgrs = ctx.cluster.only(teuthology.is_type('mgr', cluster_name)) + for remote, roles_for_host in mgrs.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mgr', + cluster_name): + _, _, id_ = teuthology.split_role(role) + mgr_dir = DATA_PATH.format( + type_='mgr', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mgr_dir, + run.Raw('&&'), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=mgr.{id}'.format(id=id_), + mgr_dir + '/keyring', + ], + ) + + log.info('Setting up mds nodes...') + mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name)) + for remote, roles_for_host in mdss.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds', + cluster_name): + _, _, id_ = teuthology.split_role(role) + mds_dir = DATA_PATH.format( + type_='mds', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 
'sudo', + 'mkdir', + '-p', + mds_dir, + run.Raw('&&'), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=mds.{id}'.format(id=id_), + mds_dir + '/keyring', + ], + ) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mds_dir + ]) + + cclient.create_keyring(ctx, cluster_name) + log.info('Running mkfs on osd nodes...') + + if not hasattr(ctx, 'disk_config'): + ctx.disk_config = argparse.Namespace() + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'): + ctx.disk_config.remote_to_roles_to_dev = {} + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'): + ctx.disk_config.remote_to_roles_to_dev_mount_options = {} + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'): + ctx.disk_config.remote_to_roles_to_dev_fstype = {} + + teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) + + log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) + for remote, roles_for_host in osds.remotes.items(): + roles_to_devs = remote_to_roles_to_devs[remote] + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = DATA_PATH.format( + type_='osd', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ]) + log.info('roles_to_devs: {}'.format(roles_to_devs)) + log.info('role: {}'.format(role)) + if roles_to_devs.get(role): + dev = roles_to_devs[role] + fs = config.get('fs') + package = None + mkfs_options = config.get('mkfs_options') + mount_options = config.get('mount_options') + if fs == 'btrfs': + # package = 'btrfs-tools' + if mount_options is None: + mount_options = ['noatime', 'user_subvol_rm_allowed'] + if mkfs_options is None: + mkfs_options = ['-m', 'single', + '-l', '32768', + '-n', '32768'] + if fs == 'xfs': + # package = 'xfsprogs' + if 
mount_options is None: + mount_options = ['noatime'] + if mkfs_options is None: + mkfs_options = ['-f', '-i', 'size=2048'] + if fs == 'ext4' or fs == 'ext3': + if mount_options is None: + mount_options = ['noatime', 'user_xattr'] + + if mount_options is None: + mount_options = [] + if mkfs_options is None: + mkfs_options = [] + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + if package is not None: + remote.sh('sudo apt-get install -y %s' % package) + + try: + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + except run.CommandFailedError: + # Newer btfs-tools doesn't prompt for overwrite, use -f + if '-f' not in mount_options: + mkfs_options.append('-f') + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + + log.info('mount %s on %s -o %s' % (dev, remote, + ','.join(mount_options))) + remote.run( + args=[ + 'sudo', + 'mount', + '-t', fs, + '-o', ','.join(mount_options), + dev, + mnt_point, + ] + ) + remote.run( + args=[ + 'sudo', '/sbin/restorecon', mnt_point, + ], + check_status=False, + ) + if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options + if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: + ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs + devs_to_clean[remote].append(mnt_point) + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + try: + args = ['sudo', + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', coverage_dir, + 'ceph-osd', + '--no-mon-config', + '--cluster', cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path] + 
log_path = f'/var/log/ceph/{cluster_name}-osd.{id_}.log' + create_log_cmd, args = \ + maybe_redirect_stderr(config, 'osd', args, log_path) + if create_log_cmd: + remote.sh(create_log_cmd) + remote.run(args=args) + except run.CommandFailedError: + # try without --no-mon-config.. this may be an upgrade test + remote.run( + args=[ + 'sudo', + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-osd', + '--cluster', + cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path, + ], + ) + mnt_point = DATA_PATH.format( + type_='osd', cluster=cluster_name, id_=id_) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point + ]) + + log.info('Reading keys from all nodes...') + keys_fp = BytesIO() + keys = [] + for remote, roles_for_host in ctx.cluster.remotes.items(): + for type_ in ['mgr', 'mds', 'osd']: + if type_ == 'mgr' and config.get('skip_mgr_daemons', False): + continue + for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name): + _, _, id_ = teuthology.split_role(role) + data = remote.read_file( + os.path.join( + DATA_PATH.format( + type_=type_, id_=id_, cluster=cluster_name), + 'keyring', + ), + sudo=True, + ) + keys.append((type_, id_, data)) + keys_fp.write(data) + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name): + _, _, id_ = teuthology.split_role(role) + data = remote.read_file( + '/etc/ceph/{cluster}.client.{id}.keyring'.format(id=id_, cluster=cluster_name) + ) + keys.append(('client', id_, data)) + keys_fp.write(data) + + log.info('Adding keys to all mons...') + writes = mons.run( + args=[ + 'sudo', 'tee', '-a', + keyring_path, + ], + stdin=run.PIPE, + wait=False, + stdout=BytesIO(), + ) + keys_fp.seek(0) + teuthology.feed_many_stdins_and_close(keys_fp, writes) + run.wait(writes) + for type_, id_, data in keys: + run.wait( + mons.run( + args=[ + 'sudo', + 'adjust-ulimits', + 
'ceph-coverage', + coverage_dir, + 'ceph-authtool', + keyring_path, + '--name={type}.{id}'.format( + type=type_, + id=id_, + ), + ] + list(generate_caps(type_)), + wait=False, + ), + ) + + log.info('Running mkfs on mon nodes...') + for remote, roles_for_host in mons.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = DATA_PATH.format( + type_='mon', id_=id_, cluster=cluster_name) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ], + ) + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-mon', + '--cluster', cluster_name, + '--mkfs', + '-i', id_, + '--monmap', monmap_path, + '--keyring', keyring_path, + ], + ) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point + ]) + + run.wait( + mons.run( + args=[ + 'rm', + '--', + monmap_path, + ], + wait=False, + ), + ) + + try: + yield + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + finally: + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + + log.info('Checking cluster log for badness...') + + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. 
+ :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name), + ] + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + stdout = mon0_remote.sh(args) + return stdout or None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config['log_ignorelist']) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log_ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + for remote, dirs in devs_to_clean.items(): + for dir_ in dirs: + log.info('Unmounting %s on %s' % (dir_, remote)) + try: + remote.run( + args=[ + 'sync', + run.Raw('&&'), + 'sudo', + 'umount', + '-f', + dir_ + ] + ) + except Exception as e: + remote.run(args=[ + 'sudo', + run.Raw('PATH=/usr/sbin:$PATH'), + 'lsof', + run.Raw(';'), + 'ps', 'auxf', + ]) + raise e + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + + # archive mon data, too + log.info('Archiving mon data...') + path = os.path.join(ctx.archive, 'data') + try: + os.makedirs(path) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + for remote, roles in mons.remotes.items(): + for role in roles: + is_mon = teuthology.is_type('mon', cluster_name) + if is_mon(role): + _, _, id_ = teuthology.split_role(role) + mon_dir = DATA_PATH.format( + type_='mon', id_=id_, cluster=cluster_name) + teuthology.pull_directory_tarball( + remote, + mon_dir, + path + '/' + role + '.tgz') + + log.info('Cleaning ceph cluster...') + run.wait( + ctx.cluster.run( + args=[ 
+ 'sudo', + 'rm', + '-rf', + '--', + conf_path, + keyring_path, + data_dir, + monmap_path, + run.Raw('{tdir}/../*.pid'.format(tdir=testdir)), + ], + wait=False, + ), + ) + + +def osd_scrub_pgs(ctx, config): + """ + Scrub pgs when we exit. + + First make sure all pgs are active and clean. + Next scrub all osds. + Then periodically check until all pgs have scrub time stamps that + indicate the last scrub completed. Time out if no progress is made + here after two minutes. + """ + retries = 40 + delays = 20 + cluster_name = config['cluster'] + manager = ctx.managers[cluster_name] + for _ in range(retries): + stats = manager.get_pg_stats() + unclean = [stat['pgid'] for stat in stats if 'active+clean' not in stat['state']] + split_merge = [] + osd_dump = manager.get_osd_dump_json() + try: + split_merge = [i['pool_name'] for i in osd_dump['pools'] if i['pg_num'] != i['pg_num_target']] + except KeyError: + # we don't support pg_num_target before nautilus + pass + if not unclean and not split_merge: + break + waiting_on = [] + if unclean: + waiting_on.append(f'{unclean} to go clean') + if split_merge: + waiting_on.append(f'{split_merge} to split/merge') + waiting_on = ' and '.join(waiting_on) + log.info('Waiting for all PGs to be active+clean and split+merged, waiting on %s', waiting_on) + time.sleep(delays) + else: + raise RuntimeError("Scrubbing terminated -- not all pgs were active and clean.") + check_time_now = time.localtime() + time.sleep(1) + all_roles = teuthology.all_roles(ctx.cluster) + for role in teuthology.cluster_roles_of_type(all_roles, 'osd', cluster_name): + log.info("Scrubbing {osd}".format(osd=role)) + _, _, id_ = teuthology.split_role(role) + # allow this to fail; in certain cases the OSD might not be up + # at this point. we will catch all pgs below. + try: + manager.raw_cluster_cmd('tell', 'osd.' 
+ id_, 'config', 'set', + 'osd_debug_deep_scrub_sleep', '0'); + manager.raw_cluster_cmd('osd', 'deep-scrub', id_) + except run.CommandFailedError: + pass + prev_good = 0 + gap_cnt = 0 + loop = True + while loop: + stats = manager.get_pg_stats() + timez = [(stat['pgid'],stat['last_scrub_stamp']) for stat in stats] + loop = False + thiscnt = 0 + re_scrub = [] + for (pgid, tmval) in timez: + t = tmval[0:tmval.find('.')].replace(' ', 'T') + pgtm = time.strptime(t, '%Y-%m-%dT%H:%M:%S') + if pgtm > check_time_now: + thiscnt += 1 + else: + log.info('pgid %s last_scrub_stamp %s %s <= %s', pgid, tmval, pgtm, check_time_now) + loop = True + re_scrub.append(pgid) + if thiscnt > prev_good: + prev_good = thiscnt + gap_cnt = 0 + else: + gap_cnt += 1 + if gap_cnt % 6 == 0: + for pgid in re_scrub: + # re-request scrub every so often in case the earlier + # request was missed. do not do it every time because + # the scrub may be in progress or not reported yet and + # we will starve progress. + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + if gap_cnt > retries: + raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.') + if loop: + log.info('Still waiting for all pgs to be scrubbed.') + time.sleep(delays) + + +@contextlib.contextmanager +def run_daemon(ctx, config, type_): + """ + Run daemons for a role type. Handle the startup and termination of a a daemon. + On startup -- set coverages, cpu_profile, valgrind values for all remotes, + and a max_mds value for one mds. + On cleanup -- Stop all existing daemons of this type. 
+ + :param ctx: Context + :param config: Configuration + :param type_: Role type + """ + cluster_name = config['cluster'] + log.info('Starting %s daemons in cluster %s...', type_, cluster_name) + testdir = teuthology.get_testdir(ctx) + daemons = ctx.cluster.only(teuthology.is_type(type_, cluster_name)) + + # check whether any daemons if this type are configured + if daemons is None: + return + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + + daemon_signal = 'kill' + if config.get('coverage') or config.get('valgrind') is not None: + daemon_signal = 'term' + + # create osds in order. (this only matters for pre-luminous, which might + # be jewel/hammer, which doesn't take an id_ argument to legacy 'osd create'). + osd_uuids = {} + for remote, roles_for_host in daemons.remotes.items(): + is_type_ = teuthology.is_type(type_, cluster_name) + for role in roles_for_host: + if not is_type_(role): + continue + _, _, id_ = teuthology.split_role(role) + + + if type_ == 'osd': + datadir='/var/lib/ceph/osd/{cluster}-{id}'.format( + cluster=cluster_name, id=id_) + osd_uuid = remote.read_file( + datadir + '/fsid', sudo=True).decode().strip() + osd_uuids[id_] = osd_uuid + for osd_id in range(len(osd_uuids)): + id_ = str(osd_id) + osd_uuid = osd_uuids.get(id_) + try: + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'new', osd_uuid, id_, + ] + ) + except: + # fallback to pre-luminous (jewel) + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'create', osd_uuid, + ] + ) + if config.get('add_osds_to_crush'): + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'crush', 'create-or-move', 'osd.' 
+ id_, + '1.0', 'host=localhost', 'root=default', + ] + ) + + for remote, roles_for_host in daemons.remotes.items(): + is_type_ = teuthology.is_type(type_, cluster_name) + for role in roles_for_host: + if not is_type_(role): + continue + _, _, id_ = teuthology.split_role(role) + + run_cmd = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'daemon-helper', + daemon_signal, + ] + run_cmd_tail = [ + 'ceph-%s' % (type_), + '-f', + '--cluster', cluster_name, + '-i', id_] + + if type_ in config.get('cpu_profile', []): + profile_path = '/var/log/ceph/profiling-logger/%s.prof' % (role) + run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path]) + + vc = config.get('valgrind') + if vc is not None: + valgrind_args = None + if type_ in vc: + valgrind_args = vc[type_] + if role in vc: + valgrind_args = vc[role] + exit_on_first_error = vc.get('exit_on_first_error', True) + run_cmd = get_valgrind_args(testdir, role, run_cmd, valgrind_args, + exit_on_first_error=exit_on_first_error) + + run_cmd.extend(run_cmd_tail) + log_path = f'/var/log/ceph/{cluster_name}-{type_}.{id_}.log' + create_log_cmd, run_cmd = \ + maybe_redirect_stderr(config, type_, run_cmd, log_path) + if create_log_cmd: + remote.sh(create_log_cmd) + # always register mgr; don't necessarily start + ctx.daemons.register_daemon( + remote, type_, id_, + cluster=cluster_name, + args=run_cmd, + logger=log.getChild(role), + stdin=run.PIPE, + wait=False + ) + if type_ != 'mgr' or not config.get('skip_mgr_daemons', False): + role = cluster_name + '.' 
+ type_ + ctx.daemons.get_daemon(type_, id_, cluster_name).restart() + + # kludge: run any pre-manager commands + if type_ == 'mon': + for cmd in config.get('pre-mgr-commands', []): + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + (remote,) = ctx.cluster.only(firstmon).remotes.keys() + remote.run(args=cmd.split(' ')) + + try: + yield + finally: + teuthology.stop_daemons_of_type(ctx, type_, cluster_name) + + +def healthy(ctx, config): + """ + Wait for all osd's to be up, and for the ceph health monitor to return HEALTH_OK. + + :param ctx: Context + :param config: Configuration + """ + config = config if isinstance(config, dict) else dict() + cluster_name = config.get('cluster', 'ceph') + log.info('Waiting until %s daemons up and pgs clean...', cluster_name) + manager = ctx.managers[cluster_name] + try: + manager.wait_for_mgr_available(timeout=30) + except (run.CommandFailedError, AssertionError) as e: + log.info('ignoring mgr wait error, probably testing upgrade: %s', e) + + manager.wait_for_all_osds_up(timeout=300) + + try: + manager.flush_all_pg_stats() + except (run.CommandFailedError, Exception) as e: + log.info('ignoring flush pg stats error, probably testing upgrade: %s', e) + manager.wait_for_clean() + + if config.get('wait-for-healthy', True): + log.info('Waiting until ceph cluster %s is healthy...', cluster_name) + manager.wait_until_healthy(timeout=300) + + if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes: + # Some MDSs exist, wait for them to be healthy + ceph_fs = Filesystem(ctx) # TODO: make Filesystem cluster-aware + ceph_fs.wait_for_daemons(timeout=300) + + +def wait_for_mon_quorum(ctx, config): + """ + Check renote ceph status until all monitors are up. 
+ + :param ctx: Context + :param config: Configuration + """ + if isinstance(config, dict): + mons = config['daemons'] + cluster_name = config.get('cluster', 'ceph') + else: + assert isinstance(config, list) + mons = config + cluster_name = 'ceph' + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + (remote,) = ctx.cluster.only(firstmon).remotes.keys() + with contextutil.safe_while(sleep=10, tries=60, + action='wait for monitor quorum') as proceed: + while proceed(): + quorum_status = remote.sh('sudo ceph quorum_status', + logger=log.getChild('quorum_status')) + j = json.loads(quorum_status) + q = j.get('quorum_names', []) + log.debug('Quorum: %s', q) + if sorted(q) == sorted(mons): + break + + +def created_pool(ctx, config): + """ + Add new pools to the dictionary of pools that the ceph-manager + knows about. + """ + for new_pool in config: + if new_pool not in ctx.managers['ceph'].pools: + ctx.managers['ceph'].pools[new_pool] = ctx.managers['ceph'].get_pool_int_property( + new_pool, 'pg_num') + + +@contextlib.contextmanager +def suppress_mon_health_to_clog(ctx, config): + """ + set the option, and then restore it with its original value + + Note, due to the way how tasks are executed/nested, it's not suggested to + use this method as a standalone task. otherwise, it's likely that it will + restore the tweaked option at the /end/ of 'tasks' block. 
+ """ + if config.get('mon-health-to-clog', 'true') == 'false': + cluster = config.get('cluster', 'ceph') + manager = ctx.managers[cluster] + manager.raw_cluster_command( + 'config', 'set', 'mon', 'mon_health_to_clog', 'false' + ) + yield + manager.raw_cluster_command( + 'config', 'rm', 'mon', 'mon_health_to_clog' + ) + else: + yield + +@contextlib.contextmanager +def restart(ctx, config): + """ + restart ceph daemons + + For example:: + tasks: + - ceph.restart: [all] + + For example:: + tasks: + - ceph.restart: [osd.0, mon.1, mds.*] + + or:: + + tasks: + - ceph.restart: + daemons: [osd.0, mon.1] + wait-for-healthy: false + wait-for-osds-up: true + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + with suppress_mon_health_to_clog(ctx, config): + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) + ctx.daemons.get_daemon(type_, id_, cluster).restart() + clusters.add(cluster) + + if config.get('wait-for-healthy', True): + for cluster in clusters: + healthy(ctx=ctx, config=dict(cluster=cluster)) + if config.get('wait-for-osds-up', False): + for cluster in clusters: + ctx.managers[cluster].wait_for_all_osds_up() + yield + + +@contextlib.contextmanager +def stop(ctx, config): + """ + Stop ceph daemons + + For example:: + tasks: + - ceph.stop: [mds.*] + + tasks: + - ceph.stop: [osd.0, osd.2] + + tasks: + - ceph.stop: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + for role in daemons: + cluster, type_, id_ = 
teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + clusters.add(cluster) + + + for cluster in clusters: + ctx.ceph[cluster].watchdog.stop() + ctx.ceph[cluster].watchdog.join() + + yield + + +@contextlib.contextmanager +def wait_for_failure(ctx, config): + """ + Wait for a failure of a ceph daemon + + For example:: + tasks: + - ceph.wait_for_failure: [mds.*] + + tasks: + - ceph.wait_for_failure: [osd.0, osd.2] + + tasks: + - ceph.wait_for_failure: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + try: + ctx.daemons.get_daemon(type_, id_, cluster).wait() + except: + log.info('Saw expected daemon failure. Continuing.') + pass + else: + raise RuntimeError('daemon %s did not fail' % role) + + yield + + +def validate_config(ctx, config): + """ + Perform some simple validation on task configuration. + Raises exceptions.ConfigError if an error is found. + """ + # check for osds from multiple clusters on the same host + for remote, roles_for_host in ctx.cluster.remotes.items(): + last_cluster = None + last_role = None + for role in roles_for_host: + role_cluster, role_type, _ = teuthology.split_role(role) + if role_type != 'osd': + continue + if last_cluster and last_cluster != role_cluster: + msg = "Host should not have osds (%s and %s) from multiple clusters" % ( + last_role, role) + raise exceptions.ConfigError(msg) + last_cluster = role_cluster + last_role = role + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up and tear down a Ceph cluster. 
+ + For example:: + + tasks: + - ceph: + - interactive: + + You can also specify what branch to run:: + + tasks: + - ceph: + branch: foo + + Or a tag:: + + tasks: + - ceph: + tag: v0.42.13 + + Or a sha1:: + + tasks: + - ceph: + sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed + + Or a local source dir:: + + tasks: + - ceph: + path: /home/sage/ceph + + To capture code coverage data, use:: + + tasks: + - ceph: + coverage: true + + To use btrfs, ext4, or xfs on the target's scratch disks, use:: + + tasks: + - ceph: + fs: xfs + mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1] + mount_options: [nobarrier, inode64] + + To change the cephfs's default max_mds (1), use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + + To change the max_mds of a specific filesystem, use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + fs: + - name: a + max_mds: 3 + - name: b + + In the above example, filesystem 'a' will have 'max_mds' 3, + and filesystme 'b' will have 'max_mds' 2. + + To change the mdsmap's default session_timeout (60 seconds), use:: + + tasks: + - ceph: + cephfs: + session_timeout: 300 + + Note, this will cause the task to check the /scratch_devs file on each node + for available devices. If no such file is found, /dev/sdb will be used. + + To run some daemons under valgrind, include their names + and the tool/args to use in a valgrind section:: + + tasks: + - ceph: + valgrind: + mds.1: --tool=memcheck + osd.1: [--tool=memcheck, --leak-check=no] + + Those nodes which are using memcheck or valgrind will get + checked for bad results. + + To adjust or modify config options, use:: + + tasks: + - ceph: + conf: + section: + key: value + + For example:: + + tasks: + - ceph: + conf: + mds.0: + some option: value + other key: other value + client.0: + debug client: 10 + debug ms: 1 + + By default, the cluster log is checked for errors and warnings, + and the run marked failed if any appear. 
You can ignore log + entries by giving a list of egrep compatible regexes, i.e.: + + tasks: + - ceph: + log-ignorelist: ['foo.*bar', 'bad message'] + + To run multiple ceph clusters, use multiple ceph tasks, and roles + with a cluster name prefix, e.g. cluster1.client.0. Roles with no + cluster use the default cluster name, 'ceph'. OSDs from separate + clusters must be on separate hosts. Clients and non-osd daemons + from multiple clusters may be colocated. For each cluster, add an + instance of the ceph task with the cluster name specified, e.g.:: + + roles: + - [mon.a, osd.0, osd.1] + - [backup.mon.a, backup.osd.0, backup.osd.1] + - [client.0, backup.client.0] + tasks: + - ceph: + cluster: ceph + - ceph: + cluster: backup + + :param ctx: Context + :param config: Configuration + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + "task ceph only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + ctx.daemons = DaemonGroup() + + testdir = teuthology.get_testdir(ctx) + if config.get('coverage'): + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + log.info('Creating coverage directory...') + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + coverage_dir, + ], + wait=False, + ) + ) + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + validate_config(ctx, config) + + subtasks = [] + if first_ceph_cluster: + # these tasks handle general log setup and parsing on all hosts, + # so they should only be run once + subtasks = [ + lambda: ceph_log(ctx=ctx, config=None), + lambda: ceph_crash(ctx=ctx, config=None), + lambda: valgrind_post(ctx=ctx, config=config), + ] + + subtasks += [ + lambda: cluster(ctx=ctx, config=dict( + conf=config.get('conf', {}), + fs=config.get('fs', 'xfs'), + 
mkfs_options=config.get('mkfs_options', None), + mount_options=config.get('mount_options', None), + skip_mgr_daemons=config.get('skip_mgr_daemons', False), + log_ignorelist=config.get('log-ignorelist', []), + cpu_profile=set(config.get('cpu_profile', []),), + cluster=config['cluster'], + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + )), + lambda: run_daemon(ctx=ctx, config=config, type_='mon'), + lambda: run_daemon(ctx=ctx, config=config, type_='mgr'), + lambda: crush_setup(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + lambda: setup_manager(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='mds'), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: watchdog_setup(ctx=ctx, config=config), + ] + + with contextutil.nested(*subtasks): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=dict(cluster=config['cluster'])) + + yield + finally: + # set pg_num_targets back to actual pg_num, so we don't have to + # wait for pending merges (which can take a while!) + ctx.managers[config['cluster']].stop_pg_num_changes() + + if config.get('wait-for-scrub', True): + # wait for pgs to become active+clean in case any + # recoveries were triggered since the last health check + ctx.managers[config['cluster']].wait_for_clean() + osd_scrub_pgs(ctx, config) + + # stop logging health to clog during shutdown, or else we generate + # a bunch of scary messages unrelated to our actual run. 
+ firstmon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + mon0_remote.run( + args=[ + 'sudo', + 'ceph', + '--cluster', config['cluster'], + 'config', 'set', 'global', + 'mon_health_to_clog', 'false', + ], + check_status=False, + ) diff --git a/qa/tasks/ceph_client.py b/qa/tasks/ceph_client.py new file mode 100644 index 000000000..74e818f93 --- /dev/null +++ b/qa/tasks/ceph_client.py @@ -0,0 +1,42 @@ +""" +Set up client keyring +""" +import logging + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def create_keyring(ctx, cluster_name): + """ + Set up key ring on remote sites + """ + log.info('Setting up client nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + testdir = teuthology.get_testdir(ctx) + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name) + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" + '--name={name}'.format(name=name), + client_keyring, + run.Raw('&&'), + 'sudo', + 'chmod', + '0644', + client_keyring, + ], + ) diff --git a/qa/tasks/ceph_deploy.py b/qa/tasks/ceph_deploy.py new file mode 100644 index 000000000..99c8c1ffb --- /dev/null +++ b/qa/tasks/ceph_deploy.py @@ -0,0 +1,916 @@ +""" +Execute ceph-deploy as a task +""" + +import contextlib +import os +import time +import logging +import traceback + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config 
+from teuthology.task import install as install_fn +from teuthology.orchestra import run +from tasks.cephfs.filesystem import Filesystem +from teuthology.misc import wait_until_healthy + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download_ceph_deploy(ctx, config): + """ + Downloads ceph-deploy from the ceph.com git mirror and (by default) + switches to the master branch. If the `ceph-deploy-branch` is specified, it + will use that instead. The `bootstrap` script is ran, with the argument + obtained from `python_version`, if specified. + """ + # use mon.a for ceph_admin + (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys() + + try: + py_ver = str(config['python_version']) + except KeyError: + pass + else: + supported_versions = ['2', '3'] + if py_ver not in supported_versions: + raise ValueError("python_version must be: {}, not {}".format( + ' or '.join(supported_versions), py_ver + )) + + log.info("Installing Python") + system_type = teuthology.get_system_type(ceph_admin) + + if system_type == 'rpm': + package = 'python36' if py_ver == '3' else 'python' + ctx.cluster.run(args=[ + 'sudo', 'yum', '-y', 'install', + package, 'python-virtualenv' + ]) + else: + package = 'python3' if py_ver == '3' else 'python' + ctx.cluster.run(args=[ + 'sudo', 'apt-get', '-y', '--force-yes', 'install', + package, 'python-virtualenv' + ]) + + log.info('Downloading ceph-deploy...') + testdir = teuthology.get_testdir(ctx) + ceph_deploy_branch = config.get('ceph-deploy-branch', 'master') + + ceph_admin.run( + args=[ + 'git', 'clone', '-b', ceph_deploy_branch, + teuth_config.ceph_git_base_url + 'ceph-deploy.git', + '{tdir}/ceph-deploy'.format(tdir=testdir), + ], + ) + args = [ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ] + try: + args.append(str(config['python_version'])) + except KeyError: + pass + ceph_admin.run(args=args) + + try: + yield + finally: + log.info('Removing ceph-deploy ...') + ceph_admin.run( + 
args=[ + 'rm', + '-rf', + '{tdir}/ceph-deploy'.format(tdir=testdir), + ], + ) + + +def is_healthy(ctx, config): + """Wait until a Ceph cluster is healthy.""" + testdir = teuthology.get_testdir(ctx) + ceph_admin = teuthology.get_first_mon(ctx, config) + (remote,) = ctx.cluster.only(ceph_admin).remotes.keys() + max_tries = 90 # 90 tries * 10 secs --> 15 minutes + tries = 0 + while True: + tries += 1 + if tries >= max_tries: + msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes" + remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'ceph', + 'report', + ], + ) + raise RuntimeError(msg) + + out = remote.sh( + [ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'ceph', + 'health', + ], + logger=log.getChild('health'), + ) + log.info('Ceph health: %s', out.rstrip('\n')) + if out.split(None, 1)[0] == 'HEALTH_OK': + break + time.sleep(10) + + +def get_nodes_using_role(ctx, target_role): + """ + Extract the names of nodes that match a given role from a cluster, and modify the + cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy + uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23". + """ + + # Nodes containing a service of the specified role + nodes_of_interest = [] + + # Prepare a modified version of cluster.remotes with ceph-deploy-ized names + modified_remotes = {} + ceph_deploy_mapped = dict() + for _remote, roles_for_host in ctx.cluster.remotes.items(): + modified_remotes[_remote] = [] + for svc_id in roles_for_host: + if svc_id.startswith("{0}.".format(target_role)): + fqdn = str(_remote).split('@')[-1] + nodename = str(str(_remote).split('.')[0]).split('@')[1] + if target_role == 'mon': + nodes_of_interest.append(fqdn) + else: + nodes_of_interest.append(nodename) + mapped_role = "{0}.{1}".format(target_role, nodename) + modified_remotes[_remote].append(mapped_role) + # keep dict of mapped role for later use by tasks + # eg. 
mon.a => mon.node1 + ceph_deploy_mapped[svc_id] = mapped_role + else: + modified_remotes[_remote].append(svc_id) + + ctx.cluster.remotes = modified_remotes + # since the function is called multiple times for target roles + # append new mapped roles + if not hasattr(ctx.cluster, 'mapped_role'): + ctx.cluster.mapped_role = ceph_deploy_mapped + else: + ctx.cluster.mapped_role.update(ceph_deploy_mapped) + log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role)) + return nodes_of_interest + + +def get_dev_for_osd(ctx, config): + """Get a list of all osd device names.""" + osd_devs = [] + for remote, roles_for_host in ctx.cluster.remotes.items(): + host = remote.name.split('@')[-1] + shortname = host.split('.')[0] + devs = teuthology.get_scratch_devices(remote) + num_osd_per_host = list( + teuthology.roles_of_type( + roles_for_host, 'osd')) + num_osds = len(num_osd_per_host) + if config.get('separate_journal_disk') is not None: + num_devs_reqd = 2 * num_osds + assert num_devs_reqd <= len( + devs), 'fewer data and journal disks than required ' + shortname + for dindex in range(0, num_devs_reqd, 2): + jd_index = dindex + 1 + dev_short = devs[dindex].split('/')[-1] + jdev_short = devs[jd_index].split('/')[-1] + osd_devs.append((shortname, dev_short, jdev_short)) + else: + assert num_osds <= len(devs), 'fewer disks than osds ' + shortname + for dev in devs[:num_osds]: + dev_short = dev.split('/')[-1] + osd_devs.append((shortname, dev_short)) + return osd_devs + + +def get_all_nodes(ctx, config): + """Return a string of node names separated by blanks""" + nodelist = [] + for t, k in ctx.config['targets'].items(): + host = t.split('@')[-1] + simple_host = host.split('.')[0] + nodelist.append(simple_host) + nodelist = " ".join(nodelist) + return nodelist + +@contextlib.contextmanager +def build_ceph_cluster(ctx, config): + """Build a ceph cluster""" + + # Expect to find ceph_admin on the first mon by ID, same place that the download task + # puts it. 
Remember this here, because subsequently IDs will change from those in + # the test config to those that ceph-deploy invents. + + (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys() + + def execute_ceph_deploy(cmd): + """Remotely execute a ceph_deploy command""" + return ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(cmd), + ], + check_status=False, + ).exitstatus + + def ceph_disk_osd_create(ctx, config): + node_dev_list = get_dev_for_osd(ctx, config) + no_of_osds = 0 + for d in node_dev_list: + node = d[0] + for disk in d[1:]: + zap = './ceph-deploy disk zap ' + node + ' ' + disk + estatus = execute_ceph_deploy(zap) + if estatus != 0: + raise RuntimeError("ceph-deploy: Failed to zap osds") + osd_create_cmd = './ceph-deploy osd create ' + # first check for filestore, default is bluestore with ceph-deploy + if config.get('filestore') is not None: + osd_create_cmd += '--filestore ' + elif config.get('bluestore') is not None: + osd_create_cmd += '--bluestore ' + if config.get('dmcrypt') is not None: + osd_create_cmd += '--dmcrypt ' + osd_create_cmd += ":".join(d) + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + return no_of_osds + + def ceph_volume_osd_create(ctx, config): + osds = ctx.cluster.only(teuthology.is_type('osd')) + no_of_osds = 0 + for remote in osds.remotes.keys(): + # all devs should be lvm + osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' ' + # default is bluestore so we just need config item for filestore + roles = ctx.cluster.remotes[remote] + dev_needed = len([role for role in roles + if role.startswith('osd')]) + all_devs = teuthology.get_scratch_devices(remote) + log.info("node={n}, need_devs={d}, available={a}".format( + n=remote.shortname, + d=dev_needed, + a=all_devs, + )) + devs = all_devs[0:dev_needed] + 
# rest of the devices can be used for journal if required + jdevs = dev_needed + for device in devs: + device_split = device.split('/') + lv_device = device_split[-2] + '/' + device_split[-1] + if config.get('filestore') is not None: + osd_create_cmd += '--filestore --data ' + lv_device + ' ' + # filestore with ceph-volume also needs journal disk + try: + jdevice = all_devs.pop(jdevs) + except IndexError: + raise RuntimeError("No device available for \ + journal configuration") + jdevice_split = jdevice.split('/') + j_lv = jdevice_split[-2] + '/' + jdevice_split[-1] + osd_create_cmd += '--journal ' + j_lv + else: + osd_create_cmd += ' --data ' + lv_device + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + return no_of_osds + + try: + log.info('Building ceph cluster using ceph-deploy...') + testdir = teuthology.get_testdir(ctx) + ceph_branch = None + if config.get('branch') is not None: + cbranch = config.get('branch') + for var, val in cbranch.items(): + ceph_branch = '--{var}={val}'.format(var=var, val=val) + all_nodes = get_all_nodes(ctx, config) + mds_nodes = get_nodes_using_role(ctx, 'mds') + mds_nodes = " ".join(mds_nodes) + mon_node = get_nodes_using_role(ctx, 'mon') + mon_nodes = " ".join(mon_node) + # skip mgr based on config item + # this is needed when test uses latest code to install old ceph + # versions + skip_mgr = config.get('skip-mgr', False) + if not skip_mgr: + mgr_nodes = get_nodes_using_role(ctx, 'mgr') + mgr_nodes = " ".join(mgr_nodes) + new_mon = './ceph-deploy new' + " " + mon_nodes + if not skip_mgr: + mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes + mon_hostname = mon_nodes.split(' ')[0] + mon_hostname = str(mon_hostname) + gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname + deploy_mds = './ceph-deploy mds create' + " " + mds_nodes + + if mon_nodes is None: + raise 
RuntimeError("no monitor nodes in the config file") + + estatus_new = execute_ceph_deploy(new_mon) + if estatus_new != 0: + raise RuntimeError("ceph-deploy: new command failed") + + log.info('adding config inputs...') + testdir = teuthology.get_testdir(ctx) + conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) + + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.items(): + lines = '[{section}]\n'.format(section=section) + ceph_admin.sudo_write_file(conf_path, lines, append=True) + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + ceph_admin.sudo_write_file(conf_path, lines, append=True) + + # install ceph + dev_branch = ctx.config['branch'] + branch = '--dev={branch}'.format(branch=dev_branch) + if ceph_branch: + option = ceph_branch + else: + option = branch + install_nodes = './ceph-deploy install ' + option + " " + all_nodes + estatus_install = execute_ceph_deploy(install_nodes) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph") + # install ceph-test package too + install_nodes2 = './ceph-deploy install --tests ' + option + \ + " " + all_nodes + estatus_install = execute_ceph_deploy(install_nodes2) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph-test") + + mon_create_nodes = './ceph-deploy mon create-initial' + # If the following fails, it is OK, it might just be that the monitors + # are taking way more than a minute/monitor to form quorum, so lets + # try the next block which will wait up to 15 minutes to gatherkeys. 
+ execute_ceph_deploy(mon_create_nodes) + + estatus_gather = execute_ceph_deploy(gather_keys) + if estatus_gather != 0: + raise RuntimeError("ceph-deploy: Failed during gather keys") + + # install admin key on mons (ceph-create-keys doesn't do this any more) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote in mons.remotes.keys(): + execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname) + + # create osd's + if config.get('use-ceph-volume', False): + no_of_osds = ceph_volume_osd_create(ctx, config) + else: + # this method will only work with ceph-deploy v1.5.39 or older + no_of_osds = ceph_disk_osd_create(ctx, config) + + if not skip_mgr: + execute_ceph_deploy(mgr_create) + + if mds_nodes: + estatus_mds = execute_ceph_deploy(deploy_mds) + if estatus_mds != 0: + raise RuntimeError("ceph-deploy: Failed to deploy mds") + + if config.get('test_mon_destroy') is not None: + for d in range(1, len(mon_node)): + mon_destroy_nodes = './ceph-deploy mon destroy' + \ + " " + mon_node[d] + estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes) + if estatus_mon_d != 0: + raise RuntimeError("ceph-deploy: Failed to delete monitor") + + + + if config.get('wait-for-healthy', True) and no_of_osds >= 2: + is_healthy(ctx=ctx, config=None) + + log.info('Setting up client nodes...') + conf_path = '/etc/ceph/ceph.conf' + admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' + first_mon = teuthology.get_first_mon(ctx, config) + (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() + conf_data = mon0_remote.read_file(conf_path, sudo=True) + admin_keyring = mon0_remote.read_file(admin_keyring_path, sudo=True) + + clients = ctx.cluster.only(teuthology.is_type('client')) + for remote, roles_for_host in clients.remotes.items(): + for id_ in teuthology.roles_of_type(roles_for_host, 'client'): + client_keyring = \ + '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) + mon0_remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 
'bash', '-c', + run.Raw('"'), 'ceph', + 'auth', + 'get-or-create', + 'client.{id}'.format(id=id_), + 'mds', 'allow', + 'mon', 'allow *', + 'osd', 'allow *', + run.Raw('>'), + client_keyring, + run.Raw('"'), + ], + ) + key_data = mon0_remote.read_file( + path=client_keyring, + sudo=True, + ) + remote.sudo_write_file( + path=client_keyring, + data=key_data, + mode='0644' + ) + remote.sudo_write_file( + path=admin_keyring_path, + data=admin_keyring, + mode='0644' + ) + remote.sudo_write_file( + path=conf_path, + data=conf_data, + mode='0644' + ) + + if mds_nodes: + log.info('Configuring CephFS...') + Filesystem(ctx, create=True) + elif not config.get('only_mon'): + raise RuntimeError( + "The cluster is NOT operational due to insufficient OSDs") + # create rbd pool + ceph_admin.run( + args=[ + 'sudo', 'ceph', '--cluster', 'ceph', + 'osd', 'pool', 'create', 'rbd', '128', '128'], + check_status=False) + ceph_admin.run( + args=[ + 'sudo', 'ceph', '--cluster', 'ceph', + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ], + check_status=False) + yield + + except Exception: + log.info( + "Error encountered, logging exception before tearing down ceph-deploy") + log.info(traceback.format_exc()) + raise + finally: + if config.get('keep_running'): + return + log.info('Stopping ceph...') + ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], + check_status=False) + time.sleep(4) + + # and now just check for the processes themselves, as if upstart/sysvinit + # is lying to us. 
Ignore errors if the grep fails + ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'), + 'grep', '-v', 'grep', run.Raw('|'), + 'grep', 'ceph'], check_status=False) + ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'), + 'grep', 'ceph'], check_status=False) + + if ctx.archive is not None: + # archive mon data, too + log.info('Archiving mon data...') + path = os.path.join(ctx.archive, 'data') + os.makedirs(path) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote, roles in mons.remotes.items(): + for role in roles: + if role.startswith('mon.'): + teuthology.pull_directory_tarball( + remote, + '/var/lib/ceph/mon', + path + '/' + role + '.tgz') + + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + os.makedirs(path) + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + os.makedirs(sub) + teuthology.pull_directory(remote, '/var/log/ceph', + os.path.join(sub, 'log')) + + # Prevent these from being undefined if the try block fails + all_nodes = get_all_nodes(ctx, config) + purge_nodes = './ceph-deploy purge' + " " + all_nodes + purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes + + log.info('Purging package...') + execute_ceph_deploy(purge_nodes) + log.info('Purging data...') + execute_ceph_deploy(purgedata_nodes) + + +@contextlib.contextmanager +def cli_test(ctx, config): + """ + ceph-deploy cli to exercise most commonly use cli's and ensure + all commands works and also startup the init system. 
+ + """ + log.info('Ceph-deploy Test') + if config is None: + config = {} + test_branch = '' + conf_dir = teuthology.get_testdir(ctx) + "/cdtest" + + def execute_cdeploy(admin, cmd, path): + """Execute ceph-deploy commands """ + """Either use git path or repo path """ + args = ['cd', conf_dir, run.Raw(';')] + if path: + args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path)) + else: + args.append('ceph-deploy') + args.append(run.Raw(cmd)) + ec = admin.run(args=args, check_status=False).exitstatus + if ec != 0: + raise RuntimeError( + "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec)) + + if config.get('rhbuild'): + path = None + else: + path = teuthology.get_testdir(ctx) + # test on branch from config eg: wip-* , master or next etc + # packages for all distro's should exist for wip* + if ctx.config.get('branch'): + branch = ctx.config.get('branch') + test_branch = ' --dev={branch} '.format(branch=branch) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for node, role in mons.remotes.items(): + admin = node + admin.run(args=['mkdir', conf_dir], check_status=False) + nodename = admin.shortname + system_type = teuthology.get_system_type(admin) + if config.get('rhbuild'): + admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y']) + log.info('system type is %s', system_type) + osds = ctx.cluster.only(teuthology.is_type('osd')) + + for remote, roles in osds.remotes.items(): + devs = teuthology.get_scratch_devices(remote) + log.info("roles %s", roles) + if (len(devs) < 3): + log.error( + 'Test needs minimum of 3 devices, only found %s', + str(devs)) + raise RuntimeError("Needs minimum of 3 devices ") + + conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir) + new_cmd = 'new ' + nodename + execute_cdeploy(admin, new_cmd, path) + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.items(): + lines = '[{section}]\n'.format(section=section) + admin.sudo_write_file(conf_path, lines, 
append=True) + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + admin.sudo_write_file(conf_path, lines, append=True) + new_mon_install = 'install {branch} --mon '.format( + branch=test_branch) + nodename + new_mgr_install = 'install {branch} --mgr '.format( + branch=test_branch) + nodename + new_osd_install = 'install {branch} --osd '.format( + branch=test_branch) + nodename + new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename + create_initial = 'mon create-initial ' + mgr_create = 'mgr create ' + nodename + # either use create-keys or push command + push_keys = 'admin ' + nodename + execute_cdeploy(admin, new_mon_install, path) + execute_cdeploy(admin, new_mgr_install, path) + execute_cdeploy(admin, new_osd_install, path) + execute_cdeploy(admin, new_admin, path) + execute_cdeploy(admin, create_initial, path) + execute_cdeploy(admin, mgr_create, path) + execute_cdeploy(admin, push_keys, path) + + for i in range(3): + zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i]) + prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i]) + execute_cdeploy(admin, zap_disk, path) + execute_cdeploy(admin, prepare, path) + + log.info("list files for debugging purpose to check file permissions") + admin.run(args=['ls', run.Raw('-lt'), conf_dir]) + remote.run(args=['sudo', 'ceph', '-s'], check_status=False) + out = remote.sh('sudo ceph health') + log.info('Ceph health: %s', out.rstrip('\n')) + log.info("Waiting for cluster to become healthy") + with contextutil.safe_while(sleep=10, tries=6, + action='check health') as proceed: + while proceed(): + out = remote.sh('sudo ceph health') + if (out.split(None, 1)[0] == 'HEALTH_OK'): + break + rgw_install = 'install {branch} --rgw {node}'.format( + branch=test_branch, + node=nodename, + ) + rgw_create = 'rgw create ' + nodename + execute_cdeploy(admin, rgw_install, path) + execute_cdeploy(admin, 
rgw_create, path) + log.info('All ceph-deploy cli tests passed') + try: + yield + finally: + log.info("cleaning up") + ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], + check_status=False) + time.sleep(4) + for i in range(3): + umount_dev = "{d}1".format(d=devs[i]) + remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) + cmd = 'purge ' + nodename + execute_cdeploy(admin, cmd, path) + cmd = 'purgedata ' + nodename + execute_cdeploy(admin, cmd, path) + log.info("Removing temporary dir") + admin.run( + args=[ + 'rm', + run.Raw('-rf'), + run.Raw(conf_dir)], + check_status=False) + if config.get('rhbuild'): + admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y']) + + +@contextlib.contextmanager +def single_node_test(ctx, config): + """ + - ceph-deploy.single_node_test: null + + #rhbuild testing + - ceph-deploy.single_node_test: + rhbuild: 1.2.3 + + """ + log.info("Testing ceph-deploy on single node") + if config is None: + config = {} + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) + + if config.get('rhbuild'): + log.info("RH Build, Skip Download") + with contextutil.nested( + lambda: cli_test(ctx=ctx, config=config), + ): + yield + else: + with contextutil.nested( + lambda: install_fn.ship_utilities(ctx=ctx, config=None), + lambda: download_ceph_deploy(ctx=ctx, config=config), + lambda: cli_test(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def upgrade(ctx, config): + """ + Upgrade using ceph-deploy + eg: + ceph-deploy.upgrade: + # to upgrade to specific branch, use + branch: + stable: jewel + # to setup mgr node, use + setup-mgr-node: True + # to wait for cluster to be healthy after all upgrade, use + wait-for-healthy: True + role: (upgrades the below roles serially) + mon.a + mon.b + osd.0 + """ + roles = config.get('roles') + # get the roles that are mapped as per ceph-deploy + # roles are mapped for mon/mds eg: mon.a => mon.host_short_name + 
mapped_role = ctx.cluster.mapped_role + log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role)) + if config.get('branch'): + branch = config.get('branch') + (var, val) = branch.items()[0] + ceph_branch = '--{var}={val}'.format(var=var, val=val) + else: + # default to wip-branch under test + dev_branch = ctx.config['branch'] + ceph_branch = '--dev={branch}'.format(branch=dev_branch) + # get the node used for initial deployment which is mon.a + mon_a = mapped_role.get('mon.a') + (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys() + testdir = teuthology.get_testdir(ctx) + cmd = './ceph-deploy install ' + ceph_branch + for role in roles: + # check if this role is mapped (mon or mds) + if mapped_role.get(role): + role = mapped_role.get(role) + remotes_and_roles = ctx.cluster.only(role).remotes + for remote, roles in remotes_and_roles.items(): + nodename = remote.shortname + cmd = cmd + ' ' + nodename + log.info("Upgrading ceph on %s", nodename) + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(cmd), + ], + ) + # restart all ceph services, ideally upgrade should but it does not + remote.run( + args=[ + 'sudo', 'systemctl', 'restart', 'ceph.target' + ] + ) + ceph_admin.run(args=['sudo', 'ceph', '-s']) + + # workaround for http://tracker.ceph.com/issues/20950 + # write the correct mgr key to disk + if config.get('setup-mgr-node', None): + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote, roles in mons.remotes.items(): + remote.run( + args=[ + run.Raw('sudo ceph auth get client.bootstrap-mgr'), + run.Raw('|'), + run.Raw('sudo tee'), + run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring') + ] + ) + + if config.get('setup-mgr-node', None): + mgr_nodes = get_nodes_using_role(ctx, 'mgr') + mgr_nodes = " ".join(mgr_nodes) + mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes + mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes + # install mgr + 
ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(mgr_install), + ], + ) + # create mgr + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(mgr_create), + ], + ) + ceph_admin.run(args=['sudo', 'ceph', '-s']) + if config.get('wait-for-healthy', None): + wait_until_healthy(ctx, ceph_admin, use_sudo=True) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up and tear down a Ceph cluster. + + For example:: + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + stable: bobtail + mon_initial_members: 1 + ceph-deploy-branch: my-ceph-deploy-branch + only_mon: true + keep_running: true + # either choose bluestore or filestore, default is bluestore + bluestore: True + # or + filestore: True + # skip install of mgr for old release using below flag + skip-mgr: True ( default is False ) + # to use ceph-volume instead of ceph-disk + # ceph-disk can only be used with old ceph-deploy release from pypi + use-ceph-volume: true + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + dev: master + conf: + mon: + debug mon = 20 + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + testing: + dmcrypt: yes + separate_journal_disk: yes + + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task ceph-deploy only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) + + if config.get('branch') is not None: + assert isinstance( + config['branch'], dict), 'branch must be a dictionary' + + log.info('task ceph-deploy with config ' + str(config)) + + # we need to use 1.5.39-stable for testing jewel or master branch with + # ceph-disk + if config.get('use-ceph-volume', False) is False: + # check we are not testing specific branch + if 
config.get('ceph-deploy-branch', False) is False: + config['ceph-deploy-branch'] = '1.5.39-stable' + + with contextutil.nested( + lambda: install_fn.ship_utilities(ctx=ctx, config=None), + lambda: download_ceph_deploy(ctx=ctx, config=config), + lambda: build_ceph_cluster(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/ceph_fuse.py b/qa/tasks/ceph_fuse.py new file mode 100644 index 000000000..d2db29732 --- /dev/null +++ b/qa/tasks/ceph_fuse.py @@ -0,0 +1,175 @@ +""" +Ceph FUSE client task +""" + +import contextlib +import logging + +from teuthology import misc +from tasks.cephfs.fuse_mount import FuseMount + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a ``ceph-fuse`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. + + ``brxnet`` should be a Private IPv4 Address range, default range is + [192.168.0.0/16] + + Example that mounts all clients:: + + tasks: + - ceph: + - ceph-fuse: + - interactive: + - brxnet: [192.168.0.0/16] + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + Example that enables valgrind: + + tasks: + - ceph: + - ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + - interactive: + + Example that stops an already-mounted client: + + :: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - ... do something that requires the FS mounted ... + - ceph-fuse: + client.0: + mounted: false + - ... do something that requires the FS unmounted ... 
+ + Example that adds more generous wait time for mount (for virtual machines): + + tasks: + - ceph: + - ceph-fuse: + client.0: + mount_wait: 60 # default is 0, do not wait before checking /sys/ + mount_timeout: 120 # default is 30, give up if /sys/ is not populated + - interactive: + + :param ctx: Context + :param config: Configuration + """ + log.info('Running ceph_fuse task...') + + if config is None: + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + testdir = misc.get_testdir(ctx) + all_mounts = getattr(ctx, 'mounts', {}) + mounted_by_me = {} + skipped = {} + remotes = set() + + brxnet = config.get("brxnet", None) + + # Construct any new FuseMount instances + overrides = ctx.config.get('overrides', {}).get('ceph-fuse', {}) + top_overrides = dict(filter(lambda x: 'client.' 
not in x[0], overrides.items())) + for id_, remote in clients: + entity = f"client.{id_}" + client_config = config.get(entity) + if client_config is None: + client_config = {} + # top level overrides + misc.deep_merge(client_config, top_overrides) + # mount specific overrides + client_config_overrides = overrides.get(entity) + misc.deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + remotes.add(remote) + auth_id = client_config.get("auth_id", id_) + cephfs_name = client_config.get("cephfs_name") + + skip = client_config.get("skip", False) + if skip: + skipped[id_] = skip + continue + + if id_ not in all_mounts: + fuse_mount = FuseMount(ctx=ctx, client_config=client_config, + test_dir=testdir, client_id=auth_id, + client_remote=remote, brxnet=brxnet, + cephfs_name=cephfs_name) + all_mounts[id_] = fuse_mount + else: + # Catch bad configs where someone has e.g. tried to use ceph-fuse and kcephfs for the same client + assert isinstance(all_mounts[id_], FuseMount) + + if not config.get("disabled", False) and client_config.get('mounted', True): + mounted_by_me[id_] = {"config": client_config, "mount": all_mounts[id_]} + + ctx.mounts = all_mounts + + # Umount any pre-existing clients that we have not been asked to mount + for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()): + mount = all_mounts[client_id] + if mount.is_mounted(): + mount.umount_wait() + + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) + + # Mount any clients we have been asked to (default to mount all) + log.info('Mounting ceph-fuse clients...') + for info in mounted_by_me.values(): + config = info["config"] + mount_x = info['mount'] + if config.get("mount_path"): + mount_x.cephfs_mntpt = config.get("mount_path") + if config.get("mountpoint"): + mount_x.hostfs_mntpt = config.get("mountpoint") + mount_x.mount(createfs=False) + + for info in mounted_by_me.values(): + 
info["mount"].wait_until_mounted() + + try: + yield all_mounts + finally: + log.info('Unmounting ceph-fuse clients...') + + for info in mounted_by_me.values(): + # Conditional because an inner context might have umounted it + mount = info["mount"] + if mount.is_mounted(): + mount.umount_wait() + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) diff --git a/qa/tasks/ceph_iscsi.py b/qa/tasks/ceph_iscsi.py new file mode 100644 index 000000000..a8870743a --- /dev/null +++ b/qa/tasks/ceph_iscsi.py @@ -0,0 +1,141 @@ +""" +Run ceph-iscsi cluster setup +""" +import logging +import contextlib +from io import StringIO +from teuthology.exceptions import CommandFailedError, ConnectionLostError +from teuthology.orchestra import run +from textwrap import dedent + +log = logging.getLogger(__name__) + +class IscsiSetup(object): + def __init__(self, ctx, config): + self.ctx = ctx + self.config = config + self.target_iqn = "iqn.2003-01.com.redhat.iscsi-gw:ceph-gw" + self.client_iqn = "iqn.1994-05.com.redhat:client" + self.trusted_ip_list = [] + self.background_procs = [] + + def run_daemon(self, remote, cmds): + p = remote.run(args=['sudo', 'adjust-ulimits', 'daemon-helper', 'kill', cmds], + wait=False, stdin=run.PIPE, stdout=StringIO()) + self.background_procs.append(p) + + def _kill_background(self, p): + if p.stdin: + p.stdin.close() + try: + p.wait() + except (CommandFailedError, ConnectionLostError): + pass + + def kill_backgrounds(self): + for p in self.background_procs: + self._kill_background(p) + self.background_procs = [] + + def _setup_iscsi_gateway_cfg(self, role): + # setup the iscsi-gateway.cfg file, we only set the + # clust_name and trusted_ip_list and all the others + # as default + ips = ','.join(self.trusted_ip_list) + conf = dedent(f'''\ +[config] +cluster_name = ceph +pool = rbd +api_secure = false +api_port = 5000 +trusted_ip_list = {ips} +''') + path = "/etc/ceph/iscsi-gateway.cfg" + (remote,) = 
(self.ctx.cluster.only(role).remotes.keys()) + remote.sudo_write_file(path, conf) + + def _setup_gateway(self, role): + """Spawned task that setups the gateway""" + (remote,) = (self.ctx.cluster.only(role).remotes.keys()) + + self._setup_iscsi_gateway_cfg(role) + + self.run_daemon(remote, "/usr/bin/tcmu-runner") + self.run_daemon(remote, "/usr/bin/rbd-target-gw") + self.run_daemon(remote, "/usr/bin/rbd-target-api") + + def setup_gateways(self): + for role in self.config['gateways']: + (remote,) = (self.ctx.cluster.only(role).remotes.keys()) + self.trusted_ip_list.append(remote.ip_address) + + for role in self.config['gateways']: + self._setup_gateway(role) + + def _setup_client(self, role): + """Spawned task that setups the gateway""" + (remote,) = (self.ctx.cluster.only(role).remotes.keys()) + + # copy the "iscsi-gateway.cfg" to client and will be + # used to get the IPs + self._setup_iscsi_gateway_cfg(role) + + conf = dedent(f''' +InitiatorName={self.client_iqn} +''') + path = "/etc/iscsi/initiatorname.iscsi" + remote.sudo_write_file(path, conf, mkdir=True) + + # the restart is needed after the above change is applied + remote.run(args=['sudo', 'systemctl', 'restart', 'iscsid']) + + remote.run(args=['sudo', 'modprobe', 'dm_multipath']) + remote.run(args=['sudo', 'mpathconf', '--enable']) + conf = dedent('''\ +devices { + device { + vendor "LIO-ORG" + product "LIO-ORG" + hardware_handler "1 alua" + path_grouping_policy "failover" + path_selector "queue-length 0" + failback 60 + path_checker tur + prio alua + prio_args exclusive_pref_bit + fast_io_fail_tmo 25 + no_path_retry queue + } +} +''') + path = "/etc/multipath.conf" + remote.sudo_write_file(path, conf, append=True) + remote.run(args=['sudo', 'systemctl', 'start', 'multipathd']) + + def setup_clients(self): + for role in self.config['clients']: + self._setup_client(role) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run ceph iscsi setup. 
+ + Specify the list of gateways to run :: + + tasks: + ceph_iscsi: + gateways: [a_gateway.0, c_gateway.1] + clients: [b_client.0] + + """ + log.info('Setting ceph iscsi cluster...') + iscsi = IscsiSetup(ctx, config) + iscsi.setup_gateways() + iscsi.setup_clients() + + try: + yield + finally: + log.info('Ending ceph iscsi daemons') + iscsi.kill_backgrounds() diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py new file mode 100644 index 000000000..bb1abfd51 --- /dev/null +++ b/qa/tasks/ceph_manager.py @@ -0,0 +1,3221 @@ +""" +ceph manager -- Thrasher and CephManager objects +""" +from functools import wraps +import contextlib +import errno +import random +import signal +import time +import gevent +import base64 +import json +import logging +import threading +import traceback +import os +import shlex + +from io import BytesIO, StringIO +from subprocess import DEVNULL +from teuthology import misc as teuthology +from tasks.scrub import Scrubber +from tasks.util.rados import cmd_erasure_code_profile +from tasks.util import get_remote +from teuthology.contextutil import safe_while +from teuthology.orchestra.remote import Remote +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError +from tasks.thrasher import Thrasher + + +DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf' + +log = logging.getLogger(__name__) + +# this is for cephadm clusters +def shell(ctx, cluster_name, remote, args, name=None, **kwargs): + extra_args = [] + if name: + extra_args = ['-n', name] + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + ] + extra_args + [ + '--fsid', ctx.ceph[cluster_name].fsid, + '--', + ] + args, + **kwargs + ) + +# this is for rook clusters +def toolbox(ctx, cluster_name, args, **kwargs): + return ctx.rook[cluster_name].remote.run( + args=[ + 'kubectl', + '-n', 'rook-ceph', + 'exec', + ctx.rook[cluster_name].toolbox, + '--', + ] + args, + **kwargs + ) + + +def write_conf(ctx, 
conf_path=DEFAULT_CONF_PATH, cluster='ceph'): + conf_fp = BytesIO() + ctx.ceph[cluster].conf.write(conf_fp) + conf_fp.seek(0) + writes = ctx.cluster.run( + args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'), + 'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'), + 'sudo', 'tee', conf_path, run.Raw('&&'), + 'sudo', 'chmod', '0644', conf_path, + run.Raw('>'), '/dev/null', + + ], + stdin=run.PIPE, + wait=False) + teuthology.feed_many_stdins_and_close(conf_fp, writes) + run.wait(writes) + +def get_valgrind_args(testdir, name, preamble, v, exit_on_first_error=True, cd=True): + """ + Build a command line for running valgrind. + + testdir - test results directory + name - name of daemon (for naming hte log file) + preamble - stuff we should run before valgrind + v - valgrind arguments + """ + if v is None: + return preamble + if not isinstance(v, list): + v = [v] + + # https://tracker.ceph.com/issues/44362 + preamble.extend([ + 'env', 'OPENSSL_ia32cap=~0x1000000000000000', + ]) + + val_path = '/var/log/ceph/valgrind' + if '--tool=memcheck' in v or '--tool=helgrind' in v: + extra_args = [ + 'valgrind', + '--trace-children=no', + '--child-silent-after-fork=yes', + '--soname-synonyms=somalloc=*tcmalloc*', + '--num-callers=50', + '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir), + '--xml=yes', + '--xml-file={vdir}/{n}.log'.format(vdir=val_path, n=name), + '--time-stamp=yes', + '--vgdb=yes', + ] + else: + extra_args = [ + 'valgrind', + '--trace-children=no', + '--child-silent-after-fork=yes', + '--soname-synonyms=somalloc=*tcmalloc*', + '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir), + '--log-file={vdir}/{n}.log'.format(vdir=val_path, n=name), + '--time-stamp=yes', + '--vgdb=yes', + ] + if exit_on_first_error: + extra_args.extend([ + # at least Valgrind 3.14 is required + '--exit-on-first-error=yes', + '--error-exitcode=42', + ]) + args = [] + if cd: + args += ['cd', testdir, run.Raw('&&')] + args += preamble + extra_args + v + log.debug('running 
%s under valgrind with args %s', name, args) + return args + + +def mount_osd_data(ctx, remote, cluster, osd): + """ + Mount a remote OSD + + :param ctx: Context + :param remote: Remote site + :param cluster: name of ceph cluster + :param osd: Osd name + """ + log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote)) + role = "{0}.osd.{1}".format(cluster, osd) + alt_role = role if cluster != 'ceph' else "osd.{0}".format(osd) + if remote in ctx.disk_config.remote_to_roles_to_dev: + if alt_role in ctx.disk_config.remote_to_roles_to_dev[remote]: + role = alt_role + if role not in ctx.disk_config.remote_to_roles_to_dev[remote]: + return + dev = ctx.disk_config.remote_to_roles_to_dev[remote][role] + mount_options = ctx.disk_config.\ + remote_to_roles_to_dev_mount_options[remote][role] + fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] + mnt = os.path.join('/var/lib/ceph/osd', '{0}-{1}'.format(cluster, osd)) + + log.info('Mounting osd.{o}: dev: {n}, cluster: {c}' + 'mountpoint: {p}, type: {t}, options: {v}'.format( + o=osd, n=remote.name, p=mnt, t=fstype, v=mount_options, + c=cluster)) + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', fstype, + '-o', ','.join(mount_options), + dev, + mnt, + ] + ) + + +def log_exc(func): + @wraps(func) + def wrapper(self): + try: + return func(self) + except: + self.log(traceback.format_exc()) + raise + return wrapper + + +class PoolType: + REPLICATED = 1 + ERASURE_CODED = 3 + + +class OSDThrasher(Thrasher): + """ + Object used to thrash Ceph + """ + def __init__(self, manager, config, name, logger): + super(OSDThrasher, self).__init__() + + self.ceph_manager = manager + self.cluster = manager.cluster + self.ceph_manager.wait_for_clean() + osd_status = self.ceph_manager.get_osd_status() + self.in_osds = osd_status['in'] + self.live_osds = osd_status['live'] + self.out_osds = osd_status['out'] + self.dead_osds = osd_status['dead'] + self.stopping = False + self.logger = logger + self.config = config + 
self.name = name + self.revive_timeout = self.config.get("revive_timeout", 360) + self.pools_to_fix_pgp_num = set() + if self.config.get('powercycle'): + self.revive_timeout += 120 + self.clean_wait = self.config.get('clean_wait', 0) + self.minin = self.config.get("min_in", 4) + self.chance_move_pg = self.config.get('chance_move_pg', 1.0) + self.sighup_delay = self.config.get('sighup_delay') + self.optrack_toggle_delay = self.config.get('optrack_toggle_delay') + self.dump_ops_enable = self.config.get('dump_ops_enable') + self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay') + self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05) + self.chance_thrash_pg_upmap = self.config.get('chance_thrash_pg_upmap', 1.0) + self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) + self.random_eio = self.config.get('random_eio') + self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3) + + num_osds = self.in_osds + self.out_osds + self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds) + self.min_pgs = self.config.get("min_pgs_per_pool_osd", 1) * len(num_osds) + if self.config is None: + self.config = dict() + # prevent monitor from auto-marking things out while thrasher runs + # try both old and new tell syntax, in case we are testing old code + self.saved_options = [] + # assuming that the default settings do not vary from one daemon to + # another + first_mon = teuthology.get_first_mon(manager.ctx, self.config).split('.') + opts = [('mon', 'mon_osd_down_out_interval', 0)] + #why do we disable marking an OSD out automatically? 
:/ + for service, opt, new_value in opts: + old_value = manager.get_config(first_mon[0], + first_mon[1], + opt) + self.saved_options.append((service, opt, old_value)) + manager.inject_args(service, '*', opt, new_value) + # initialize ceph_objectstore_tool property - must be done before + # do_thrash is spawned - http://tracker.ceph.com/issues/18799 + if (self.config.get('powercycle') or + not self.cmd_exists_on_osds("ceph-objectstore-tool") or + self.config.get('disable_objectstore_tool_tests', False)): + self.ceph_objectstore_tool = False + if self.config.get('powercycle'): + self.log("Unable to test ceph-objectstore-tool, " + "powercycle testing") + else: + self.log("Unable to test ceph-objectstore-tool, " + "not available on all OSD nodes") + else: + self.ceph_objectstore_tool = \ + self.config.get('ceph_objectstore_tool', True) + # spawn do_thrash + self.thread = gevent.spawn(self.do_thrash) + if self.sighup_delay: + self.sighup_thread = gevent.spawn(self.do_sighup) + if self.optrack_toggle_delay: + self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle) + if self.dump_ops_enable == "true": + self.dump_ops_thread = gevent.spawn(self.do_dump_ops) + if self.noscrub_toggle_delay: + self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle) + + def log(self, msg, *args, **kwargs): + self.logger.info(msg, *args, **kwargs) + + def cmd_exists_on_osds(self, cmd): + if self.ceph_manager.cephadm or self.ceph_manager.rook: + return True + allremotes = self.ceph_manager.ctx.cluster.only(\ + teuthology.is_type('osd', self.cluster)).remotes.keys() + allremotes = list(set(allremotes)) + for remote in allremotes: + proc = remote.run(args=['type', cmd], wait=True, + check_status=False, stdout=BytesIO(), + stderr=BytesIO()) + if proc.exitstatus != 0: + return False; + return True; + + def run_ceph_objectstore_tool(self, remote, osd, cmd): + if self.ceph_manager.cephadm: + return shell( + self.ceph_manager.ctx, self.ceph_manager.cluster, remote, + 
args=['ceph-objectstore-tool'] + cmd, + name=osd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + elif self.ceph_manager.rook: + assert False, 'not implemented' + else: + return remote.run( + args=['sudo', 'adjust-ulimits', 'ceph-objectstore-tool'] + cmd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + + def run_ceph_bluestore_tool(self, remote, osd, cmd): + if self.ceph_manager.cephadm: + return shell( + self.ceph_manager.ctx, self.ceph_manager.cluster, remote, + args=['ceph-bluestore-tool', '--err-to-stderr'] + cmd, + name=osd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + elif self.ceph_manager.rook: + assert False, 'not implemented' + else: + return remote.run( + args=['sudo', 'ceph-bluestore-tool', '--err-to-stderr'] + cmd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + + def kill_osd(self, osd=None, mark_down=False, mark_out=False): + """ + :param osd: Osd to be killed. + :mark_down: Mark down if true. + :mark_out: Mark out if true. 
+ """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Killing osd %s, live_osds are %s" % (str(osd), + str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.kill_osd(osd) + if mark_down: + self.ceph_manager.mark_down_osd(osd) + if mark_out and osd in self.in_osds: + self.out_osd(osd) + if self.ceph_objectstore_tool: + self.log("Testing ceph-objectstore-tool on down osd.%s" % osd) + remote = self.ceph_manager.find_remote('osd', osd) + FSPATH = self.ceph_manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + exp_osd = imp_osd = osd + self.log('remote for osd %s is %s' % (osd, remote)) + exp_remote = imp_remote = remote + # If an older osd is available we'll move a pg from there + if (len(self.dead_osds) > 1 and + random.random() < self.chance_move_pg): + exp_osd = random.choice(self.dead_osds[:-1]) + exp_remote = self.ceph_manager.find_remote('osd', exp_osd) + self.log('remote for exp osd %s is %s' % (exp_osd, exp_remote)) + prefix = [ + '--no-mon-config', + '--log-file=/var/log/ceph/objectstore_tool.$pid.log', + ] + + if self.ceph_manager.rook: + assert False, 'not implemented' + + if not self.ceph_manager.cephadm: + # ceph-objectstore-tool might be temporarily absent during an + # upgrade - see http://tracker.ceph.com/issues/18014 + with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed: + while proceed(): + proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'], + wait=True, check_status=False, stdout=BytesIO(), + stderr=BytesIO()) + if proc.exitstatus == 0: + break + log.debug("ceph-objectstore-tool binary not present, trying again") + + # ceph-objectstore-tool might bogusly fail with "OSD has the store locked" + # see http://tracker.ceph.com/issues/19556 + with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed: + while proceed(): + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + 
[ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--op', 'list-pgs', + ]) + if proc.exitstatus == 0: + break + elif (proc.exitstatus == 1 and + proc.stderr.getvalue() == "OSD has the store locked"): + continue + else: + raise Exception("ceph-objectstore-tool: " + "exp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + + pgs = proc.stdout.getvalue().split('\n')[:-1] + if len(pgs) == 0: + self.log("No PGs found for osd.{osd}".format(osd=exp_osd)) + return + pg = random.choice(pgs) + #exp_path = teuthology.get_testdir(self.ceph_manager.ctx) + #exp_path = os.path.join(exp_path, '{0}.data'.format(self.cluster)) + exp_path = os.path.join('/var/log/ceph', # available inside 'shell' container + "exp.{pg}.{id}".format( + pg=pg, + id=exp_osd)) + if self.ceph_manager.cephadm: + exp_host_path = os.path.join( + '/var/log/ceph', + self.ceph_manager.ctx.ceph[self.ceph_manager.cluster].fsid, + "exp.{pg}.{id}".format( + pg=pg, + id=exp_osd)) + else: + exp_host_path = exp_path + + # export + # Can't use new export-remove op since this is part of upgrade testing + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--op', 'export', + '--pgid', pg, + '--file', exp_path, + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "export failure with status {ret}". + format(ret=proc.exitstatus)) + # remove + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--force', + '--op', 'remove', + '--pgid', pg, + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "remove failure with status {ret}". 
+ format(ret=proc.exitstatus)) + # If there are at least 2 dead osds we might move the pg + if exp_osd != imp_osd: + # If pg isn't already on this osd, then we will move it there + proc = self.run_ceph_objectstore_tool( + imp_remote, + 'osd.%s' % imp_osd, + prefix + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + '--op', 'list-pgs', + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "imp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] + if pg not in pgs: + self.log("Moving pg {pg} from osd.{fosd} to osd.{tosd}". + format(pg=pg, fosd=exp_osd, tosd=imp_osd)) + if imp_remote != exp_remote: + # Copy export file to the other machine + self.log("Transfer export file from {srem} to {trem}". + format(srem=exp_remote, trem=imp_remote)) + # just in case an upgrade make /var/log/ceph unreadable by non-root, + exp_remote.run(args=['sudo', 'chmod', '777', + '/var/log/ceph']) + imp_remote.run(args=['sudo', 'chmod', '777', + '/var/log/ceph']) + tmpexport = Remote.get_file(exp_remote, exp_host_path, + sudo=True) + if exp_host_path != exp_path: + # push to /var/log/ceph, then rename (we can't + # chmod 777 the /var/log/ceph/$fsid mountpoint) + Remote.put_file(imp_remote, tmpexport, exp_path) + imp_remote.run(args=[ + 'sudo', 'mv', exp_path, exp_host_path]) + else: + Remote.put_file(imp_remote, tmpexport, exp_host_path) + os.remove(tmpexport) + else: + # Can't move the pg after all + imp_osd = exp_osd + imp_remote = exp_remote + # import + proc = self.run_ceph_objectstore_tool( + imp_remote, 'osd.%s' % imp_osd, + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + '--log-file=/var/log/ceph/objectstore_tool.$pid.log', + '--op', 'import', + '--file', exp_path, + ]) + if proc.exitstatus == 1: + bogosity = "The OSD you are using is older than the exported PG" + if bogosity in proc.stderr.getvalue(): + 
self.log("OSD older than exported PG" + "...ignored") + elif proc.exitstatus == 10: + self.log("Pool went away before processing an import" + "...ignored") + elif proc.exitstatus == 11: + self.log("Attempt to import an incompatible export" + "...ignored") + elif proc.exitstatus == 12: + # this should be safe to ignore because we only ever move 1 + # copy of the pg at a time, and merge is only initiated when + # all replicas are peered and happy. /me crosses fingers + self.log("PG merged on target" + "...ignored") + elif proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "import failure with status {ret}". + format(ret=proc.exitstatus)) + cmd = "sudo rm -f {file}".format(file=exp_host_path) + exp_remote.run(args=cmd) + if imp_remote != exp_remote: + imp_remote.run(args=cmd) + + # apply low split settings to each pool + if not self.ceph_manager.cephadm: + for pool in self.ceph_manager.list_pools(): + cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 " + "--filestore-split-multiple 1' sudo -E " + + 'ceph-objectstore-tool ' + + ' '.join(prefix + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + ]) + + " --op apply-layout-settings --pool " + pool).format(id=osd) + proc = imp_remote.run(args=cmd, + wait=True, check_status=False, + stderr=StringIO()) + if 'Couldn\'t find pool' in proc.stderr.getvalue(): + continue + if proc.exitstatus: + raise Exception("ceph-objectstore-tool apply-layout-settings" + " failed with {status}".format(status=proc.exitstatus)) + + + def blackhole_kill_osd(self, osd=None): + """ + If all else fails, kill the osd. + :param osd: Osd to be killed. + """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Blackholing and then killing osd %s, live_osds are %s" % + (str(osd), str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.blackhole_kill_osd(osd) + + def revive_osd(self, osd=None, skip_admin_check=False): + """ + Revive the osd. 
+ :param osd: Osd to be revived. + """ + if osd is None: + osd = random.choice(self.dead_osds) + self.log("Reviving osd %s" % (str(osd),)) + self.ceph_manager.revive_osd( + osd, + self.revive_timeout, + skip_admin_check=skip_admin_check) + self.dead_osds.remove(osd) + self.live_osds.append(osd) + if self.random_eio > 0 and osd == self.rerrosd: + self.ceph_manager.set_config(self.rerrosd, + filestore_debug_random_read_err = self.random_eio) + self.ceph_manager.set_config(self.rerrosd, + bluestore_debug_random_read_err = self.random_eio) + + + def out_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.in_osds) + self.log("Removing osd %s, in_osds are: %s" % + (str(osd), str(self.in_osds))) + self.ceph_manager.mark_out_osd(osd) + self.in_osds.remove(osd) + self.out_osds.append(osd) + + def in_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.out_osds) + if osd in self.dead_osds: + return self.revive_osd(osd) + self.log("Adding osd %s" % (str(osd),)) + self.out_osds.remove(osd) + self.in_osds.append(osd) + self.ceph_manager.mark_in_osd(osd) + self.log("Added osd %s" % (str(osd),)) + + def reweight_osd_or_by_util(self, osd=None): + """ + Reweight an osd that is in + :param osd: Osd to be marked. 
+ """ + if osd is not None or random.choice([True, False]): + if osd is None: + osd = random.choice(self.in_osds) + val = random.uniform(.1, 1.0) + self.log("Reweighting osd %s to %s" % (str(osd), str(val))) + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(val)) + else: + # do it several times, the option space is large + for i in range(5): + options = { + 'max_change': random.choice(['0.05', '1.0', '3.0']), + 'overage': random.choice(['110', '1000']), + 'type': random.choice([ + 'reweight-by-utilization', + 'test-reweight-by-utilization']), + } + self.log("Reweighting by: %s"%(str(options),)) + self.ceph_manager.raw_cluster_cmd( + 'osd', + options['type'], + options['overage'], + options['max_change']) + + def primary_affinity(self, osd=None): + self.log("primary_affinity") + if osd is None: + osd = random.choice(self.in_osds) + if random.random() >= .5: + pa = random.random() + elif random.random() >= .5: + pa = 1 + else: + pa = 0 + self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa)) + self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', + str(osd), str(pa)) + + def thrash_cluster_full(self): + """ + Set and unset cluster full condition + """ + self.log('Setting full ratio to .001') + self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.001') + time.sleep(1) + self.log('Setting full ratio back to .95') + self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.95') + + def thrash_pg_upmap(self): + """ + Install or remove random pg_upmap entries in OSDMap + """ + self.log("thrash_pg_upmap") + from random import shuffle + out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty') + j = json.loads(out) + self.log('j is %s' % j) + try: + if random.random() >= .3: + pgs = self.ceph_manager.get_pg_stats() + if not pgs: + self.log('No pgs; doing nothing') + return + pg = random.choice(pgs) + pgid = str(pg['pgid']) + poolid = int(pgid.split('.')[0]) + sizes = [x['size'] for x in j['pools'] if 
x['pool'] == poolid] + if len(sizes) == 0: + self.log('No pools; doing nothing') + return + n = sizes[0] + osds = self.in_osds + self.out_osds + shuffle(osds) + osds = osds[0:n] + self.log('Setting %s to %s' % (pgid, osds)) + cmd = ['osd', 'pg-upmap', pgid] + [str(x) for x in osds] + self.log('cmd %s' % cmd) + self.ceph_manager.raw_cluster_cmd(*cmd) + else: + m = j['pg_upmap'] + if len(m) > 0: + shuffle(m) + pg = m[0]['pgid'] + self.log('Clearing pg_upmap on %s' % pg) + self.ceph_manager.raw_cluster_cmd( + 'osd', + 'rm-pg-upmap', + pg) + else: + self.log('No pg_upmap entries; doing nothing') + except CommandFailedError: + self.log('Failed to rm-pg-upmap, ignoring') + + def thrash_pg_upmap_items(self): + """ + Install or remove random pg_upmap_items entries in OSDMap + """ + self.log("thrash_pg_upmap_items") + from random import shuffle + out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty') + j = json.loads(out) + self.log('j is %s' % j) + try: + if random.random() >= .3: + pgs = self.ceph_manager.get_pg_stats() + if not pgs: + self.log('No pgs; doing nothing') + return + pg = random.choice(pgs) + pgid = str(pg['pgid']) + poolid = int(pgid.split('.')[0]) + sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid] + if len(sizes) == 0: + self.log('No pools; doing nothing') + return + n = sizes[0] + osds = self.in_osds + self.out_osds + shuffle(osds) + osds = osds[0:n*2] + self.log('Setting %s to %s' % (pgid, osds)) + cmd = ['osd', 'pg-upmap-items', pgid] + [str(x) for x in osds] + self.log('cmd %s' % cmd) + self.ceph_manager.raw_cluster_cmd(*cmd) + else: + m = j['pg_upmap_items'] + if len(m) > 0: + shuffle(m) + pg = m[0]['pgid'] + self.log('Clearing pg_upmap on %s' % pg) + self.ceph_manager.raw_cluster_cmd( + 'osd', + 'rm-pg-upmap-items', + pg) + else: + self.log('No pg_upmap entries; doing nothing') + except CommandFailedError: + self.log('Failed to rm-pg-upmap-items, ignoring') + + def force_recovery(self): + """ + Force recovery on some 
of PGs + """ + backfill = random.random() >= 0.5 + j = self.ceph_manager.get_pgids_to_force(backfill) + if j: + try: + if backfill: + self.ceph_manager.raw_cluster_cmd('pg', 'force-backfill', *j) + else: + self.ceph_manager.raw_cluster_cmd('pg', 'force-recovery', *j) + except CommandFailedError: + self.log('Failed to force backfill|recovery, ignoring') + + + def cancel_force_recovery(self): + """ + Force recovery on some of PGs + """ + backfill = random.random() >= 0.5 + j = self.ceph_manager.get_pgids_to_cancel_force(backfill) + if j: + try: + if backfill: + self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-backfill', *j) + else: + self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-recovery', *j) + except CommandFailedError: + self.log('Failed to force backfill|recovery, ignoring') + + def force_cancel_recovery(self): + """ + Force or cancel forcing recovery + """ + if random.random() >= 0.4: + self.force_recovery() + else: + self.cancel_force_recovery() + + def all_up(self): + """ + Make sure all osds are up and not out. + """ + while len(self.dead_osds) > 0: + self.log("reviving osd") + self.revive_osd() + while len(self.out_osds) > 0: + self.log("inning osd") + self.in_osd() + + def all_up_in(self): + """ + Make sure all osds are up and fully in. 
def test_pool_min_size(self):
    """
    Loop to selectively push PGs below their min_size and test that recovery
    still occurs.

    Runs until ``self.stopping`` is set.  On every iteration the largest
    ``k`` and the smallest ``m`` over all erasure-coded pools are looked up
    again, then OSDs are either killed or revived, and the loop waits for
    the cluster to become clean before the next round.
    """
    self.log("test_pool_min_size")
    self.all_up()
    time.sleep(60)  # buffer time for recovery to start.
    self.ceph_manager.wait_for_recovery(
        timeout=self.config.get('timeout')
        )
    minout = int(self.config.get("min_out", 1))
    minlive = int(self.config.get("min_live", 2))
    mindead = int(self.config.get("min_dead", 1))
    self.log("doing min_size thrashing")
    self.ceph_manager.wait_for_clean(timeout=180)
    assert self.ceph_manager.is_clean(), \
        'not clean before minsize thrashing starts'
    while not self.stopping:
        # look up k and m from all the pools on each loop, in case it
        # changes as the cluster runs
        k = 0
        m = 99
        has_pools = False
        pools_json = self.ceph_manager.get_osd_dump_json()['pools']

        for pool_json in pools_json:
            pool = pool_json['pool_name']
            has_pools = True
            pool_type = pool_json['type']  # 1 for rep, 3 for ec
            min_size = pool_json['min_size']
            self.log("pool {pool} min_size is {min_size}".format(
                pool=pool, min_size=min_size))
            try:
                # The returned value is not used; the query is kept because
                # it runs for every pool and can raise CommandFailedError
                # (presumably when the pool was removed meanwhile).
                self.ceph_manager.get_pool_property(
                    pool, 'erasure_code_profile')
                if pool_type != PoolType.ERASURE_CODED:
                    continue
                ec_profile = pool_json['erasure_code_profile']
                ec_profile_json = self.ceph_manager.raw_cluster_cmd(
                    'osd',
                    'erasure-code-profile',
                    'get',
                    ec_profile,
                    '--format=json')
                ec_json = json.loads(ec_profile_json)
                local_k = int(ec_json['k'])
                local_m = int(ec_json['m'])
                self.log("pool {pool} local_k={k} local_m={m}".format(
                    pool=pool, k=local_k, m=local_m))
                if local_k > k:
                    self.log("setting k={local_k} from previous {k}".format(
                        local_k=local_k, k=k))
                    k = local_k
                if local_m < m:
                    self.log("setting m={local_m} from previous {m}".format(
                        local_m=local_m, m=m))
                    m = local_m
            except CommandFailedError:
                # self.log() is called with one pre-formatted string
                # everywhere else; format here instead of passing the pool
                # as a second positional argument.
                self.log("failed to read erasure_code_profile. "
                         "%s was likely removed" % (pool,))
                continue

        if has_pools:
            self.log("using k={k}, m={m}".format(k=k, m=m))
        else:
            self.log("No pools yet, waiting")
            time.sleep(5)
            continue

        if minout > len(self.out_osds):  # kill OSDs and mark out
            self.log("forced to out an osd")
            self.kill_osd(mark_out=True)
            continue
        elif mindead > len(self.dead_osds):  # kill OSDs but force timeout
            self.log("forced to kill an osd")
            self.kill_osd()
            continue
        else:  # make mostly-random choice to kill or revive OSDs
            minup = max(minlive, k)
            rand_val = random.uniform(0, 1)
            self.log("choosing based on number of live OSDs and rand val {rand}".
                     format(rand=rand_val))
            if len(self.live_osds) > minup+1 and rand_val < 0.5:
                # chose to knock out as many OSDs as we can w/out downing PGs

                most_killable = min(len(self.live_osds) - minup, m)
                self.log("chose to kill {n} OSDs".format(n=most_killable))
                # NOTE(review): range(1, n) kills n - 1 OSDs although the
                # message above announces n -- confirm this margin is
                # intended before changing it.
                for _ in range(1, most_killable):
                    self.kill_osd(mark_out=True)
                time.sleep(10)
                # try a few times since there might be a concurrent pool
                # creation or deletion
                with safe_while(
                        sleep=25, tries=5,
                        action='check for active or peered') as proceed:
                    while proceed():
                        if self.ceph_manager.all_active_or_peered():
                            break
                        self.log('not all PGs are active or peered')
            else:  # chose to revive OSDs, bring up a random fraction of the dead ones
                self.log("chose to revive osds")
                # NOTE(review): the loop index is handed to revive_osd();
                # other callers invoke it without arguments -- verify that
                # an index is a valid argument here.
                for i in range(1, int(rand_val * len(self.dead_osds))):
                    self.revive_osd(i)

        # let PGs repair themselves or our next knockout might kill one
        self.ceph_manager.wait_for_clean(timeout=self.config.get('timeout'))

    # / while not self.stopping
    self.all_up_in()

    self.ceph_manager.wait_for_recovery(
        timeout=self.config.get('timeout')
        )
+ """ + the_one = random.choice(self.live_osds) + self.log("inject_pause on osd.{osd}".format(osd=the_one)) + self.log( + "Testing {key} pause injection for duration {duration}".format( + key=conf_key, + duration=duration + )) + self.log( + "Checking after {after}, should_be_down={shouldbedown}".format( + after=check_after, + shouldbedown=should_be_down + )) + self.ceph_manager.set_config(the_one, **{conf_key: duration}) + if not should_be_down: + return + time.sleep(check_after) + status = self.ceph_manager.get_osd_status() + assert the_one in status['down'] + time.sleep(duration - check_after + 20) + status = self.ceph_manager.get_osd_status() + assert not the_one in status['down'] + + def test_backfill_full(self): + """ + Test backfills stopping when the replica fills up. + + First, use injectfull admin command to simulate a now full + osd by setting it to 0 on all of the OSDs. + + Second, on a random subset, set + osd_debug_skip_full_check_in_backfill_reservation to force + the more complicated check in do_scan to be exercised. + + Then, verify that all backfillings stop. 
def generate_random_sharding(self):
    """
    Build a random sharding definition string.

    Each of the prefixes 'm', 'O', 'P' and 'L' is included with a coin
    flip.  An included prefix gets 1-5 columns and, optionally, a hash
    range, giving one of these forms::

        P            one column, no hash range
        P(3)         several columns, no hash range
        P(3,5-)      hash range with a lower bound only
        P(3,5-21)    hash range with lower and upper bound

    :returns: the chosen definitions joined by single spaces; the empty
              string when no prefix was chosen.
    """
    shards = []
    for prefix in ('m', 'O', 'P', 'L'):
        if not random.choice([False, True]):
            continue
        columns = random.randint(1, 5)
        if random.choice([False, True]):
            low_hash = random.choice([0, 5, 8])
            if random.choice([False, True]):
                # the upper bound is drawn as an offset from the lower one
                high_hash = random.choice([8, 16, 30]) + low_hash
                shards.append('{0}({1},{2}-{3})'.format(
                    prefix, columns, low_hash, high_hash))
            else:
                shards.append('{0}({1},{2}-)'.format(
                    prefix, columns, low_hash))
        elif columns == 1:
            shards.append(prefix)
        else:
            shards.append('{0}({1})'.format(prefix, columns))
    return ' '.join(shards)
+ """ + + osd = random.choice(self.dead_osds) + remote = self.ceph_manager.find_remote('osd', osd) + FSPATH = self.ceph_manager.get_filepath() + + prefix = [ + '--no-mon-config', + '--log-file=/var/log/ceph/bluestore_tool.$pid.log', + '--log-level=10', + '--path', FSPATH.format(id=osd) + ] + + # sanity check if bluestore-tool accessible + self.log('checking if target objectstore is bluestore on osd.%s' % osd) + cmd = prefix + [ + 'show-label' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool access failed.") + + # check if sharding is possible + self.log('checking if target bluestore supports sharding on osd.%s' % osd) + cmd = prefix + [ + 'show-sharding' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + self.log("Unable to test resharding, " + "ceph-bluestore-tool does not support it.") + return + + # now go for reshard to something else + self.log('applying new sharding to bluestore on osd.%s' % osd) + new_sharding = self.config.get('bluestore_new_sharding','random') + + if new_sharding == 'random': + self.log('generate random sharding') + new_sharding = self.generate_random_sharding() + + self.log("applying new sharding: " + new_sharding) + cmd = prefix + [ + '--sharding', new_sharding, + 'reshard' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool resharding failed.") + + # now do fsck to + self.log('running fsck to verify new sharding on osd.%s' % osd) + cmd = prefix + [ + 'fsck' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool fsck failed.") + self.log('resharding successfully completed') + + def test_bluestore_reshard(self): + """ + 1) kills an osd + 2) reshards bluestore on killed osd + 3) revives the osd + """ + self.log('test_bluestore_reshard 
def choose_action(self):
    """
    Random action selector.

    Builds a list of ``(callable, weight)`` pairs from the current OSD
    state and the thrasher configuration, then draws one of them with a
    probability proportional to its weight.

    :returns: the selected callable (to be invoked without arguments), or
              None when no entry was selected (for example when every
              weight is 0).
    """
    chance_down = self.config.get('chance_down', 0.4)
    chance_test_backfill_full = \
        self.config.get('chance_test_backfill_full', 0)
    # an integer chance_down is interpreted as a percentage
    if isinstance(chance_down, int):
        chance_down = float(chance_down) / 100
    minin = self.minin
    minout = int(self.config.get("min_out", 0))
    minlive = int(self.config.get("min_live", 2))
    mindead = int(self.config.get("min_dead", 0))

    self.log('choose_action: min_in %d min_out '
             '%d min_live %d min_dead %d '
             'chance_down %.2f' %
             (minin, minout, minlive, mindead, chance_down))
    actions = []
    if len(self.in_osds) > minin:
        actions.append((self.out_osd, 1.0,))
    if len(self.live_osds) > minlive and chance_down > 0:
        actions.append((self.kill_osd, chance_down,))
    if len(self.out_osds) > minout:
        actions.append((self.in_osd, 1.7,))
    if len(self.dead_osds) > mindead:
        actions.append((self.revive_osd, 1.0,))
    if self.config.get('thrash_primary_affinity', True):
        actions.append((self.primary_affinity, 1.0,))
    actions.append((self.reweight_osd_or_by_util,
                    self.config.get('reweight_osd', .5),))
    actions.append((self.grow_pool,
                    self.config.get('chance_pgnum_grow', 0),))
    actions.append((self.shrink_pool,
                    self.config.get('chance_pgnum_shrink', 0),))
    actions.append((self.fix_pgp_num,
                    self.config.get('chance_pgpnum_fix', 0),))
    actions.append((self.test_pool_min_size,
                    self.config.get('chance_test_min_size', 0),))
    actions.append((self.test_backfill_full,
                    chance_test_backfill_full,))
    if self.chance_thrash_cluster_full > 0:
        actions.append((self.thrash_cluster_full,
                        self.chance_thrash_cluster_full,))
    if self.chance_thrash_pg_upmap > 0:
        actions.append((self.thrash_pg_upmap,
                        self.chance_thrash_pg_upmap,))
    if self.chance_thrash_pg_upmap_items > 0:
        actions.append((self.thrash_pg_upmap_items,
                        self.chance_thrash_pg_upmap_items,))
    if self.chance_force_recovery > 0:
        actions.append((self.force_cancel_recovery,
                        self.chance_force_recovery))

    for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
        # `key=key` freezes the current loop value inside each lambda.
        # A plain closure would look `key` up only when the action is
        # finally called, i.e. after the loop ended, so every pause action
        # would inject 'filestore_inject_stall'.
        actions.append((
            lambda key=key:
            self.inject_pause(key,
                              self.config.get('pause_short', 3),
                              0,
                              False),
            self.config.get('chance_inject_pause_short', 1),))
        actions.append((
            lambda key=key:
            self.inject_pause(key,
                              self.config.get('pause_long', 80),
                              self.config.get('pause_check_after', 70),
                              True),
            self.config.get('chance_inject_pause_long', 0),))

    # only consider resharding if objectstore is bluestore
    cluster_name = self.ceph_manager.cluster
    cluster = self.ceph_manager.ctx.ceph[cluster_name]
    if cluster.conf.get('osd', {}).get('osd objectstore', 'bluestore') == 'bluestore':
        actions.append((self.test_bluestore_reshard,
                        self.config.get('chance_bluestore_reshard', 0),))

    # weighted draw: walk the list, consuming each entry's weight
    total = sum(prob for (_, prob) in actions)
    val = random.uniform(0, total)
    for (action, prob) in actions:
        if val < prob:
            return action
        val -= prob
    return None
@log_exc
def do_dump_ops(self):
    """
    Keep dumping op state from all live osds until self.stopping is set.

    For each osd in self.live_osds the admin socket commands
    dump_ops_in_flight, dump_blocked_ops and dump_historic_ops are issued
    in that order, with the output sent to DEVNULL.
    """
    self.log("starting do_dump_ops")
    dump_commands = ('dump_ops_in_flight',
                     'dump_blocked_ops',
                     'dump_historic_ops')
    while not self.stopping:
        for osd in self.live_osds:
            for dump_command in dump_commands:
                # Ignore errors because live_osds is in flux
                self.ceph_manager.osd_admin_socket(
                    osd,
                    command=[dump_command],
                    check_status=False,
                    timeout=30,
                    stdout=DEVNULL)
        # zero-length sleep between rounds
        gevent.sleep(0)
+ """ + delay = float(self.noscrub_toggle_delay) + scrub_state = "none" + self.log("starting do_noscrub_toggle with a delay of {0}".format(delay)) + while not self.stopping: + if scrub_state == "none": + self.ceph_manager.raw_cluster_cmd('osd', 'set', 'noscrub') + scrub_state = "noscrub" + elif scrub_state == "noscrub": + self.ceph_manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + scrub_state = "both" + elif scrub_state == "both": + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + scrub_state = "nodeep-scrub" + else: + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') + scrub_state = "none" + gevent.sleep(delay) + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') + + @log_exc + def _do_thrash(self): + """ + Loop to select random actions to thrash ceph manager with. + """ + cleanint = self.config.get("clean_interval", 60) + scrubint = self.config.get("scrub_interval", -1) + maxdead = self.config.get("max_dead", 0) + delay = self.config.get("op_delay", 5) + self.rerrosd = self.live_osds[0] + if self.random_eio > 0: + self.ceph_manager.inject_args('osd', self.rerrosd, + 'filestore_debug_random_read_err', + self.random_eio) + self.ceph_manager.inject_args('osd', self.rerrosd, + 'bluestore_debug_random_read_err', + self.random_eio) + self.log("starting do_thrash") + while not self.stopping: + to_log = [str(x) for x in ["in_osds: ", self.in_osds, + "out_osds: ", self.out_osds, + "dead_osds: ", self.dead_osds, + "live_osds: ", self.live_osds]] + self.log(" ".join(to_log)) + if random.uniform(0, 1) < (float(delay) / cleanint): + while len(self.dead_osds) > maxdead: + self.revive_osd() + for osd in self.in_osds: + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(1)) + if random.uniform(0, 1) < float( + self.config.get('chance_test_map_discontinuity', 0)) \ + and len(self.live_osds) > 5: # avoid m=2,k=2 stall, w/ some buffer for crush being 
class ObjectStoreTool:
    """
    Helper that builds and runs ceph-objectstore-tool commands against
    the data directory of one osd.

    :param manager: object providing get_object_primary(),
                    get_object_pg_with_shard(), get_filepath(), kill_osd(),
                    revive_osd(), wait_till_osd_is_up(), log() and ctx
    :param pool: pool the object lives in
    :param osd: (keyword) osd id, or "primary" to resolve the primary osd
                of object_name
    :param object_name: (keyword) object to operate on; optional
    :param do_revive: (keyword) revive the osd after run(); default True
    """

    def __init__(self, manager, pool, **kwargs):
        self.manager = manager
        self.pool = pool
        self.osd = kwargs.get('osd', None)
        self.object_name = kwargs.get('object_name', None)
        self.do_revive = kwargs.get('do_revive', True)
        if self.osd and self.pool and self.object_name:
            if self.osd == "primary":
                self.osd = self.manager.get_object_primary(self.pool,
                                                           self.object_name)
        assert self.osd is not None
        if self.object_name:
            # pgid is only needed (and only set) when an object is targeted
            self.pgid = self.manager.get_object_pg_with_shard(self.pool,
                                                              self.object_name,
                                                              self.osd)
        osd_role = 'osd.{o}'.format(o=self.osd)
        self.remote = next(iter(
            self.manager.ctx.cluster.only(osd_role).remotes.keys()))
        path = self.manager.get_filepath().format(id=self.osd)
        self.paths = ("--data-path {path} --journal-path {path}/journal".
                      format(path=path))

    def build_cmd(self, options, args, stdin):
        """
        Assemble the shell script that invokes ceph-objectstore-tool.

        :param options: option string placed before ``args``
        :param args: positional argument string
        :param stdin: data (bytes or str) to feed to the tool on standard
                      input, or a false value for none.  It is embedded
                      base64-encoded and decoded again on the remote side.
        :returns: the script as a single newline-joined string
        """
        lines = []
        if self.object_name:
            # first resolve the object's listing entry into $object
            lines.append("object=$(sudo adjust-ulimits ceph-objectstore-tool "
                         "{paths} --pgid {pgid} --op list |"
                         "grep '\"oid\":\"{name}\"')".
                         format(paths=self.paths,
                                pgid=self.pgid,
                                name=self.object_name))
            args = '"$object" ' + args
            options += " --pgid {pgid}".format(pgid=self.pgid)
        cmd = ("sudo adjust-ulimits ceph-objectstore-tool {paths} {options} {args}".
               format(paths=self.paths,
                      args=args,
                      options=options))
        if stdin:
            # base64.encode() copies between two file objects and cannot
            # be called with one argument; b64encode() returns the encoded
            # bytes, decoded here so they format as plain text.
            raw = stdin if isinstance(stdin, bytes) else str(stdin).encode()
            payload = base64.b64encode(raw).decode('ascii')
            cmd = ("echo {payload} | base64 --decode | {cmd}".
                   format(payload=payload,
                          cmd=cmd))
        lines.append(cmd)
        return "\n".join(lines)

    def run(self, options, args):
        """
        Kill the osd, run the tool on it, and revive the osd afterwards
        (unless do_revive is false).

        :raises Exception: carrying the tool's stdout and stderr when it
                           exits with a non-zero status
        """
        self.manager.kill_osd(self.osd)
        cmd = self.build_cmd(options, args, None)
        self.manager.log(cmd)
        try:
            proc = self.remote.run(args=['bash', '-e', '-x', '-c', cmd],
                                   check_status=False,
                                   stdout=BytesIO(),
                                   stderr=BytesIO())
            proc.wait()
            if proc.exitstatus != 0:
                self.manager.log("failed with " + str(proc.exitstatus))
                error = proc.stdout.getvalue().decode() + " " + \
                    proc.stderr.getvalue().decode()
                raise Exception(error)
        finally:
            # bring the osd back even when the tool failed
            if self.do_revive:
                self.manager.revive_osd(self.osd)
                self.manager.wait_till_osd_is_up(self.osd, 300)
def ceph(self, cmd, **kwargs):
    """
    Run a Ceph admin command given as one shell-style string.

    The string is split with shlex and handed to run_cluster_cmd().  A
    caller-supplied ``args`` keyword is discarded; ``stdout`` and
    ``stderr`` default to fresh StringIO objects; all other keywords are
    forwarded unchanged.
    """
    passthrough = {key: value for key, value in kwargs.items()
                   if key not in ('args', 'stdout', 'stderr')}
    out_stream = kwargs['stdout'] if 'stdout' in kwargs else StringIO()
    err_stream = kwargs['stderr'] if 'stderr' in kwargs else StringIO()
    return self.run_cluster_cmd(args=shlex.split(cmd),
                                stdout=out_stream,
                                stderr=err_stream,
                                **passthrough)
def get_mon_socks(self):
    """
    Get monitor sockets.

    Runs ``mon dump`` in JSON format and collects the ``addr`` of every
    entry in each monitor's public address vector.

    :return socks: tuple of strings; strings are individual sockets.
    """
    from json import loads

    # raw_cluster_cmd() takes the command words as separate positional
    # arguments; passing them wrapped in one list would nest that list
    # inside the final argv.
    output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump'))
    socks = []
    for mon in output['mons']:
        socks.extend(addrvec_mem['addr']
                     for addrvec_mem in mon['public_addrs']['addrvec'])
    return tuple(socks)
def flush_pg_stats(self, osds, no_wait=None, wait_for_mon=300):
    """
    Ask every listed OSD to flush its pg stats and, unless disabled, poll
    the monitor until it reports a stat sequence at least as new as the
    one each flush returned.

    :param osds: OSD ids to flush
    :param no_wait: OSD ids that are flushed but not waited for, e.g.
                    because they were moved out of the osdmap and the
                    monitor can no longer report their sequence
    :param wait_for_mon: seconds to wait for the monitor, shared by all
                         OSDs of this call; 0 disables waiting
    :raises Exception: when the time budget is used up before an OSD's
                       sequence shows up on the monitor
    """
    wanted = {}
    for osd_id in osds:
        reply = self.raw_cluster_cmd('tell', 'osd.%d' % osd_id,
                                     'flush_pg_stats')
        wanted[osd_id] = int(reply)
    if not wait_for_mon:
        return
    skipped = [] if no_wait is None else no_wait
    poll_interval = 1
    budget = wait_for_mon
    for osd_id, need in wanted.items():
        if osd_id in skipped:
            continue
        got = 0
        caught_up = False
        while budget > 0:
            got = int(self.raw_cluster_cmd('osd', 'last-stat-seq',
                                           'osd.%d' % osd_id))
            self.log('need seq {need} got {got} for osd.{osd}'.format(
                need=need, got=got, osd=osd_id))
            if got >= need:
                caught_up = True
                break
            time.sleep(poll_interval)
            budget -= poll_interval
        if not caught_up:
            raise Exception('timed out waiting for mon to be updated with '
                            'osd.{osd}: {got} < {need}'.
                            format(osd=osd_id, got=got, need=need))
def rados_write_objects(self, pool, num_objects, size,
                        timelimit, threads, cleanup=False):
    """
    Run a ``rados bench ... write`` against ``pool`` through do_rados().

    ``threads`` is accepted but not used yet.  Unless ``cleanup`` is
    true, ``--no-cleanup`` is appended to the command.
    """
    bench_words = ['--num-objects', num_objects,
                   '-b', size,
                   'bench', timelimit,
                   'write']
    bench_words += [] if cleanup else ['--no-cleanup']
    # handed over lazily; every word is stringified on consumption
    return self.do_rados((str(word) for word in bench_words), pool=pool)
def get_pgid(self, pool, pgnum):
    """
    Build the textual id of a placement group.

    :param pool: pool name
    :param pgnum: pg number
    :returns: "<pool number>.<pgnum>", the pool number being looked up
              through get_pool_num().
    """
    return "{poolnum}.{pgnum}".format(poolnum=self.get_pool_num(pool),
                                      pgnum=pgnum)
def get_pg_primary(self, pool, pgnum):
    """
    Get the primary osd for pool, pgnum (e.g. (data, 0) -> 0).

    Runs ``pg map <pgid> --format=json``, discards the first line of the
    output and parses the rest as JSON.

    :param pool: pool name
    :param pgnum: pg number
    :returns: the first entry of the pg's ``acting`` list, as int
    """
    pg_str = self.get_pgid(pool, pgnum)
    output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json')
    # everything after the first output line is the JSON document
    j = json.loads('\n'.join(output.split('\n')[1:]))
    return int(j['acting'][0])
def get_osd_status(self):
    """
    Get osd statuses sorted by states that the osds are in.

    The in/out/up/down lists are parsed from the ``osd.N ...`` lines of
    raw_osd_status(); the dead/live lists come from whether the osd
    daemons known to the test context are running.

    :returns: dict with the keys 'in', 'out', 'up', 'down', 'dead' and
              'live' (lists of int osd ids) and 'raw' (the osd lines the
              first four lists were parsed from)
    """
    osd_lines = [line for line in self.raw_osd_status().split('\n')
                 if line.startswith('osd.')
                 and ('up' in line or 'down' in line)]
    self.log(osd_lines)

    def ids_in_state(state):
        # the state must appear as a separate word; the id follows 'osd.'
        marker = ' {0} '.format(state)
        return [int(line[4:].split()[0])
                for line in osd_lines if marker in line]

    # Classify every daemon in one pass, so a daemon that changes state
    # while we look cannot land in both lists or in neither.
    live_osds = []
    dead_osds = []
    for daemon in self.ctx.daemons.iter_daemons_of_role('osd', self.cluster):
        if daemon.running():
            live_osds.append(int(daemon.id_))
        else:
            dead_osds.append(int(daemon.id_))

    return {'in': ids_in_state('in'), 'out': ids_in_state('out'),
            'up': ids_in_state('up'), 'down': ids_in_state('down'),
            'dead': dead_osds, 'live': live_osds,
            'raw': osd_lines}
+ """ + name = "" + with self.lock: + name = "unique_pool_%s" % (str(self.next_pool_id),) + self.next_pool_id += 1 + self.create_pool( + name, + pg_num, + erasure_code_profile_name=erasure_code_profile_name, + min_size=min_size, + erasure_code_use_overwrites=erasure_code_use_overwrites) + return name + + @contextlib.contextmanager + def pool(self, pool_name, pg_num=16, erasure_code_profile_name=None): + self.create_pool(pool_name, pg_num, erasure_code_profile_name) + yield + self.remove_pool(pool_name) + + def create_pool(self, pool_name, pg_num=16, + erasure_code_profile_name=None, + min_size=None, + erasure_code_use_overwrites=False): + """ + Create a pool named from the pool_name parameter. + :param pool_name: name of the pool being created. + :param pg_num: initial number of pgs. + :param erasure_code_profile_name: if set and !None create an + erasure coded pool using the profile + :param erasure_code_use_overwrites: if true, allow overwrites + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(pg_num, int) + assert pool_name not in self.pools + self.log("creating pool_name %s" % (pool_name,)) + if erasure_code_profile_name: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num), str(pg_num), + 'erasure', erasure_code_profile_name) + else: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num)) + if min_size is not None: + self.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, + 'min_size', + str(min_size)) + if erasure_code_use_overwrites: + self.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, + 'allow_ec_overwrites', + 'true') + self.raw_cluster_cmd( + 'osd', 'pool', 'application', 'enable', + pool_name, 'rados', '--yes-i-really-mean-it', + run.Raw('||'), 'true') + self.pools[pool_name] = pg_num + time.sleep(1) + + def add_pool_snap(self, pool_name, snap_name): + """ + Add pool snapshot + :param pool_name: name of pool to snapshot + :param snap_name: name of snapshot to take + """ + 
self.raw_cluster_cmd('osd', 'pool', 'mksnap', + str(pool_name), str(snap_name)) + + def remove_pool_snap(self, pool_name, snap_name): + """ + Remove pool snapshot + :param pool_name: name of pool to snapshot + :param snap_name: name of snapshot to remove + """ + self.raw_cluster_cmd('osd', 'pool', 'rmsnap', + str(pool_name), str(snap_name)) + + def remove_pool(self, pool_name): + """ + Remove the indicated pool + :param pool_name: Pool to be removed + """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + self.log("removing pool_name %s" % (pool_name,)) + del self.pools[pool_name] + self.raw_cluster_cmd('osd', 'pool', 'rm', pool_name, pool_name, + "--yes-i-really-really-mean-it") + + def get_pool(self): + """ + Pick a random pool + """ + with self.lock: + if self.pools: + return random.sample(self.pools.keys(), 1)[0] + + def get_pool_pg_num(self, pool_name): + """ + Return the number of pgs in the pool specified. + """ + with self.lock: + assert isinstance(pool_name, str) + if pool_name in self.pools: + return self.pools[pool_name] + return 0 + + def get_pool_property(self, pool_name, prop): + """ + :param pool_name: pool + :param prop: property to be checked. + :returns: property as string + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + output = self.raw_cluster_cmd( + 'osd', + 'pool', + 'get', + pool_name, + prop) + return output.split()[1] + + def get_pool_int_property(self, pool_name, prop): + return int(self.get_pool_property(pool_name, prop)) + + def set_pool_property(self, pool_name, prop, val): + """ + :param pool_name: pool + :param prop: property to be set. + :param val: value to set. + + This routine retries if set operation fails. 
+ """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + assert isinstance(val, int) + tries = 0 + while True: + r = self.raw_cluster_cmd_result( + 'osd', + 'pool', + 'set', + pool_name, + prop, + str(val)) + if r != 11: # EAGAIN + break + tries += 1 + if tries > 50: + raise Exception('timed out getting EAGAIN ' + 'when setting pool property %s %s = %s' % + (pool_name, prop, val)) + self.log('got EAGAIN setting pool property, ' + 'waiting a few seconds...') + time.sleep(2) + + def expand_pool(self, pool_name, by, max_pgs): + """ + Increase the number of pgs in a pool + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + return False + if (self.pools[pool_name] + by) > max_pgs: + return False + self.log("increase pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] + by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + return True + + def contract_pool(self, pool_name, by, min_pgs): + """ + Decrease the number of pgs in a pool + """ + with self.lock: + self.log('contract_pool %s by %s min %s' % ( + pool_name, str(by), str(min_pgs))) + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + self.log('too many creating') + return False + proj = self.pools[pool_name] - by + if proj < min_pgs: + self.log('would drop below min_pgs, proj %d, currently %d' % (proj,self.pools[pool_name],)) + return False + self.log("decrease pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] - by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + return True + + def stop_pg_num_changes(self): + """ + Reset all pg_num_targets back to pg_num, canceling splits and merges + """ + self.log('Canceling any pending splits or merges...') + osd_dump = self.get_osd_dump_json() 
+ try: + for pool in osd_dump['pools']: + if pool['pg_num'] != pool['pg_num_target']: + self.log('Setting pool %s (%d) pg_num %d -> %d' % + (pool['pool_name'], pool['pool'], + pool['pg_num_target'], + pool['pg_num'])) + self.raw_cluster_cmd('osd', 'pool', 'set', pool['pool_name'], + 'pg_num', str(pool['pg_num'])) + except KeyError: + # we don't support pg_num_target before nautilus + pass + + def set_pool_pgpnum(self, pool_name, force): + """ + Set pgpnum property of pool_name pool. + """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + if not force and self.get_num_creating() > 0: + return False + self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name]) + return True + + def list_pg_unfound(self, pgid): + """ + return list of unfound pgs with the id specified + """ + r = None + offset = {} + while True: + out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_unfound', + json.dumps(offset)) + j = json.loads(out) + if r is None: + r = j + else: + r['objects'].extend(j['objects']) + if not 'more' in j: + break + if j['more'] == 0: + break + offset = j['objects'][-1]['oid'] + if 'more' in r: + del r['more'] + return r + + def get_pg_stats(self): + """ + Dump the cluster and get pg stats + """ + out = self.raw_cluster_cmd('pg', 'dump', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + try: + return j['pg_map']['pg_stats'] + except KeyError: + return j['pg_stats'] + + def get_osd_df(self, osdid): + """ + Get the osd df stats + """ + out = self.raw_cluster_cmd('osd', 'df', 'name', 'osd.{}'.format(osdid), + '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return j['nodes'][0] + + def get_pool_df(self, name): + """ + Get the pool df stats + """ + out = self.raw_cluster_cmd('df', 'detail', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return next((p['stats'] for p in j['pools'] if p['name'] == name), + None) + + def get_pgids_to_force(self, backfill): + """ + Return 
the randomized list of PGs that can have their recovery/backfill forced + """ + j = self.get_pg_stats(); + pgids = [] + if backfill: + wanted = ['degraded', 'backfilling', 'backfill_wait'] + else: + wanted = ['recovering', 'degraded', 'recovery_wait'] + for pg in j: + status = pg['state'].split('+') + for t in wanted: + if random.random() > 0.5 and not ('forced_backfill' in status or 'forced_recovery' in status) and t in status: + pgids.append(pg['pgid']) + break + return pgids + + def get_pgids_to_cancel_force(self, backfill): + """ + Return the randomized list of PGs whose recovery/backfill priority is forced + """ + j = self.get_pg_stats(); + pgids = [] + if backfill: + wanted = 'forced_backfill' + else: + wanted = 'forced_recovery' + for pg in j: + status = pg['state'].split('+') + if wanted in status and random.random() > 0.5: + pgids.append(pg['pgid']) + return pgids + + def compile_pg_status(self): + """ + Return a histogram of pg state values + """ + ret = {} + j = self.get_pg_stats() + for pg in j: + for status in pg['state'].split('+'): + if status not in ret: + ret[status] = 0 + ret[status] += 1 + return ret + + @wait_for_pg_stats # type: ignore + def with_pg_state(self, pool, pgnum, check): + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + assert(check(stats['state'])) + + @wait_for_pg_stats # type: ignore + def with_pg(self, pool, pgnum, check): + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + return check(stats) + + def get_last_scrub_stamp(self, pool, pgnum): + """ + Get the timestamp of the last scrub. 
+ """ + stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum)) + return stats["last_scrub_stamp"] + + def do_pg_scrub(self, pool, pgnum, stype): + """ + Scrub pg and wait for scrubbing to finish + """ + init = self.get_last_scrub_stamp(pool, pgnum) + RESEND_TIMEOUT = 120 # Must be a multiple of SLEEP_TIME + FATAL_TIMEOUT = RESEND_TIMEOUT * 3 + SLEEP_TIME = 10 + timer = 0 + while init == self.get_last_scrub_stamp(pool, pgnum): + assert timer < FATAL_TIMEOUT, "fatal timeout trying to " + stype + self.log("waiting for scrub type %s" % (stype,)) + if (timer % RESEND_TIMEOUT) == 0: + self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum)) + # The first time in this loop is the actual request + if timer != 0 and stype == "repair": + self.log("WARNING: Resubmitted a non-idempotent repair") + time.sleep(SLEEP_TIME) + timer += SLEEP_TIME + + def wait_snap_trimming_complete(self, pool): + """ + Wait for snap trimming on pool to end + """ + POLL_PERIOD = 10 + FATAL_TIMEOUT = 600 + start = time.time() + poolnum = self.get_pool_num(pool) + poolnumstr = "%s." % (poolnum,) + while (True): + now = time.time() + if (now - start) > FATAL_TIMEOUT: + assert (now - start) < FATAL_TIMEOUT, \ + 'failed to complete snap trimming before timeout' + all_stats = self.get_pg_stats() + trimming = False + for pg in all_stats: + if (poolnumstr in pg['pgid']) and ('snaptrim' in pg['state']): + self.log("pg {pg} in trimming, state: {state}".format( + pg=pg['pgid'], + state=pg['state'])) + trimming = True + if not trimming: + break + self.log("{pool} still trimming, waiting".format(pool=pool)) + time.sleep(POLL_PERIOD) + + def get_single_pg_stats(self, pgid): + """ + Return pg for the pgid specified. 
+ """ + all_stats = self.get_pg_stats() + + for pg in all_stats: + if pg['pgid'] == pgid: + return pg + + return None + + def get_object_pg_with_shard(self, pool, name, osdid): + """ + """ + pool_dump = self.get_pool_dump(pool) + object_map = self.get_object_map(pool, name) + if pool_dump["type"] == PoolType.ERASURE_CODED: + shard = object_map['acting'].index(osdid) + return "{pgid}s{shard}".format(pgid=object_map['pgid'], + shard=shard) + else: + return object_map['pgid'] + + def get_object_primary(self, pool, name): + """ + """ + object_map = self.get_object_map(pool, name) + return object_map['acting_primary'] + + def get_object_map(self, pool, name): + """ + osd map --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('--format=json', 'osd', 'map', pool, name) + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump_json(self): + """ + osd dump --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('osd', 'dump', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump(self): + """ + Dump osds + :returns: all osds + """ + return self.get_osd_dump_json()['osds'] + + def get_osd_metadata(self): + """ + osd metadata --format=json converted to a python object + :returns: the python object containing osd metadata information + """ + out = self.raw_cluster_cmd('osd', 'metadata', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_mgr_dump(self): + out = self.raw_cluster_cmd('mgr', 'dump', '--format=json') + return json.loads(out) + + def get_stuck_pgs(self, type_, threshold): + """ + :returns: stuck pg information from the cluster + """ + out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold), + '--format=json') + return json.loads(out).get('stuck_pg_stats',[]) + + def get_num_unfound_objects(self): + """ + Check cluster status to get the number of unfound objects + """ 
+ status = self.raw_cluster_status() + self.log(status) + return status['pgmap'].get('unfound_objects', 0) + + def get_num_creating(self): + """ + Find the number of pgs in creating mode. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if 'creating' in pg['state']: + num += 1 + return num + + def get_num_active_clean(self): + """ + Find the number of active and clean pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active_clean(pgs) + + def _get_num_active_clean(self, pgs): + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + pg['state'].count('clean') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_num_active_recovered(self): + """ + Find the number of active and recovered pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active_recovered(pgs) + + def _get_num_active_recovered(self, pgs): + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + not pg['state'].count('recover') and + not pg['state'].count('backfilling') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_is_making_recovery_progress(self): + """ + Return whether there is recovery progress discernable in the + raw cluster status + """ + status = self.raw_cluster_status() + kps = status['pgmap'].get('recovering_keys_per_sec', 0) + bps = status['pgmap'].get('recovering_bytes_per_sec', 0) + ops = status['pgmap'].get('recovering_objects_per_sec', 0) + return kps > 0 or bps > 0 or ops > 0 + + def get_num_active(self): + """ + Find the number of active pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active(pgs) + + def _get_num_active(self, pgs): + num = 0 + for pg in pgs: + if pg['state'].count('active') and not pg['state'].count('stale'): + num += 1 + return num + + def get_num_down(self): + """ + Find the number of pgs that are down. 
+ """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if ((pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_active_down(self): + """ + Find the number of pgs that are either active or down. + """ + pgs = self.get_pg_stats() + return self._get_num_active_down(pgs) + + def _get_num_active_down(self, pgs): + num = 0 + for pg in pgs: + if ((pg['state'].count('active') and not + pg['state'].count('stale')) or + (pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_peered(self): + """ + Find the number of PGs that are peered + """ + pgs = self.get_pg_stats() + return self._get_num_peered(pgs) + + def _get_num_peered(self, pgs): + num = 0 + for pg in pgs: + if pg['state'].count('peered') and not pg['state'].count('stale'): + num += 1 + return num + + def is_clean(self): + """ + True if all pgs are clean + """ + pgs = self.get_pg_stats() + if self._get_num_active_clean(pgs) == len(pgs): + return True + else: + self.dump_pgs_not_active_clean() + return False + + def is_recovered(self): + """ + True if all pgs have recovered + """ + pgs = self.get_pg_stats() + return self._get_num_active_recovered(pgs) == len(pgs) + + def is_active_or_down(self): + """ + True if all pgs are active or down + """ + pgs = self.get_pg_stats() + return self._get_num_active_down(pgs) == len(pgs) + + def dump_pgs_not_active_clean(self): + """ + Dumps all pgs that are not active+clean + """ + pgs = self.get_pg_stats() + for pg in pgs: + if pg['state'] != 'active+clean': + self.log('PG %s is not active+clean' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_down(self): + """ + Dumps all pgs that are not active or down + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state'] and 'down' not in 
pg['state']: + self.log('PG %s is not active or down' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active(self): + """ + Dumps all pgs that are not active + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state']: + self.log('PG %s is not active' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_peered(self, pgs): + for pg in pgs: + if (not pg['state'].count('active')) and (not pg['state'].count('peered')): + self.log('PG %s is not active or peered' % pg['pgid']) + self.log(pg) + + def wait_for_clean(self, timeout=1200): + """ + Returns true when all pgs are clean. + """ + self.log("waiting for clean") + start = time.time() + num_active_clean = self.get_num_active_clean() + while not self.is_clean(): + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if time.time() - start >= timeout: + self.log('dumping pgs not clean') + self.dump_pgs_not_active_clean() + assert time.time() - start < timeout, \ + 'wait_for_clean: failed before timeout expired' + cur_active_clean = self.get_num_active_clean() + if cur_active_clean != num_active_clean: + start = time.time() + num_active_clean = cur_active_clean + time.sleep(3) + self.log("clean!") + + def are_all_osds_up(self): + """ + Returns true if all osds are up. + """ + x = self.get_osd_dump() + return (len(x) == sum([(y['up'] > 0) for y in x])) + + def wait_for_all_osds_up(self, timeout=None): + """ + When this exits, either the timeout has expired, or all + osds are up. 
+ """ + self.log("waiting for all up") + start = time.time() + while not self.are_all_osds_up(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_all_osds_up' + time.sleep(3) + self.log("all up!") + + def pool_exists(self, pool): + if pool in self.list_pools(): + return True + return False + + def wait_for_pool(self, pool, timeout=300): + """ + Wait for a pool to exist + """ + self.log('waiting for pool %s to exist' % pool) + start = time.time() + while not self.pool_exists(pool): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_pool' + time.sleep(3) + + def wait_for_pools(self, pools): + for pool in pools: + self.wait_for_pool(pool) + + def is_mgr_available(self): + x = self.get_mgr_dump() + return x.get('available', False) + + def wait_for_mgr_available(self, timeout=None): + self.log("waiting for mgr available") + start = time.time() + while not self.is_mgr_available(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_mgr_available' + time.sleep(3) + self.log("mgr available!") + + def wait_for_recovery(self, timeout=None): + """ + Check peering. When this exists, we have recovered. 
+ """ + self.log("waiting for recovery to complete") + start = time.time() + num_active_recovered = self.get_num_active_recovered() + while not self.is_recovered(): + now = time.time() + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if now - start >= timeout: + if self.is_recovered(): + break + self.log('dumping pgs not recovered yet') + self.dump_pgs_not_active_clean() + assert now - start < timeout, \ + 'wait_for_recovery: failed before timeout expired' + cur_active_recovered = self.get_num_active_recovered() + if cur_active_recovered != num_active_recovered: + start = time.time() + num_active_recovered = cur_active_recovered + time.sleep(3) + self.log("recovered!") + + def wait_for_active(self, timeout=None): + """ + Check peering. When this exists, we are definitely active + """ + self.log("waiting for peering to complete") + start = time.time() + num_active = self.get_num_active() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active') + self.dump_pgs_not_active() + assert time.time() - start < timeout, \ + 'wait_for_active: failed before timeout expired' + cur_active = self.get_num_active() + if cur_active != num_active: + start = time.time() + num_active = cur_active + time.sleep(3) + self.log("active!") + + def wait_for_active_or_down(self, timeout=None): + """ + Check peering. 
When this exists, we are definitely either + active or down + """ + self.log("waiting for peering to complete or become blocked") + start = time.time() + num_active_down = self.get_num_active_down() + while not self.is_active_or_down(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active or down') + self.dump_pgs_not_active_down() + assert time.time() - start < timeout, \ + 'wait_for_active_or_down: failed before timeout expired' + cur_active_down = self.get_num_active_down() + if cur_active_down != num_active_down: + start = time.time() + num_active_down = cur_active_down + time.sleep(3) + self.log("active or down!") + + def osd_is_up(self, osd): + """ + Wrapper for osd check + """ + osds = self.get_osd_dump() + return osds[osd]['up'] > 0 + + def wait_till_osd_is_up(self, osd, timeout=None): + """ + Loop waiting for osd. + """ + self.log('waiting for osd.%d to be up' % osd) + start = time.time() + while not self.osd_is_up(osd): + if timeout is not None: + assert time.time() - start < timeout, \ + 'osd.%d failed to come up before timeout expired' % osd + time.sleep(3) + self.log('osd.%d is up' % osd) + + def is_active(self): + """ + Wrapper to check if all pgs are active + """ + return self.get_num_active() == self.get_num_pgs() + + def all_active_or_peered(self): + """ + Wrapper to check if all PGs are active or peered + """ + pgs = self.get_pg_stats() + if self._get_num_active(pgs) + self._get_num_peered(pgs) == len(pgs): + return True + else: + self.dump_pgs_not_active_peered(pgs) + return False + + def wait_till_active(self, timeout=None): + """ + Wait until all pgs are active. 
+ """ + self.log("waiting till active") + start = time.time() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active') + self.dump_pgs_not_active() + assert time.time() - start < timeout, \ + 'wait_till_active: failed before timeout expired' + time.sleep(3) + self.log("active!") + + def wait_till_pg_convergence(self, timeout=None): + start = time.time() + old_stats = None + active_osds = [osd['osd'] for osd in self.get_osd_dump() + if osd['in'] and osd['up']] + while True: + # strictly speaking, no need to wait for mon. but due to the + # "ms inject socket failures" setting, the osdmap could be delayed, + # so mgr is likely to ignore the pg-stat messages with pgs serving + # newly created pools which is not yet known by mgr. so, to make sure + # the mgr is updated with the latest pg-stats, waiting for mon/mgr is + # necessary. + self.flush_pg_stats(active_osds) + new_stats = dict((stat['pgid'], stat['state']) + for stat in self.get_pg_stats()) + if old_stats == new_stats: + return old_stats + if timeout is not None: + assert time.time() - start < timeout, \ + 'failed to reach convergence before %d secs' % timeout + old_stats = new_stats + # longer than mgr_stats_period + time.sleep(5 + 1) + + def mark_out_osd(self, osd): + """ + Wrapper to mark osd out. + """ + self.raw_cluster_cmd('osd', 'out', str(osd)) + + def kill_osd(self, osd): + """ + Kill osds by either power cycling (if indicated by the config) + or by stopping. 
+ """ + if self.config.get('powercycle'): + remote = self.find_remote('osd', osd) + self.log('kill_osd on osd.{o} ' + 'doing powercycle of {s}'.format(o=osd, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + elif self.config.get('bdev_inject_crash') and self.config.get('bdev_inject_crash_probability'): + if random.uniform(0, 1) < self.config.get('bdev_inject_crash_probability', .5): + self.inject_args( + 'osd', osd, + 'bdev-inject-crash', self.config.get('bdev_inject_crash')) + try: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).wait() + except: + pass + else: + raise RuntimeError('osd.%s did not fail' % osd) + else: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + else: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + + @staticmethod + def _assert_ipmi(remote): + assert remote.console.has_ipmi_credentials, ( + "powercycling requested but RemoteConsole is not " + "initialized. Check ipmi config.") + + def blackhole_kill_osd(self, osd): + """ + Stop osd if nothing else works. + """ + self.inject_args('osd', osd, + 'objectstore-blackhole', True) + time.sleep(2) + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + + def revive_osd(self, osd, timeout=360, skip_admin_check=False): + """ + Revive osds by either power cycling (if indicated by the config) + or by restarting. + """ + if self.config.get('powercycle'): + remote = self.find_remote('osd', osd) + self.log('kill_osd on osd.{o} doing powercycle of {s}'. + format(o=osd, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + if not remote.console.check_status(300): + raise Exception('Failed to revive osd.{o} via ipmi'. 
+ format(o=osd)) + teuthology.reconnect(self.ctx, 60, [remote]) + mount_osd_data(self.ctx, remote, self.cluster, str(osd)) + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('osd', osd, self.cluster).reset() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).restart() + + if not skip_admin_check: + # wait for dump_ops_in_flight; this command doesn't appear + # until after the signal handler is installed and it is safe + # to stop the osd again without making valgrind leak checks + # unhappy. see #5924. + self.wait_run_admin_socket('osd', osd, + args=['dump_ops_in_flight'], + timeout=timeout, stdout=DEVNULL) + + def mark_down_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'down', str(osd)) + + def mark_in_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'in', str(osd)) + + def signal_osd(self, osd, sig, silent=False): + """ + Wrapper to local get_daemon call which sends the given + signal to the given osd. + """ + self.ctx.daemons.get_daemon('osd', osd, + self.cluster).signal(sig, silent=silent) + + ## monitors + def signal_mon(self, mon, sig, silent=False): + """ + Wrapper to local get_daemon call + """ + self.ctx.daemons.get_daemon('mon', mon, + self.cluster).signal(sig, silent=silent) + + def kill_mon(self, mon): + """ + Kill the monitor by either power cycling (if the config says so), + or by doing a stop. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mon', mon) + self.log('kill_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mon', mon, self.cluster).stop() + + def revive_mon(self, mon): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mon', mon) + self.log('revive_mon on mon.{m} doing powercycle of {s}'. 
+ format(m=mon, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('mon', mon, self.cluster).restart() + + def revive_mgr(self, mgr): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mgr', mgr) + self.log('revive_mgr on mgr.{m} doing powercycle of {s}'. + format(m=mgr, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart() + + def get_mon_status(self, mon): + """ + Extract all the monitor status information from the cluster + """ + out = self.raw_cluster_cmd('tell', 'mon.%s' % mon, 'mon_status') + return json.loads(out) + + def get_mon_quorum(self): + """ + Extract monitor quorum information from the cluster + """ + out = self.raw_cluster_cmd('quorum_status') + j = json.loads(out) + return j['quorum'] + + def wait_for_mon_quorum_size(self, size, timeout=300): + """ + Loop until quorum size is reached. + """ + self.log('waiting for quorum size %d' % size) + sleep = 3 + with safe_while(sleep=sleep, + tries=timeout // sleep, + action=f'wait for quorum size {size}') as proceed: + while proceed(): + try: + if len(self.get_mon_quorum()) == size: + break + except CommandFailedError as e: + # could fail instead of being blocked if the rotating key of the + # connected monitor is not updated yet after they form the + # quorum + if e.exitstatus == errno.EACCES: + pass + else: + raise + self.log("quorum is size %d" % size) + + def get_mon_health(self, debug=False): + """ + Extract all the monitor health information. 
+ """ + out = self.raw_cluster_cmd('health', '--format=json') + if debug: + self.log('health:\n{h}'.format(h=out)) + return json.loads(out) + + def wait_until_healthy(self, timeout=None): + self.log("wait_until_healthy") + start = time.time() + while self.get_mon_health()['status'] != 'HEALTH_OK': + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_until_healthy' + time.sleep(3) + self.log("wait_until_healthy done") + + def get_filepath(self): + """ + Return path to osd data with {id} needing to be replaced + """ + return '/var/lib/ceph/osd/' + self.cluster + '-{id}' + + def make_admin_daemon_dir(self, remote): + """ + Create /var/run/ceph directory on remote site. + + :param ctx: Context + :param remote: Remote site + """ + remote.run(args=['sudo', + 'install', '-d', '-m0777', '--', '/var/run/ceph', ], ) + + def get_service_task_status(self, service, status_key): + """ + Return daemon task status for a given ceph service. + + :param service: ceph service (mds, osd, etc...) 
def utility_task(name):
    """
    Generate ceph_manager subtask corresponding to ceph_manager
    method name

    :param name: name of the manager method the generated task calls
    :returns: a ``task(ctx, config)`` callable

    The generated task reads three optional keys from ``config`` (which may
    be ``None``): ``args`` (default ``[]``), ``kwargs`` (default ``{}``) and
    ``cluster`` (default ``'ceph'``). It then calls
    ``getattr(ctx.managers[cluster], name)(*args, **kwargs)`` and returns
    ``None``.
    """
    def task(ctx, config):
        if config is None:
            config = {}
        args = config.get('args', [])
        kwargs = config.get('kwargs', {})
        cluster = config.get('cluster', 'ceph')
        fn = getattr(ctx.managers[cluster], name)
        fn(*args, **kwargs)

    # Without this every generated callable is called "task", so tracebacks
    # and introspection cannot tell one generated subtask from another.
    task.__name__ = name
    task.__qualname__ = name
    task.__doc__ = (
        "Call the manager method {0!r} with the 'args' and 'kwargs' taken "
        "from the task config.".format(name)
    )
    return task
def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR,
                         BASE_NAME, DATALINECOUNT):
    """
    Write the reference data files into a local directory.

    For every object number ``i`` in ``1..NUM_OBJECTS`` the file
    ``DATADIR/BASE_NAME<i>`` is created (or overwritten) with
    ``DATALINECOUNT`` copies of the line ``This is the data for <name>``.

    :param log: unused, kept for signature compatibility
    :param ctx: unused, kept for signature compatibility
    :param NUM_OBJECTS: number of files to create
    :param DATADIR: existing local directory that receives the files
    :param BASE_NAME: file name prefix; the object number is appended
    :param DATALINECOUNT: number of identical lines written per file
    """
    for i in range(1, NUM_OBJECTS + 1):
        NAME = BASE_NAME + "{num}".format(num=i)
        LOCALNAME = os.path.join(DATADIR, NAME)
        data = "This is the data for " + NAME + "\n"
        # The context manager closes the handle even when a write fails,
        # which the previous open()/close() pair did not guarantee.
        with open(LOCALNAME, "w") as fd:
            fd.write(data * DATALINECOUNT)


def cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
                          BASE_NAME, DATALINECOUNT):
    """
    Write the reference data files onto a remote host.

    Mirrors cod_setup_local_data(): for every object number ``i`` in
    ``1..NUM_OBJECTS`` any existing ``DATADIR/BASE_NAME<i>`` is removed with
    ``rm -f`` and the file is then written through ``remote.write_file``
    with ``DATALINECOUNT`` copies of ``This is the data for <name>``.

    :param log: unused, kept for signature compatibility
    :param ctx: unused, kept for signature compatibility
    :param remote: object providing ``run(args=...)`` and
                   ``write_file(path, data)``
    :param NUM_OBJECTS: number of files to create
    :param DATADIR: directory on the remote that receives the files
    :param BASE_NAME: file name prefix; the object number is appended
    :param DATALINECOUNT: number of identical lines written per file
    """
    for i in range(1, NUM_OBJECTS + 1):
        NAME = BASE_NAME + "{num}".format(num=i)
        DDNAME = os.path.join(DATADIR, NAME)

        remote.run(args=['rm', '-f', DDNAME])

        data = "This is the data for " + NAME + "\n"
        # String repetition builds the payload in one step instead of
        # growing it with += once per line.
        remote.write_file(DDNAME, data * DATALINECOUNT)
def get_lines(filename):
    """
    Read the leading non-empty lines of a file, then delete the file.

    Reading stops at end of file or at the first line that is empty once
    its trailing newline is removed, whichever comes first; lines after an
    empty line are therefore not returned.

    :param filename: path of the file to read and remove
    :returns: list of the lines read, without trailing newlines
    """
    lines = []
    # The context manager closes the handle even if reading raises.
    with open(filename, "r") as tmpfd:
        for raw in tmpfd:
            line = raw.rstrip('\n')
            if not line:
                break
            lines.append(line)
    # As before, the file is only removed after a successful read.
    os.unlink(filename)
    return lines
specify at least 1 client' + (cli_remote, _) = clients.remotes.popitem() + log.debug(cli_remote) + + # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys())) + # client = clients.popitem() + # log.info(client) + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info("OSDS") + log.info(osds) + log.info(osds.remotes) + + manager = ctx.managers['ceph'] + while (len(manager.get_osd_status()['up']) != + len(manager.get_osd_status()['raw'])): + time.sleep(10) + while (len(manager.get_osd_status()['in']) != + len(manager.get_osd_status()['up'])): + time.sleep(10) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + + PGNUM = config.get('pgnum', 12) + log.info("pgnum: {num}".format(num=PGNUM)) + + ERRORS = 0 + + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + create_replicated_pool(cli_remote, REP_POOL, PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME) + + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, + EC_POOL, EC_NAME, ec=True) + + if ERRORS == 0: + log.info("TEST PASSED") + else: + log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + + assert ERRORS == 0 + + try: + yield + finally: + log.info('Ending ceph_objectstore_tool') + + +def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False): + manager = ctx.managers['ceph'] + + osds = ctx.cluster.only(teuthology.is_type('osd')) + + TEUTHDIR = teuthology.get_testdir(ctx) + DATADIR = os.path.join(TEUTHDIR, "ceph.data") + DATALINECOUNT = 10000 + ERRORS = 0 + NUM_OBJECTS = config.get('objects', 10) + log.info("objects: {num}".format(num=NUM_OBJECTS)) + + pool_dump = manager.get_pool_dump(REP_POOL) + REPID = pool_dump['pool'] + + log.debug("repid={num}".format(num=REPID)) + + db = {} + + LOCALDIR = tempfile.mkdtemp("cod") + + cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, + 
REP_NAME, DATALINECOUNT) + allremote = [] + allremote.append(cli_remote) + allremote += list(osds.remotes.keys()) + allremote = list(set(allremote)) + for remote in allremote: + cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT) + + ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT, REP_POOL, db, ec) + + pgs = {} + for stats in manager.get_pg_stats(): + if stats["pgid"].find(str(REPID) + ".") != 0: + continue + if pool_dump["type"] == ceph_manager.PoolType.REPLICATED: + for osd in stats["acting"]: + pgs.setdefault(osd, []).append(stats["pgid"]) + elif pool_dump["type"] == ceph_manager.PoolType.ERASURE_CODED: + shard = 0 + for osd in stats["acting"]: + pgs.setdefault(osd, []).append("{pgid}s{shard}". + format(pgid=stats["pgid"], + shard=shard)) + shard += 1 + else: + raise Exception("{pool} has an unexpected type {type}". + format(pool=REP_POOL, type=pool_dump["type"])) + + log.info(pgs) + log.info(db) + + for osd in manager.get_osd_status()['up']: + manager.kill_osd(osd) + time.sleep(5) + + pgswithobjects = set() + objsinpg = {} + + # Test --op list and generate json for all objects + log.info("Test --op list by generating json for all objects") + prefix = ("sudo ceph-objectstore-tool " + "--data-path {fpath} " + "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH) + for remote in osds.remotes.keys(): + log.debug(remote) + log.debug(osds.remotes[remote]) + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + log.info("process osd.{id} on {remote}". 
+ format(id=osdid, remote=remote)) + cmd = (prefix + "--op list").format(id=osdid) + try: + lines = remote.sh(cmd, check_status=False).splitlines() + for pgline in lines: + if not pgline: + continue + (pg, obj) = json.loads(pgline) + name = obj['oid'] + if name in db: + pgswithobjects.add(pg) + objsinpg.setdefault(pg, []).append(name) + db[name].setdefault("pg2json", + {})[pg] = json.dumps(obj) + except CommandFailedError as e: + log.error("Bad exit status {ret} from --op list request". + format(ret=e.exitstatus)) + ERRORS += 1 + + log.info(db) + log.info(pgswithobjects) + log.info(objsinpg) + + if pool_dump["type"] == ceph_manager.PoolType.REPLICATED: + # Test get-bytes + log.info("Test get-bytes and set-bytes") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].items(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-bytes {fname}". + format(fname=GETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + if proc.exitstatus != 0: + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + log.error("Bad exit status {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + cmd = ("diff -q {file} {getfile}". + format(file=file, getfile=GETNAME)) + proc = remote.run(args=cmd.split()) + if proc.exitstatus != 0: + log.error("Data from get-bytes differ") + # log.debug("Got:") + # cat_file(logging.DEBUG, GETNAME) + # log.debug("Expected:") + # cat_file(logging.DEBUG, file) + ERRORS += 1 + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + + data = ("put-bytes going into {file}\n". 
+ format(file=file)) + remote.write_file(SETNAME, data) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=SETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += "get-bytes -".split() + try: + output = remote.sh(cmd, wait=True) + if data != output: + log.error("Data inconsistent after " + "set-bytes, got:") + log.error(output) + ERRORS += 1 + except CommandFailedError as e: + log.error("get-bytes after " + "set-bytes ret={ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=file).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + log.info("Test list-attrs get-attr") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].items(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). 
+ format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ["list-attrs"] + try: + keys = remote.sh(cmd, wait=True, stderr=BytesIO()).split() + except CommandFailedError as e: + log.error("Bad exit status {ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + continue + values = dict(db[basename]["xattr"]) + + for key in keys: + if (key == "_" or + key == "snapset" or + key == "hinfo_key"): + continue + key = key.strip("_") + if key not in values: + log.error("The key {key} should be present". + format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-attr {key}". + format(key="_" + key).split()) + try: + val = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("get-attr failed with {ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + continue + if exp != val: + log.error("For key {key} got value {got} " + "instead of {expected}". 
+ format(key=key, got=val, + expected=exp)) + ERRORS += 1 + if "hinfo_key" in keys: + cmd_prefix = prefix.format(id=osdid) + cmd = """ + expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64) + echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder + echo $expected | base64 --decode | \ + {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected + """.format(prefix=cmd_prefix, pg=pg, json=JSON, + key="hinfo_key") + log.debug(cmd) + proc = remote.run(args=['bash', '-e', '-x', + '-c', cmd], + check_status=False, + stdout=BytesIO(), + stderr=BytesIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("failed with " + + str(proc.exitstatus)) + log.error(" ".join([ + proc.stdout.getvalue().decode(), + proc.stderr.getvalue().decode(), + ])) + ERRORS += 1 + + if len(values) != 0: + log.error("Not all keys found, remaining keys:") + log.error(values) + + log.info("Test pg info") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op info --pgid {pg}"). + format(id=osdid, pg=pg).split()) + try: + info = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Failure of --op info command with %s", + e.exitstatus) + ERRORS += 1 + continue + if not str(pg) in info: + log.error("Bad data from info: %s", info) + ERRORS += 1 + + log.info("Test pg logging") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op log --pgid {pg}"). 
+ format(id=osdid, pg=pg).split()) + try: + output = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Getting log failed for pg {pg} " + "from osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + ERRORS += 1 + continue + HASOBJ = pg in pgswithobjects + MODOBJ = "modify" in output + if HASOBJ != MODOBJ: + log.error("Bad log for pg {pg} from osd.{id}". + format(pg=pg, id=osdid)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + log.error("Log should {msg}have a modify entry". + format(msg=MSG)) + ERRORS += 1 + + log.info("Test pg export") + EXP_ERRORS = 0 + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op export --pgid {pg} --file {file}"). + format(id=osdid, pg=pg, file=fpath)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Exporting failed for pg {pg} " + "on osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + log.info("Test pg removal") + RM_ERRORS = 0 + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--force --op remove --pgid {pg}"). + format(pg=pg, id=osdid)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Removing failed for pg {pg} " + "on osd.{id} with {ret}". 
+ format(pg=pg, id=osdid, ret=e.exitstatus)) + RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + log.info("Test pg import") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op import --file {file}"). + format(id=osdid, file=fpath)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Import failed from {file} with {ret}". + format(file=fpath, ret=e.exitstatus)) + IMP_ERRORS += 1 + else: + log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + log.info("Restarting OSDs....") + # They are still look to be up because of setting nodown + for osd in manager.get_osd_status()['up']: + manager.revive_osd(osd) + # Wait for health? + time.sleep(5) + # Let scrub after test runs verify consistency of all copies + log.info("Verify replicated import data") + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + TESTNAME = os.path.join(DATADIR, "gettest") + REFNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, cli_remote, + ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False) + + ret = proc.wait() + if ret != 0: + log.error("After import, rados get failed with {ret}". 
+ format(ret=proc.exitstatus)) + ERRORS += 1 + continue + + cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME, + ref=REFNAME) + proc = cli_remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.error("Data comparison failed for {obj}".format(obj=NAME)) + ERRORS += 1 + + return ERRORS diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py new file mode 100644 index 000000000..0de395c06 --- /dev/null +++ b/qa/tasks/ceph_test_case.py @@ -0,0 +1,215 @@ +from typing import Optional, TYPE_CHECKING +import unittest +import time +import logging + +from teuthology.orchestra.run import CommandFailedError + +if TYPE_CHECKING: + from tasks.mgr.mgr_test_case import MgrCluster + +log = logging.getLogger(__name__) + +class TestTimeoutError(RuntimeError): + pass + +class CephTestCase(unittest.TestCase): + """ + For test tasks that want to define a structured set of + tests implemented in python. Subclass this with appropriate + helpers for the subsystem you're testing. + """ + + # Environment references + mounts = None + fs = None + recovery_fs = None + backup_fs = None + ceph_cluster = None + mds_cluster = None + mgr_cluster: Optional['MgrCluster'] = None + ctx = None + + mon_manager = None + + # Declarative test requirements: subclasses should override these to indicate + # their special needs. If not met, tests will be skipped. + REQUIRE_MEMSTORE = False + + def setUp(self): + self._mon_configs_set = set() + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "Starting test {0}".format(self.id())) + + if self.REQUIRE_MEMSTORE: + objectstore = self.ceph_cluster.get_config("osd_objectstore", "osd") + if objectstore != "memstore": + # You certainly *could* run this on a real OSD, but you don't want to sit + # here for hours waiting for the test to fill up a 1TB drive! 
def cluster_cmd(self, command: str):
    """
    Run a cluster command given as one whitespace-separated string.

    Each word of ``command`` becomes one positional argument of
    ``self.ceph_cluster.mon_manager.raw_cluster_cmd``. No shell-style
    quoting is interpreted, so a single argument cannot contain whitespace.

    :param command: the command line, e.g. ``"osd pool ls"``
    :returns: whatever ``raw_cluster_cmd`` returns
    """
    assert self.ceph_cluster is not None
    # split() without a separator collapses runs of whitespace and ignores
    # leading/trailing whitespace; split(" ") turned those into
    # empty-string arguments.
    return self.ceph_cluster.mon_manager.raw_cluster_cmd(*command.split())
def wait_for_health(self, pattern, timeout):
    """
    Block until ``pattern`` shows up in the cluster health checks.

    The health report comes from ``mon_manager.get_mon_health()``. A report
    matches when ``pattern`` is a substring of any check's summary message,
    or equals one of the check codes (the keys of ``checks``). A report
    without any checks never matches.

    :param pattern: text to look for
    :param timeout: passed through to ``wait_until_true``
    """
    def seen_health_warning():
        checks = self.ceph_cluster.mon_manager.get_mon_health()['checks']
        codes = list(checks)
        summary_strings = [
            check['summary']['message'] for check in checks.values()
        ]
        if not summary_strings:
            log.debug("Not expected number of summary strings ({0})".format(summary_strings))
            return False
        if any(pattern in message for message in summary_strings):
            return True
        if pattern in codes:
            return True

        log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
        return False

    self.wait_until_true(seen_health_warning, timeout)
def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None, period=5):
    """
    Poll ``get_fn`` until it returns ``expect_val``.

    :param get_fn: zero-argument callable producing the current value
    :param expect_val: value to wait for (compared with ``==``)
    :param timeout: give up once the accumulated sleep time reaches this
                    many seconds; this counts ``period`` increments, not
                    wall-clock time
    :param reject_fn: optional predicate; if it returns true for a polled
                      value that is not the expected one, waiting is aborted
    :param period: seconds to sleep between polls
    :raises RuntimeError: when ``reject_fn`` flags a polled value
    :raises TestTimeoutError: when the timeout is reached
    """
    elapsed = 0
    while True:
        val = get_fn()
        if val == expect_val:
            # Log before returning: the loop is only ever left through
            # return/raise, so a statement placed after it never runs.
            log.debug("wait_until_equal: success")
            return
        if reject_fn and reject_fn(val):
            raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val))
        if elapsed >= timeout:
            raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format(
                elapsed, expect_val, val
            ))
        log.debug("wait_until_equal: {0} != {1}, waiting (timeout={2})...".format(val, expect_val, timeout))
        time.sleep(period)
        elapsed += period
some debugging +auth debug = true +ms die on old message = true +ms die on bug = true +debug asserts on shutdown = true + +# adjust warnings +mon max pg per osd = 10000 # >= luminous +mon pg warn max object skew = 0 +mon osd allow primary affinity = true +mon osd allow pg remap = true +mon warn on legacy crush tunables = false +mon warn on crush straw calc version zero = false +mon warn on no sortbitwise = false +mon warn on osd down out interval zero = false +mon warn on too few osds = false +mon_warn_on_pool_pg_num_not_power_of_two = false + +# disable pg_autoscaler by default for new pools +osd_pool_default_pg_autoscale_mode = off + +# tests delete pools +mon allow pool delete = true + +[osd] +osd scrub load threshold = 5.0 +osd scrub max interval = 600 + +osd recover clone overlap = true +osd recovery max chunk = 1048576 + +osd deep scrub update digest min age = 30 + +osd map max advance = 10 + +osd memory target autotune = true + +# debugging +osd debug shutdown = true +osd debug op order = true +osd debug verify stray on activate = true +osd debug pg log writeout = true +osd debug verify cached snaps = true +osd debug verify missing on start = true +osd debug misdirected ops = true +osd op queue = debug_random +osd op queue cut off = debug_random +osd shutdown pgref assert = true +bdev debug aio = true +osd sloppy crc = true + +[mgr] +mon reweight min pgs per osd = 4 +mon reweight min bytes per osd = 10 +mgr/telemetry/nag = false + +[mon] +mon data avail warn = 5 +mon mgr mkfs grace = 240 +mon reweight min pgs per osd = 4 +mon osd reporter subtree level = osd +mon osd prime pg temp = true +mon reweight min bytes per osd = 10 + +# rotate auth tickets quickly to exercise renewal paths +auth mon ticket ttl = 660 # 11m +auth service ticket ttl = 240 # 4m + +# don't complain about global id reclaim +mon_warn_on_insecure_global_id_reclaim = false +mon_warn_on_insecure_global_id_reclaim_allowed = false + +[client.rgw] +rgw cache enabled = true +rgw enable ops log = 
def update_archive_setting(ctx, key, value):
    """
    Add logs directory to job's info log file

    Sets ``archive[key] = value`` in ``<ctx.archive>/info.yaml`` and
    rewrites the file in place. Nothing is done when ``ctx.archive`` is
    ``None``.

    :param ctx: context object; only ``ctx.archive`` is read
    :param key: key to set inside the top-level ``archive`` mapping
    :param value: value stored under ``key``
    """
    if ctx.archive is None:
        return
    with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
        # An empty file loads as None; treat it as an empty mapping instead
        # of failing on the membership test below.
        info_yaml = yaml.safe_load(info_file) or {}
        if 'archive' in info_yaml:
            info_yaml['archive'][key] = value
        else:
            info_yaml['archive'] = {key: value}
        info_file.seek(0)
        yaml.safe_dump(info_yaml, info_file, default_flow_style=False)
        # The file is rewritten in place: drop whatever is left of the old
        # content when the new document is shorter, otherwise the stale tail
        # would corrupt the YAML.
        info_file.truncate()
@contextlib.contextmanager
def ceph_log(ctx, config):
    """
    Check the cluster log for problems and archive the logs on exit.

    When the managed block finishes (normally or with an exception), the
    cluster log on the bootstrap remote is scanned for [ERR], [WRN] and [SEC]
    entries that are not filtered out by the optional ``log-ignorelist``
    patterns; a hit marks the run as failed and, if no failure reason is
    recorded yet, stores the most severe matching line as the reason.
    Afterwards the logs are compressed and pulled into ``ctx.archive``,
    unless ``archive-on-error`` is set and the run succeeded.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    update_archive_setting(ctx, 'log', '/var/log/ceph')

    try:
        yield

    except Exception:
        # we need to know this below
        ctx.summary['success'] = False
        raise

    finally:
        log.info('Checking cluster log for badness...')

        def first_in_ceph_log(pattern, excludes):
            """
            Find the first occurrence of the pattern specified in the Ceph log,
            Returns None if none found.

            :param pattern: Pattern scanned for.
            :param excludes: Patterns to ignore.
            :return: First line of text (or None if not found)
            """
            args = [
                'sudo',
                'egrep', pattern,
                '/var/log/ceph/{fsid}/ceph.log'.format(
                    fsid=fsid),
            ]
            if excludes:
                for exclude in excludes:
                    args.extend([run.Raw('|'), 'egrep', '-v', exclude])
            args.extend([
                run.Raw('|'), 'head', '-n', '1',
            ])
            r = ctx.ceph[cluster_name].bootstrap_remote.run(
                stdout=StringIO(),
                args=args,
            )
            stdout = r.stdout.getvalue()
            if stdout != '':
                return stdout
            return None

        # The ignorelist is optional: use .get() for every scan so a missing
        # key cannot raise KeyError while we are reporting a failure.
        excludes = config.get('log-ignorelist')
        if first_in_ceph_log(r'\[ERR\]|\[WRN\]|\[SEC\]',
                             excludes) is not None:
            log.warning('Found errors (ERR|WRN|SEC) in cluster log')
            ctx.summary['success'] = False
            # use the most severe problem as the failure reason
            if 'failure_reason' not in ctx.summary:
                for pattern in [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']:
                    match = first_in_ceph_log(pattern, excludes)
                    if match is not None:
                        ctx.summary['failure_reason'] = \
                            '"{match}" in cluster log'.format(
                                match=match.rstrip('\n'),
                            )
                        break

        if ctx.archive is not None and \
                not (ctx.config.get('archive-on-error') and ctx.summary['success']):
            # and logs
            log.info('Compressing logs...')
            run.wait(
                ctx.cluster.run(
                    args=[
                        'sudo',
                        'find',
                        '/var/log/ceph',   # all logs, not just for the cluster
                        '/var/log/rbd-target-api',  # ceph-iscsi
                        '-name',
                        '*.log',
                        '-print0',
                        run.Raw('|'),
                        'sudo',
                        'xargs',
                        '-0',
                        '--no-run-if-empty',
                        '--',
                        'gzip',
                        '--',
                    ],
                    wait=False,
                ),
            )

            log.info('Archiving logs...')
            path = os.path.join(ctx.archive, 'remote')
            # best effort: the directories may already exist
            try:
                os.makedirs(path)
            except OSError:
                pass
            for remote in ctx.cluster.remotes.keys():
                sub = os.path.join(path, remote.name)
                try:
                    os.makedirs(sub)
                except OSError:
                    pass
                try:
                    teuthology.pull_directory(remote, '/var/log/ceph',  # everything
                                              os.path.join(sub, 'log'))
                except ReadError:
                    pass
/var/lib/ceph/$fsid/crash + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') + + try: + yield + + finally: + if ctx.archive is not None: + log.info('Archiving crash dumps...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.name) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, + '/var/lib/ceph/%s/crash' % fsid, + os.path.join(sub, 'crash')) + except ReadError: + pass + + +@contextlib.contextmanager +def ceph_bootstrap(ctx, config): + """ + Bootstrap ceph cluster. + + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + fsid = ctx.ceph[cluster_name].fsid + + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + first_mon = ctx.ceph[cluster_name].first_mon + first_mon_role = ctx.ceph[cluster_name].first_mon_role + mons = ctx.ceph[cluster_name].mons + + ctx.cluster.run(args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', + ]); + ctx.cluster.run(args=[ + 'sudo', 'chmod', '777', '/etc/ceph', + ]); + try: + # write seed config + log.info('Writing seed config...') + conf_fp = BytesIO() + seed_config = build_initial_config(ctx, config) + seed_config.write(conf_fp) + bootstrap_remote.write_file( + path='{}/seed.{}.conf'.format(testdir, cluster_name), + data=conf_fp.getvalue()) + log.debug('Final config:\n' + conf_fp.getvalue().decode()) + ctx.ceph[cluster_name].conf = seed_config + + # register initial daemons + ctx.daemons.register_daemon( + bootstrap_remote, 'mon', first_mon, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mon.' 
+ first_mon), + wait=False, + started=True, + ) + if not ctx.ceph[cluster_name].roleless: + first_mgr = ctx.ceph[cluster_name].first_mgr + ctx.daemons.register_daemon( + bootstrap_remote, 'mgr', first_mgr, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mgr.' + first_mgr), + wait=False, + started=True, + ) + + # bootstrap + log.info('Bootstrapping...') + cmd = [ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + '-v', + 'bootstrap', + '--fsid', fsid, + '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), + '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), + '--output-keyring', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + '--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name), + ] + + if config.get('registry-login'): + registry = config['registry-login'] + cmd += [ + "--registry-url", registry['url'], + "--registry-username", registry['username'], + "--registry-password", registry['password'], + ] + + if not ctx.ceph[cluster_name].roleless: + cmd += [ + '--mon-id', first_mon, + '--mgr-id', first_mgr, + '--orphan-initial-daemons', # we will do it explicitly! 
+ '--skip-monitoring-stack', # we'll provision these explicitly + ] + + if mons[first_mon_role].startswith('['): + cmd += ['--mon-addrv', mons[first_mon_role]] + else: + cmd += ['--mon-ip', mons[first_mon_role]] + if config.get('skip_dashboard'): + cmd += ['--skip-dashboard'] + if config.get('skip_monitoring_stack'): + cmd += ['--skip-monitoring-stack'] + if config.get('single_host_defaults'): + cmd += ['--single-host-defaults'] + if not config.get('avoid_pacific_features', False): + cmd += ['--skip-admin-label'] + # bootstrap makes the keyring root 0600, so +r it for our purposes + cmd += [ + run.Raw('&&'), + 'sudo', 'chmod', '+r', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + bootstrap_remote.run(args=cmd) + + # fetch keys and configs + log.info('Fetching config...') + ctx.ceph[cluster_name].config_file = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.conf') + log.info('Fetching client.admin keyring...') + ctx.ceph[cluster_name].admin_keyring = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.client.admin.keyring') + log.info('Fetching mon keyring...') + ctx.ceph[cluster_name].mon_keyring = \ + bootstrap_remote.read_file(f'/var/lib/ceph/{fsid}/mon.{first_mon}/keyring', sudo=True) + + # fetch ssh key, distribute to additional nodes + log.info('Fetching pub ssh key...') + ssh_pub_key = bootstrap_remote.read_file( + f'{testdir}/{cluster_name}.pub').decode('ascii').strip() + + log.info('Installing pub ssh key for root users...') + ctx.cluster.run(args=[ + 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh', + run.Raw('&&'), + 'echo', ssh_pub_key, + run.Raw('|'), + 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys', + run.Raw('&&'), + 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys', + ]) + + # set options + if config.get('allow_ptrace', True): + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true']) + + if not config.get('avoid_pacific_features', False): + 
log.info('Distributing conf and client.admin keyring to all hosts + 0755') + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'client-keyring', 'set', 'client.admin', + '*', '--mode', '0755'], + check_status=False) + + # add other hosts + for remote in ctx.cluster.remotes.keys(): + if remote == bootstrap_remote: + continue + + # note: this may be redundant (see above), but it avoids + # us having to wait for cephadm to do it. + log.info('Writing (initial) conf and keyring to %s' % remote.shortname) + remote.write_file( + path='/etc/ceph/{}.conf'.format(cluster_name), + data=ctx.ceph[cluster_name].config_file) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring) + + log.info('Adding host %s to orchestrator...' % remote.shortname) + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'host', 'add', + remote.shortname + ]) + r = _shell(ctx, cluster_name, remote, + ['ceph', 'orch', 'host', 'ls', '--format=json'], + stdout=StringIO()) + hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())] + assert remote.shortname in hosts + + yield + + finally: + log.info('Cleaning up testdir ceph.* files...') + ctx.cluster.run(args=[ + 'rm', '-f', + '{}/seed.{}.conf'.format(testdir, cluster_name), + '{}/{}.pub'.format(testdir, cluster_name), + ]) + + log.info('Stopping all daemons...') + + # this doesn't block until they are all stopped... 
@contextlib.contextmanager
def ceph_mons(ctx, config):
    """
    Deploy any additional mons

    With ``add_mons_via_daemon_add`` set, every mon other than the first one
    is added individually with ``ceph orch daemon add mon`` and the monmap is
    polled until it reports the expected count. Otherwise all mons are
    placed with a single ``ceph orch apply mon`` call. In both cases the new
    mons are registered with ctx.daemons, and finally a minimal ceph.conf is
    generated and stored in ``ctx.ceph[cluster].config_file``.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    try:
        daemons = {}
        if config.get('add_mons_via_daemon_add'):
            # This is the old way of adding mons that works with the (early) octopus
            # cephadm scheduler.
            num_mons = 1
            for remote, roles in ctx.cluster.remotes.items():
                for mon in [r for r in roles
                            if teuthology.is_type('mon', cluster_name)(r)]:
                    c_, _, id_ = teuthology.split_role(mon)
                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                        continue
                    log.info('Adding %s on %s' % (mon, remote.shortname))
                    num_mons += 1
                    _shell(ctx, cluster_name, remote, [
                        'ceph', 'orch', 'daemon', 'add', 'mon',
                        remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_,
                    ])
                    ctx.daemons.register_daemon(
                        remote, 'mon', id_,
                        cluster=cluster_name,
                        fsid=fsid,
                        logger=log.getChild(mon),
                        wait=False,
                        started=True,
                    )
                    daemons[mon] = (remote, id_)

                    with contextutil.safe_while(sleep=1, tries=180) as proceed:
                        while proceed():
                            log.info('Waiting for %d mons in monmap...' % (num_mons))
                            r = _shell(
                                ctx=ctx,
                                cluster_name=cluster_name,
                                remote=remote,
                                args=[
                                    'ceph', 'mon', 'dump', '-f', 'json',
                                ],
                                stdout=StringIO(),
                            )
                            j = json.loads(r.stdout.getvalue())
                            if len(j['mons']) == num_mons:
                                break
        else:
            nodes = []
            for remote, roles in ctx.cluster.remotes.items():
                for mon in [r for r in roles
                            if teuthology.is_type('mon', cluster_name)(r)]:
                    c_, _, id_ = teuthology.split_role(mon)
                    log.info('Adding %s on %s' % (mon, remote.shortname))
                    nodes.append(remote.shortname
                                 + ':' + ctx.ceph[cluster_name].mons[mon]
                                 + '=' + id_)
                    # the first mon is part of the placement but was already
                    # registered during bootstrap
                    if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon:
                        continue
                    daemons[mon] = (remote, id_)

            _shell(ctx, cluster_name, remote, [
                'ceph', 'orch', 'apply', 'mon',
                str(len(nodes)) + ';' + ';'.join(nodes)]
            )
            # Name each logger after the mon being registered; the loop key
            # used to be bound to an unrelated name, so every daemon got the
            # logger of the last mon seen by the loop above.
            for mon, (remote, id_) in daemons.items():
                ctx.daemons.register_daemon(
                    remote, 'mon', id_,
                    cluster=cluster_name,
                    fsid=fsid,
                    logger=log.getChild(mon),
                    wait=False,
                    started=True,
                )

            with contextutil.safe_while(sleep=1, tries=180) as proceed:
                while proceed():
                    log.info('Waiting for %d mons in monmap...' % (len(nodes)))
                    r = _shell(
                        ctx=ctx,
                        cluster_name=cluster_name,
                        remote=remote,
                        args=[
                            'ceph', 'mon', 'dump', '-f', 'json',
                        ],
                        stdout=StringIO(),
                    )
                    j = json.loads(r.stdout.getvalue())
                    if len(j['mons']) == len(nodes):
                        break

        # refresh our (final) ceph.conf file
        bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote
        log.info('Generating final ceph.conf file...')
        r = _shell(
            ctx=ctx,
            cluster_name=cluster_name,
            remote=bootstrap_remote,
            args=[
                'ceph', 'config', 'generate-minimal-conf',
            ],
            stdout=StringIO(),
        )
        ctx.ceph[cluster_name].config_file = r.stdout.getvalue()

        yield

    finally:
        pass
teuthology.get_scratch_devices(remote) + for osd in [r for r in roles + if teuthology.is_type('osd', cluster_name)(r)]: + _, _, id_ = teuthology.split_role(osd) + id_to_remote[int(id_)] = (osd, remote) + + cur = 0 + for osd_id in sorted(id_to_remote.keys()): + osd, remote = id_to_remote[osd_id] + _, _, id_ = teuthology.split_role(osd) + assert int(id_) == cur + devs = devs_by_remote[remote] + assert devs ## FIXME ## + dev = devs.pop() + if all(_ in dev for _ in ('lv', 'vg')): + short_dev = dev.replace('/dev/', '') + else: + short_dev = dev + log.info('Deploying %s on %s with %s...' % ( + osd, remote.shortname, dev)) + _shell(ctx, cluster_name, remote, [ + 'ceph-volume', 'lvm', 'zap', dev]) + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'daemon', 'add', 'osd', + remote.shortname + ':' + short_dev + ]) + ctx.daemons.register_daemon( + remote, 'osd', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(osd), + wait=False, + started=True, + ) + cur += 1 + + if cur == 0: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'osd', '--all-available-devices', + ]) + # expect the number of scratch devs + num_osds = sum(map(len, devs_by_remote.values())) + assert num_osds + else: + # expect the number of OSDs we created + num_osds = cur + + log.info(f'Waiting for {num_osds} OSDs to come up...') + with contextutil.safe_while(sleep=1, tries=120) as proceed: + while proceed(): + p = _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'osd', 'stat', '-f', 'json'], stdout=StringIO()) + j = json.loads(p.stdout.getvalue()) + if int(j.get('num_up_osds', 0)) == num_osds: + break; + + yield + finally: + pass + + +@contextlib.contextmanager +def ceph_mdss(ctx, config): + """ + Deploy MDSss + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('mds', cluster_name)(r)]: + 
@contextlib.contextmanager
def ceph_monitoring(daemon_type, ctx, config):
    """
    Deploy the monitoring daemons of one type (prometheus, node-exporter, etc.).

    Every role of type ``daemon_type`` is collected into a single
    ``ceph orch apply`` placement and then registered with ctx.daemons.

    :param daemon_type: role/service type to deploy
    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    placements = []
    to_register = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if not teuthology.is_type(daemon_type, cluster_name)(role):
                continue
            _, _, daemon_id = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            placements.append(remote.shortname + '=' + daemon_id)
            to_register[role] = (remote, daemon_id)

    if placements:
        # "<count>;<host>=<id>;..."; issued from the last remote visited above
        placement = ';'.join([str(len(placements))] + placements)
        _shell(ctx, cluster_name, remote,
               ['ceph', 'orch', 'apply', daemon_type, placement])

    for role, (remote, daemon_id) in to_register.items():
        ctx.daemons.register_daemon(
            remote, daemon_type, daemon_id,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def ceph_iscsi(ctx, config):
    """
    Deploy the iSCSI gateways.

    When the cluster has iscsi roles, creates the ``iscsi`` pool, enables the
    rbd application on it and applies the iscsi service with a placement that
    lists every gateway; the gateways are then registered with ctx.daemons.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    cluster_name = config['cluster']
    fsid = ctx.ceph[cluster_name].fsid

    placements = []
    gateways = {}
    for remote, roles in ctx.cluster.remotes.items():
        for role in roles:
            if not teuthology.is_type('iscsi', cluster_name)(role):
                continue
            _, _, gw_id = teuthology.split_role(role)
            log.info('Adding %s on %s' % (role, remote.shortname))
            placements.append(remote.shortname + '=' + gw_id)
            gateways[role] = (remote, gw_id)

    if placements:
        poolname = 'iscsi'
        placement = ';'.join([str(len(placements))] + placements)
        commands = (
            # ceph osd pool create iscsi 3 3 replicated
            ['ceph', 'osd', 'pool', 'create',
             poolname, '3', '3', 'replicated'],
            ['ceph', 'osd', 'pool', 'application', 'enable',
             poolname, 'rbd'],
            # ceph orch apply iscsi iscsi user password
            ['ceph', 'orch', 'apply', 'iscsi',
             poolname, 'user', 'password',
             '--placement', placement],
        )
        # every command is issued from the last remote visited above
        for command in commands:
            _shell(ctx, cluster_name, remote, command)

    for role, (remote, gw_id) in gateways.items():
        ctx.daemons.register_daemon(
            remote, 'iscsi', gw_id,
            cluster=cluster_name,
            fsid=fsid,
            logger=log.getChild(role),
            wait=False,
            started=True,
        )

    yield
@contextlib.contextmanager
def stop(ctx, config):
    """
    Stop ceph daemons

    ``config`` may be None (all daemons of the known role types), a list of
    roles, or a dict with a ``daemons`` list.

    For example::
      tasks:
      - ceph.stop: [mds.*]

      tasks:
      - ceph.stop: [osd.0, osd.2]

      tasks:
      - ceph.stop:
          daemons: [osd.0, osd.2]

    """
    # normalize the accepted config shapes to a dict
    if isinstance(config, list):
        config = {'daemons': config}
    elif config is None:
        config = {}

    wanted = config.get('daemons', None)
    roles = ctx.daemons.resolve_role_list(wanted, CEPH_ROLE_TYPES, True)

    clusters = set()
    for role in roles:
        cluster, type_, id_ = teuthology.split_role(role)
        daemon = ctx.daemons.get_daemon(type_, id_, cluster)
        daemon.stop()
        clusters.add(cluster)

#    for cluster in clusters:
#        ctx.ceph[cluster].watchdog.stop()
#        ctx.ceph[cluster].watchdog.join()

    yield
def apply(ctx, config):
    """
    Apply service spec(s) through the orchestrator.

    The ``specs`` list is dumped as a multi-document YAML stream, passed
    through subst_vip() and fed on stdin to ``ceph orch apply -i -`` on the
    bootstrap remote.

      tasks:
        - cephadm.apply:
            specs:
            - service_type: rgw
              service_id: foo
              spec:
                rgw_frontend_port: 8000
            - service_type: rgw
              service_id: bar
              spec:
                rgw_frontend_port: 9000
                zone: bar
                realm: asdf

    """
    cluster_name = config.get('cluster', 'ceph')
    bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote

    raw_yaml = yaml.dump_all(config.get('specs', []))
    spec_yaml = subst_vip(ctx, raw_yaml)

    log.info(f'Applying spec(s):\n{spec_yaml}')
    _shell(
        ctx, cluster_name, bootstrap_remote,
        ['ceph', 'orch', 'apply', '-i', '-'],
        stdin=spec_yaml,
    )
@contextlib.contextmanager
def tweaked_option(ctx, config):
    """
    set an option, and then restore it with its original value

    Note, due to the way how tasks are executed/nested, it's not suggested to
    use this method as a standalone task. otherwise, it's likely that it will
    restore the tweaked option at the /end/ of 'tasks' block.

    Only the options listed in ``options`` below are considered, and only
    when they are present in ``config``. An option is injected (and later
    restored) only if its requested value differs from the current one.

    :param ctx: Context
    :param config: Configuration
    """
    saved_options = {}
    # we can complicate this when necessary
    options = ['mon-health-to-clog']
    type_, id_ = 'mon', '*'
    cluster = config.get('cluster', 'ceph')
    manager = ctx.managers[cluster]
    if id_ == '*':
        # read the current value from the first role of this type
        get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
    else:
        get_from = id_
    for option in options:
        if option not in config:
            continue
        value = 'true' if config[option] else 'false'
        option = option.replace('-', '_')
        old_value = manager.get_config(type_, get_from, option)
        if value != old_value:
            saved_options[option] = old_value
            manager.inject_args(type_, id_, option, value)
    try:
        yield
    finally:
        # An exception from the managed block is re-raised at the yield, so
        # restore in finally; otherwise the tweak would be left in place.
        for option, value in saved_options.items():
            manager.inject_args(type_, id_, option, value)
@contextlib.contextmanager
def distribute_config_and_admin_keyring(ctx, config):
    """
    Distribute a sufficient config and keyring for clients

    Writes the cluster's config file and client.admin keyring into /etc/ceph
    on every remote, and removes both files again on exit.

    :param ctx: the argparse.Namespace object
    :param config: the config dict
    """
    cluster_name = config['cluster']
    cluster_state = ctx.ceph[cluster_name]

    log.info('Distributing (final) config and client.admin keyring...')
    for remote in ctx.cluster.remotes.keys():
        remote.write_file(
            '/etc/ceph/{}.conf'.format(cluster_name),
            cluster_state.config_file,
            sudo=True)
        remote.write_file(
            path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name),
            data=cluster_state.admin_keyring,
            sudo=True)

    try:
        yield
    finally:
        cleanup = ['sudo', 'rm', '-f']
        cleanup.append('/etc/ceph/{}.conf'.format(cluster_name))
        cleanup.append(
            '/etc/ceph/{}.client.admin.keyring'.format(cluster_name))
        ctx.cluster.run(args=cleanup)
config.get('create_rbd_pool', False): + cluster_name = config['cluster'] + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=ctx.ceph[cluster_name].bootstrap_remote, + ceph_cluster=cluster_name, + ) + log.info('Creating RBD pool') + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'create', 'rbd', '8']) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ]) + yield + + +@contextlib.contextmanager +def _bypass(): + yield + + +@contextlib.contextmanager +def initialize_config(ctx, config): + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + + ctx.ceph[cluster_name].thrashers = [] + # fixme: setup watchdog, ala ceph.py + + ctx.ceph[cluster_name].roleless = False # see below + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + + # cephadm mode? 
+ if 'cephadm_mode' not in config: + config['cephadm_mode'] = 'root' + assert config['cephadm_mode'] in ['root', 'cephadm-package'] + if config['cephadm_mode'] == 'root': + ctx.cephadm = testdir + '/cephadm' + else: + ctx.cephadm = 'cephadm' # in the path + + if first_ceph_cluster: + # FIXME: this is global for all clusters + ctx.daemons = DaemonGroup( + use_cephadm=ctx.cephadm) + + # uuid + fsid = str(uuid.uuid1()) + log.info('Cluster fsid is %s' % fsid) + ctx.ceph[cluster_name].fsid = fsid + + # mon ips + log.info('Choosing monitor IPs and ports...') + remotes_and_roles = ctx.cluster.remotes.items() + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + + if config.get('roleless', False): + # mons will be named after hosts + first_mon = None + for remote, _ in remotes_and_roles: + ctx.cluster.remotes[remote].append('mon.' + remote.shortname) + if not first_mon: + first_mon = remote.shortname + bootstrap_remote = remote + log.info('No mon roles; fabricating mons') + + roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()] + + ctx.ceph[cluster_name].mons = get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + ) + log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons) + + if config.get('roleless', False): + ctx.ceph[cluster_name].roleless = True + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = 'mon.' 
+ first_mon + else: + first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0] + _, _, first_mon = teuthology.split_role(first_mon_role) + (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys() + log.info('First mon is mon.%s on %s' % (first_mon, + bootstrap_remote.shortname)) + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = first_mon_role + + others = ctx.cluster.remotes[bootstrap_remote] + mgrs = sorted([r for r in others + if teuthology.is_type('mgr', cluster_name)(r)]) + if not mgrs: + raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon) + _, _, first_mgr = teuthology.split_role(mgrs[0]) + log.info('First mgr is %s' % (first_mgr)) + ctx.ceph[cluster_name].first_mgr = first_mgr + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy ceph cluster using cephadm + + For example, teuthology.yaml can contain the 'defaults' section: + + defaults: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + + Using overrides makes it possible to customize it per run. 
+ The equivalent 'overrides' section looks like: + + overrides: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + registry-login: + url: registry-url + username: registry-user + password: registry-password + + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('cephadm', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.ceph: + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].bootstrapped = False + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + + containers = config.get('containers', {}) + container_image_name = containers.get('image', container_image_name) + + if not hasattr(ctx.ceph[cluster_name], 'image'): + ctx.ceph[cluster_name].image = config.get('image') + ref = None + if not ctx.ceph[cluster_name].image: + if not container_image_name: + raise Exception("Configuration error occurred. " + "The 'image' value is undefined for 'cephadm' task. 
" + "Please provide corresponding options in the task's " + "config, task 'overrides', or teuthology 'defaults' " + "section.") + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + + if sha1: + if flavor == "crimson": + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + ref = sha1 + else: + # hmm, fall back to branch? + branch = config.get('branch', 'master') + ref = branch + ctx.ceph[cluster_name].image = container_image_name + ':' + branch + log.info('Cluster image is %s' % ctx.ceph[cluster_name].image) + + + with contextutil.nested( + #if the cluster is already bootstrapped bypass corresponding methods + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\ + else initialize_config(ctx=ctx, config=config), + lambda: ceph_initial(), + lambda: normalize_hostnames(ctx=ctx), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\ + else download_cephadm(ctx=ctx, config=config, ref=ref), + lambda: ceph_log(ctx=ctx, config=config), + lambda: ceph_crash(ctx=ctx, config=config), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped)\ + else ceph_bootstrap(ctx, config), + lambda: crush_setup(ctx=ctx, config=config), + lambda: ceph_mons(ctx=ctx, config=config), + lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config), + lambda: ceph_mgrs(ctx=ctx, config=config), + lambda: ceph_osds(ctx=ctx, config=config), + lambda: ceph_mdss(ctx=ctx, config=config), + lambda: ceph_rgw(ctx=ctx, config=config), + lambda: ceph_iscsi(ctx=ctx, config=config), + lambda: ceph_monitoring('prometheus', ctx=ctx, config=config), + lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config), + lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config), + lambda: ceph_monitoring('grafana', ctx=ctx, config=config), + lambda: ceph_clients(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + ): + if not 
hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.ceph[cluster_name].bootstrap_remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' + cluster_name), + cluster=cluster_name, + cephadm=True, + ) + + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + + log.info('Setup complete, yielding') + yield + + finally: + log.info('Teardown begin') + diff --git a/qa/tasks/cephadm_cases/__init__.py b/qa/tasks/cephadm_cases/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/cephadm_cases/__init__.py diff --git a/qa/tasks/cephadm_cases/test_cli.py b/qa/tasks/cephadm_cases/test_cli.py new file mode 100644 index 000000000..c4cab4901 --- /dev/null +++ b/qa/tasks/cephadm_cases/test_cli.py @@ -0,0 +1,63 @@ +import logging + +from tasks.mgr.mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestCephadmCLI(MgrTestCase): + def _cmd(self, *args) -> str: + assert self.mgr_cluster is not None + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _orch_cmd(self, *args) -> str: + return self._cmd("orch", *args) + + def setUp(self): + super(TestCephadmCLI, self).setUp() + + def test_yaml(self): + """ + to prevent oddities like + + >>> import yaml + ... from collections import OrderedDict + ... 
assert yaml.dump(OrderedDict()) == '!!python/object/apply:collections.OrderedDict\\n- []\\n' + """ + out = self._orch_cmd('device', 'ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('host', 'ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('ps', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('status', '--format', 'yaml') + self.assertNotIn('!!python', out) + + def test_pause(self): + self._orch_cmd('pause') + self.wait_for_health('CEPHADM_PAUSED', 60) + self._orch_cmd('resume') + self.wait_for_health_clear(60) + + def test_daemon_restart(self): + self._orch_cmd('daemon', 'stop', 'osd.0') + self.wait_for_health('OSD_DOWN', 60) + self._orch_cmd('daemon', 'start', 'osd.0') + self.wait_for_health_clear(120) + self._orch_cmd('daemon', 'restart', 'osd.0') + + def test_device_ls_wide(self): + self._orch_cmd('device', 'ls', '--wide') + + def test_cephfs_mirror(self): + self._orch_cmd('apply', 'cephfs-mirror') + self.wait_until_true(lambda: 'cephfs-mirror' in self._orch_cmd('ps'), 60) + self.wait_for_health_clear(60) + self._orch_cmd('rm', 'cephfs-mirror') + self.wait_until_true(lambda: 'cephfs-mirror' not in self._orch_cmd('ps'), 60) diff --git a/qa/tasks/cephadm_cases/test_cli_mon.py b/qa/tasks/cephadm_cases/test_cli_mon.py new file mode 100644 index 000000000..72aee094e --- /dev/null +++ b/qa/tasks/cephadm_cases/test_cli_mon.py @@ -0,0 +1,71 @@ +import json +import logging + +from tasks.mgr.mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestCephadmCLI(MgrTestCase): + + APPLY_MON_PERIOD = 60 + + def _cmd(self, *args) -> str: + assert self.mgr_cluster is not None + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _orch_cmd(self, *args) -> str: + return self._cmd("orch", *args) + + def setUp(self): + super(TestCephadmCLI, 
self).setUp() + + def _create_and_write_pool(self, pool_name): + # Create new pool and write to it, simulating a small workload. + self.mgr_cluster.mon_manager.create_pool(pool_name) + args = [ + "rados", "-p", pool_name, "bench", "30", "write", "-t", "16"] + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _get_quorum_size(self) -> int: + # Evaluate if the quorum size of the cluster is correct. + # log the quorum_status before reducing the monitors + retstr = self._cmd('quorum_status') + log.info("test_apply_mon._check_quorum_size: %s" % json.dumps(retstr, indent=2)) + quorum_size = len(json.loads(retstr)['quorum']) # get quorum size + return quorum_size + + def _check_no_crashes(self): + # Evaluate if there are no crashes + # log the crash + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + log.info("test_apply_mon._check_no_crashes: %s" % retstr) + self.assertEqual(0, len(retstr)) # check if there are no crashes + + def test_apply_mon_three(self): + # Evaluating the process of reducing the number of + # monitors from 5 to 3 and increasing the number of + # monitors from 3 to 5, using the `ceph orch apply mon <num>` command. + + self.wait_until_equal(lambda : self._get_quorum_size(), 5, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._orch_cmd('apply', 'mon', '3') # reduce the monitors from 5 -> 3 + + self._create_and_write_pool('test_pool1') + + self.wait_until_equal(lambda : self._get_quorum_size(), 3, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._check_no_crashes() + + self._orch_cmd('apply', 'mon', '5') # increase the monitors from 3 -> 5 + + self._create_and_write_pool('test_pool2') + + self.wait_until_equal(lambda : self._get_quorum_size(), 5, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._check_no_crashes()
def run_mds_cap_tests(self, filepaths, filedata, mounts, perm):
    """
    Check that MDS caps are enforced for the given mounts.

    The read checks always run first. Afterwards writes are expected to
    succeed when ``perm`` is 'rw' and to be refused when ``perm`` is 'r'.

    :param filepaths: paths of the files to exercise
    :param filedata: expected contents, paired with ``filepaths``
    :param mounts: mount objects to run the checks through
    :param perm: 'r' or 'rw'
    :raises RuntimeError: if ``perm`` is neither 'r' nor 'rw'
    """
    self.conduct_pos_test_for_read_caps(filepaths, filedata, mounts)

    if perm == 'r':
        self.conduct_neg_test_for_write_caps(filepaths, mounts)
        return
    if perm == 'rw':
        self.conduct_pos_test_for_write_caps(filepaths, mounts)
        return

    raise RuntimeError(f'perm = {perm}\nIt should be "r" or "rw".')
def conduct_neg_test_for_write_caps(self, filepaths, mounts):
    """
    Verify that writing is refused on every path that belongs to a mount.

    For each mount, every path in ``filepaths`` that contains the mount's
    ``hostfs_mntpt`` is written to with ``echo ... | tee <path>`` through
    ``mount.negtestcmd``, which is told to expect exit status 1 and a
    'permission denied' message.

    :param filepaths: candidate file paths
    :param mounts: mount objects to run the negative test through
    """
    # Prefix shared by every attempt. The target path is added per call
    # rather than appended to this list, so a path tested earlier is never
    # passed to tee again alongside a later one.
    base_cmd = ['echo', 'some random data', Raw('|'), 'tee']

    for mount in mounts:
        for path in filepaths:
            # conduct the test only if path belongs to the current mount
            if path.find(mount.hostfs_mntpt) == -1:
                continue
            mount.negtestcmd(args=base_cmd + [path], retval=1,
                             errmsg='permission denied')
class MountDetails():
    """
    Snapshot of the mount-object attributes that tests may modify.

    The values are captured at construction time and can be written back
    onto a mount object with restore().
    """

    # attributes copied from the mount object and written back by restore()
    _SAVED_ATTRS = ('client_id', 'client_keyring_path', 'client_remote',
                    'cephfs_name', 'cephfs_mntpt', 'hostfs_mntpt')

    def __init__(self, mntobj):
        for attr in self._SAVED_ATTRS:
            setattr(self, attr, getattr(mntobj, attr))

    def restore(self, mntobj):
        """Write the saved attribute values back onto ``mntobj``."""
        for attr in self._SAVED_ATTRS:
            setattr(mntobj, attr, getattr(self, attr))
+ """ + + # FIXME weird explicit naming + mount_a = None + mount_b = None + recovery_mount = None + + # Declarative test requirements: subclasses should override these to indicate + # their special needs. If not met, tests will be skipped. + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + REQUIRE_KCLIENT_REMOTE = False + REQUIRE_ONE_CLIENT_REMOTE = False + + # Whether to create the default filesystem during setUp + REQUIRE_FILESYSTEM = True + + # requires REQUIRE_FILESYSTEM = True + REQUIRE_RECOVERY_FILESYSTEM = False + + # create a backup filesystem if required. + # required REQUIRE_FILESYSTEM enabled + REQUIRE_BACKUP_FILESYSTEM = False + + LOAD_SETTINGS = [] # type: ignore + + def _save_mount_details(self): + """ + XXX: Tests may change details of mount objects, so let's stash them so + that these details are restored later to ensure smooth setUps and + tearDowns for upcoming tests. + """ + self._orig_mount_details = [MountDetails(m) for m in self.mounts] + log.info(self._orig_mount_details) + + def _remove_blocklist(self): + # In case anything is in the OSD blocklist list, clear it out. This is to avoid + # the OSD map changing in the background (due to blocklist expiry) while tests run. + try: + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "clear") + except CommandFailedError: + # Fallback for older Ceph cluster + try: + blocklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", + "dump", "--format=json-pretty"))['blocklist'] + log.info(f"Removing {len(blocklist)} blocklist entries") + for addr, blocklisted_at in blocklist.items(): + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "rm", addr) + except KeyError: + # Fallback for more older Ceph clusters, who will use 'blacklist' instead. 
+ blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", + "dump", "--format=json-pretty"))['blacklist'] + log.info(f"Removing {len(blacklist)} blacklist entries") + for addr, blocklisted_at in blacklist.items(): + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr) + + def setUp(self): + super(CephFSTestCase, self).setUp() + + self.config_set('mon', 'mon_allow_pool_delete', True) + + if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED: + self.skipTest("Only have {0} MDSs, require {1}".format( + len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED + )) + + if len(self.mounts) < self.CLIENTS_REQUIRED: + self.skipTest("Only have {0} clients, require {1}".format( + len(self.mounts), self.CLIENTS_REQUIRED + )) + + if self.REQUIRE_ONE_CLIENT_REMOTE: + if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames(): + self.skipTest("Require first client to be on separate server from MDSs") + + # Create friendly mount_a, mount_b attrs + for i in range(0, self.CLIENTS_REQUIRED): + setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i]) + + self.mds_cluster.clear_firewall() + + # Unmount all clients, we are about to blow away the filesystem + for mount in self.mounts: + if mount.is_mounted(): + mount.umount_wait(force=True) + self._save_mount_details() + + # To avoid any issues with e.g. unlink bugs, we destroy and recreate + # the filesystem rather than just doing a rm -rf of files + self.mds_cluster.delete_all_filesystems() + self.mds_cluster.mds_restart() # to reset any run-time configs, etc. + self.fs = None # is now invalid! 
+ self.backup_fs = None + self.recovery_fs = None + + self._remove_blocklist() + + client_mount_ids = [m.client_id for m in self.mounts] + # In case there were any extra auth identities around from a previous + # test, delete them + for entry in self.auth_list(): + ent_type, ent_id = entry['entity'].split(".") + if ent_type == "client" and ent_id not in client_mount_ids and not (ent_id == "admin" or ent_id[:6] == 'mirror'): + self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity']) + + if self.REQUIRE_FILESYSTEM: + self.fs = self.mds_cluster.newfs(create=True) + + # In case some test messed with auth caps, reset them + for client_id in client_mount_ids: + cmd = ['auth', 'caps', f'client.{client_id}', 'mon','allow r', + 'osd', f'allow rw pool={self.fs.get_data_pool_name()}', + 'mds', 'allow'] + + if self.run_cluster_cmd_result(cmd) == 0: + break + + cmd[1] = 'add' + if self.run_cluster_cmd_result(cmd) != 0: + raise RuntimeError(f'Failed to create new client {cmd[2]}') + + # wait for ranks to become active + self.fs.wait_for_daemons() + + # Mount the requested number of clients + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].mount_wait() + + if self.REQUIRE_BACKUP_FILESYSTEM: + if not self.REQUIRE_FILESYSTEM: + self.skipTest("backup filesystem requires a primary filesystem as well") + self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', + 'enable_multiple', 'true', + '--yes-i-really-mean-it') + self.backup_fs = self.mds_cluster.newfs(name="backup_fs") + self.backup_fs.wait_for_daemons() + + if self.REQUIRE_RECOVERY_FILESYSTEM: + if not self.REQUIRE_FILESYSTEM: + self.skipTest("Recovery filesystem requires a primary filesystem as well") + # After Octopus is EOL, we can remove this setting: + self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', + 'enable_multiple', 'true', + '--yes-i-really-mean-it') + self.recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False) + self.recovery_fs.set_metadata_overlay(True) + 
def get_session_data(self, client_id):
    """
    Return the MDS session entry of the client with the given id.

    The session list is fetched with the 'session ls' admin socket command
    and indexed by session id. Previously ``client_id`` itself was handed to
    _session_by_id(), which expects a list of session dicts, so no session
    data could ever be returned.

    :param client_id: session id to look up
    :raises KeyError: if no session with that id exists
    """
    session_ls = self.fs.mds_asok(['session', 'ls'])
    return self._session_by_id(session_ls)[client_id]
def wait_until_evicted(self, client_id, timeout=30):
    """
    Block until no session with id ``client_id`` is listed any more.

    :param client_id: session id that is expected to disappear
    :param timeout: passed through to wait_until_true()
    """
    def is_client_evicted():
        # evicted means: none of the listed sessions carries this id
        return all(session['id'] != client_id
                   for session in self._session_list())

    self.wait_until_true(is_client_evicted, timeout)
+ core_pattern = self.mds_cluster.mds_daemons[daemon_id].remote.sh( + "sudo sysctl -n kernel.core_pattern") + core_dir = os.path.dirname(core_pattern.strip()) + if core_dir: # Non-default core_pattern with a directory in it + # We have seen a core_pattern that looks like it's from teuthology's coredump + # task, so proceed to clear out the core file + if core_dir[0] == '|': + log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:])) + return; + + log.info("Clearing core from directory: {0}".format(core_dir)) + + # Verify that we see the expected single coredump + ls_output = self.mds_cluster.mds_daemons[daemon_id].remote.sh([ + "cd", core_dir, run.Raw('&&'), + "sudo", "ls", run.Raw('|'), "sudo", "xargs", "file" + ]) + cores = [l.partition(":")[0] + for l in ls_output.strip().split("\n") + if re.match(r'.*ceph-mds.* -i +{0}'.format(daemon_id), l)] + + log.info("Enumerated cores: {0}".format(cores)) + self.assertEqual(len(cores), 1) + + log.info("Found core file {0}, deleting it".format(cores[0])) + + self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[ + "cd", core_dir, run.Raw('&&'), "sudo", "rm", "-f", cores[0] + ]) + else: + log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)") + + def _get_subtrees(self, status=None, rank=None, path=None): + if path is None: + path = "/" + try: + with contextutil.safe_while(sleep=1, tries=3) as proceed: + while proceed(): + try: + if rank == "all": + subtrees = [] + for r in self.fs.get_ranks(status=status): + s = self.fs.rank_asok(["get", "subtrees"], status=status, rank=r['rank']) + s = filter(lambda s: s['auth_first'] == r['rank'] and s['auth_second'] == -2, s) + subtrees += s + else: + subtrees = self.fs.rank_asok(["get", "subtrees"], status=status, rank=rank) + subtrees = filter(lambda s: s['dir']['path'].startswith(path), subtrees) + return list(subtrees) + except CommandFailedError as e: + # Sometimes we get transient errors + if e.exitstatus == 22: + pass 
+ else: + raise + except contextutil.MaxWhileTries as e: + raise RuntimeError(f"could not get subtree state from rank {rank}") from e + + def _wait_subtrees(self, test, status=None, rank=None, timeout=30, sleep=2, action=None, path=None): + test = sorted(test) + try: + with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees]) + log.info("%s =?= %s", filtered, test) + if filtered == test: + # Confirm export_pin in output is correct: + for s in subtrees: + if s['export_pin_target'] >= 0: + self.assertTrue(s['export_pin_target'] == s['auth_first']) + return subtrees + if action is not None: + action() + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_until_scrub_complete(self, path="/", recursive=True, timeout=100): + out_json = self.fs.run_scrub(["start", path] + ["recursive"] if recursive else []) + if not self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"], + sleep=10, timeout=timeout): + log.info("timed out waiting for scrub to complete") + + def _wait_distributed_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['distributed_ephemeral_pin'] == True and + s['auth_first'] == s['export_pin_target'], + subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_random_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while 
def create_client(self, client_id, moncap=None, osdcap=None, mdscap=None):
    """
    Create the auth entity ``client.<client_id>`` and return its keyring.

    When no caps are passed, the client is authorized for read/write on the
    root of the default file system through ``self.fs.authorize``. Otherwise
    the entity is added with exactly the caps given and its keyring is
    fetched with 'auth get'.

    :param client_id: id of the client, without the 'client.' prefix
    :param moncap: MON caps string, optional
    :param osdcap: OSD caps string, optional
    :param mdscap: MDS caps string, optional
    :raises RuntimeError: if no caps are passed and ``self.fs`` is not set
    """
    if not (moncap or osdcap or mdscap):
        if self.fs:
            return self.fs.authorize(client_id, ('/', 'rw'))
        raise RuntimeError('no caps were passed and the default FS '
                           'is not created yet to allow client auth '
                           'for it.')

    # Build the entity name from the argument so that the keyring fetched
    # below is the one of the client just created; it used to be read from
    # self.client_name, which is unrelated to client_id.
    client_name = f'client.{client_id}'

    cmd = ['auth', 'add', client_name]
    if moncap:
        cmd += ['mon', moncap]
    if osdcap:
        cmd += ['osd', osdcap]
    if mdscap:
        cmd += ['mds', mdscap]

    self.run_cluster_cmd(cmd)
    return self.run_cluster_cmd(f'auth get {client_name}')
class FileLayout(object):
    """
    Container for the fields of a file layout.

    Every field is optional; items() only reports the fields that are set.
    """

    def __init__(self, pool=None, pool_namespace=None, stripe_unit=None, stripe_count=None, object_size=None):
        self.pool = pool
        self.pool_namespace = pool_namespace
        self.stripe_unit = stripe_unit
        self.stripe_count = stripe_count
        self.object_size = object_size

    @classmethod
    def load_from_ceph(cls, layout_str):
        """
        Placeholder for building a layout from a layout string.

        Not implemented yet; returns None. ``cls`` was missing from the
        signature, which made the classmethod impossible to call with an
        argument.
        """
        # TODO
        pass

    def items(self):
        """
        Yield (name, value) pairs for the layout fields that are set.

        ``pool_namespace`` is skipped when it is empty as well as when it
        is None; the other fields are skipped only when they are None.
        """
        if self.pool is not None:
            yield ("pool", self.pool)
        if self.pool_namespace:
            yield ("pool_namespace", self.pool_namespace)
        if self.stripe_unit is not None:
            yield ("stripe_unit", self.stripe_unit)
        if self.stripe_count is not None:
            yield ("stripe_count", self.stripe_count)
        if self.object_size is not None:
            # was self.stripe_size, an attribute this class never defines
            yield ("object_size", self.object_size)
def get_all(self):
    """
    Iterate over every mds_info entry in the FSMap: the standbys first,
    then the entries of each file system's mdsmap, in map order.
    """
    yield from self.map['standbys']
    for filesystem in self.map['filesystems']:
        yield from filesystem['mdsmap']['info'].values()
def get_mds_addrs(self, name):
    """
    Return the addresses of the named MDS as a list of strings, like
    ["10.214.133.138:6807", "10.214.133.138:6808"].

    :param name: MDS name to look up with get_mds()
    :raises RuntimeError: if the MDS is not found; the known mds_info
        entries are logged first for debugging
    """
    info = self.get_mds(name)
    if info:
        return [e['addr'] for e in info['addrs']['addrvec']]

    # log.warning, as in get_mds_addr(): Logger.warn is a deprecated alias
    log.warning(json.dumps(list(self.get_all()), indent=2))  # dump for debugging
    raise RuntimeError("MDS id '{0}' not found in map".format(name))
class CephCluster(object):
    """
    Handle on a Ceph cluster for tests: keeps the teuthology context and a
    CephManager that runs commands through the admin remote.
    """

    @property
    def admin_remote(self):
        """The single remote that carries the first mon role."""
        first_mon = misc.get_first_mon(self._ctx, None)
        (result,) = self._ctx.cluster.only(first_mon).remotes.keys()
        return result

    def __init__(self, ctx) -> None:
        self._ctx = ctx
        self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))

    def get_config(self, key, service_type=None):
        """
        Get config from mon by default, or a specific service if caller asks for it

        The value is read with `config get` over the admin socket of the
        first (in sorted order) daemon of the requested type.
        """
        if service_type is None:
            service_type = 'mon'

        service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0]
        return self.json_asok(['config', 'get', key], service_type, service_id)[key]

    def set_ceph_conf(self, subsys, key, value):
        """Set `key = value` in section `subsys` of the ceph conf and rewrite it."""
        conf = self._ctx.ceph['ceph'].conf
        conf.setdefault(subsys, {})[key] = value
        write_conf(self._ctx)  # XXX because we don't have the ceph task's config object, if they
                               # used a different config path this won't work.

    def clear_ceph_conf(self, subsys, key):
        """Remove `key` from section `subsys` of the ceph conf and rewrite it."""
        del self._ctx.ceph['ceph'].conf[subsys][key]
        write_conf(self._ctx)

    def json_asok(self, command, service_type, service_id, timeout=None):
        """
        Run an admin socket command with JSON output and decode the reply.

        :param command: sequence of command words; it is not modified
        :param service_type: daemon type, e.g. 'mon' or 'mds'
        :param service_id: daemon id within that type
        :param timeout: seconds to allow, defaults to 15 minutes
        :return: the decoded JSON reply, or None if the daemon printed nothing
        """
        if timeout is None:
            timeout = 15*60
        # Build a new list rather than insert() into the caller's one, so a
        # list that is reused across calls does not gather extra
        # '--format=json' entries.
        command = ['--format=json', *command]
        proc = self.mon_manager.admin_socket(service_type, service_id, command, timeout=timeout)
        response_data = proc.stdout.getvalue().strip()
        if len(response_data) > 0:
            j = json.loads(response_data)
            pretty = json.dumps(j, sort_keys=True, indent=2)
            log.debug(f"_json_asok output\n{pretty}")
            return j
        else:
            log.debug("_json_asok output empty")
            return None

    def is_addr_blocklisted(self, addr):
        """Return True if `addr` is a key of the 'blocklist' in `osd dump`."""
        blocklist = json.loads(self.mon_manager.raw_cluster_cmd(
            "osd", "dump", "--format=json"))['blocklist']
        if addr in blocklist:
            return True
        # Logger.warn() is a deprecated alias of warning().
        log.warning(f'The address {addr} is not blocklisted')
        return False
def get_config(self, key, service_type=None):
    """
    Variant of get_config that handles service_type="mds" itself: the value
    is read from one randomly picked MDS daemon that is currently running.
    Any other service type is delegated to the parent class.
    """
    if service_type == "mds":
        # Tests may have stopped some MDS daemons; only query a live one.
        live_ids = []
        for daemon_id, daemon in self.mds_daemons.items():
            if daemon.running():
                live_ids.append(daemon_id)
        (chosen,) = random.sample(live_ids, 1)
        reply = self.json_asok(['config', 'get', key], service_type, chosen)
        return reply[key]

    return super(MDSCluster, self).get_config(key, service_type)
def set_clients_block(self, blocked, mds_id=None):
    """
    Block (using iptables) client communications to this MDS. Be careful: if
    other services are running on this MDS, or other MDSs try to talk to this
    MDS, their communications may also be blocked as collateral damage.

    :param blocked: True to add (-A) the REJECT rules, False to delete (-D) them
    :param mds_id: Optional ID of MDS to block, default to all
    :raises RuntimeError: if the MDS address is not of the form ip:port/nonce
    :return:
    """
    da_flag = "-A" if blocked else "-D"

    def set_block(_mds_id):
        remote = self.mon_manager.find_remote('mds', _mds_id)
        status = self.status()

        addr = status.get_mds_addr(_mds_id)
        parsed = re.match(r"(.+):(.+)/(.+)", addr)
        if parsed is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise RuntimeError("Cannot parse MDS address '{0}'".format(addr))
        port_str = parsed.group(2)

        # Reject traffic leaving from, and arriving at, the MDS port.
        for chain, port_flag in (("OUTPUT", "--sport"), ("INPUT", "--dport")):
            remote.run(
                args=["sudo", "iptables", da_flag, chain, "-p", "tcp", port_flag, port_str, "-j", "REJECT", "-m",
                      "comment", "--comment", "teuthology"])

    # Rules are applied one MDS after the other, never in parallel.
    self._one_or_all(mds_id, set_block, in_parallel=False)
def __init__(self, ctx, fs_config=None, fscid=None, name=None, create=False):
    """
    Bind to an existing file system, or optionally create a new one.

    :param ctx: teuthology context, handed on to the parent constructor
    :param fs_config: dict of file system settings ('ec_profile' is read
                      here); None means an empty config
    :param fscid: FSCID of an existing file system; cannot be combined
                  with name
    :param name: file system name; with create=True the file system is
                 created unless a legacy configuration is detected
    :param create: whether to create the named file system
    :raises RuntimeError: if both name and fscid are given
    """
    super(Filesystem, self).__init__(ctx)

    # A `{}` default would be evaluated once and then shared by every
    # instance built without a config, so create a fresh dict per instance.
    if fs_config is None:
        fs_config = {}

    self.name = name
    self.id = None
    self.metadata_pool_name = None
    self.metadata_overlay = False
    self.data_pool_name = None
    self.data_pools = None
    self.fs_config = fs_config
    self.ec_profile = fs_config.get('ec_profile')

    client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
    self.client_id = client_list[0]
    self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]

    if name is not None:
        if fscid is not None:
            raise RuntimeError("cannot specify fscid when creating fs")
        if create and not self.legacy_configured():
            self.create()
    else:
        if fscid is not None:
            self.id = fscid
        self.getinfo(refresh=True)

    # Stash a reference to the first created filesystem on ctx, so
    # that if someone drops to the interactive shell they can easily
    # poke our methods.
    if not hasattr(self._ctx, "filesystem"):
        self._ctx.filesystem = self
def set_var(self, var, *args):
    """
    Run `fs set <fs name> <var> <args...>`, passing each extra argument as
    its lower-cased string form (so True becomes "true").
    """
    lowered = [str(value).lower() for value in args]
    self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *lowered)
def required_client_features(self, *args, **kwargs):
    """
    Run `fs required_client_features <fs name> <args...>` through
    mon_manager.run_cluster_cmd and return its result; keyword arguments
    are forwarded unchanged.
    """
    cmd = ["fs", "required_client_features", self.name]
    cmd.extend(args)
    return self.mon_manager.run_cluster_cmd(args=cmd, **kwargs)
+ if not self.metadata_overlay and self.ec_profile and 'disabled' not in self.ec_profile: + self.target_size_ratio = 0.05 + + log.debug("Creating filesystem '{0}'".format(self.name)) + + try: + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.metadata_pool_name, str(self.pg_num), + '--pg_num_min', str(self.pg_num_min)) + + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', + str(self.target_size_ratio)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + str(self.pg_num_min)) + + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + str(self.pg_num_min)) + else: + raise + + if self.metadata_overlay: + self.mon_manager.raw_cluster_cmd('fs', 'new', + self.name, self.metadata_pool_name, data_pool_name, + '--allow-dangerous-metadata-overlay') + else: + self.mon_manager.raw_cluster_cmd('fs', 'new', + self.name, + self.metadata_pool_name, + data_pool_name) + + if self.ec_profile and 'disabled' not in self.ec_profile: + ec_data_pool_name = data_pool_name + "_ec" + log.debug("EC profile is %s", self.ec_profile) + cmd = ['osd', 'erasure-code-profile', 'set', ec_data_pool_name] + cmd.extend(self.ec_profile) + self.mon_manager.raw_cluster_cmd(*cmd) + try: + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', ec_data_pool_name, + 'erasure', ec_data_pool_name, + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', str(self.target_size_ratio_ec)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', ec_data_pool_name, + str(self.pg_num_min), 'erasure', ec_data_pool_name) + else: + raise + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 
def run_client_payload(self, cmd):
    """
    Mount this file system with a ceph-fuse client (as "admin") on the
    test directory, run the shell payload `cmd` in it and unmount again.
    """
    # avoid circular dep by importing here:
    from tasks.cephfs.fuse_mount import FuseMount

    # The MDS daemons have to be ready before the ceph-fuse client
    # can be mounted.
    self.wait_for_daemons()

    test_dir = misc.get_testdir(self._ctx)
    mount = FuseMount(self._ctx, {}, test_dir, "admin", self.client_remote, cephfs_name=self.name)
    mount.mount_wait()
    mount.run_shell_payload(cmd)
    mount.umount_wait(require_clean=True)
def check_pool_application(self, pool_name):
    """
    Verify that the named pool is tagged for use by cephfs.

    Pools with a different name, and pools whose OSD map entry carries no
    "application_metadata" field, are skipped; a pool name that is not in
    the OSD map at all is therefore not an error.

    :param pool_name: name of the pool to check
    :raises RuntimeError: if the pool has application metadata that does
                          not include "cephfs"
    """
    osd_map = self.mon_manager.get_osd_dump_json()
    for pool in osd_map['pools']:
        if pool['pool_name'] != pool_name:
            continue
        if "application_metadata" not in pool:
            continue
        if "cephfs" not in pool['application_metadata']:
            raise RuntimeError(
                "Pool {pool_name} does not name cephfs as application!".format(pool_name=pool_name))
def add_data_pool(self, name, create=True):
    """
    Attach the pool `name` to this file system as a data pool, creating
    the pool first unless create is False, and return the pool's id.

    Raises RuntimeError if the pool is not among the file system's data
    pools after the pool names have been refreshed.
    """
    if create:
        create_cmd = ('osd', 'pool', 'create', name)
        try:
            self.mon_manager.raw_cluster_cmd(*create_cmd, '--pg_num_min', str(self.pg_num_min))
        except CommandFailedError as e:
            if e.exitstatus != 22:
                raise
            # nautilus couldn't specify --pg_num_min option
            self.mon_manager.raw_cluster_cmd(*create_cmd, str(self.pg_num_min))

    self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
    self.get_pool_names(refresh=True)

    for pool_id, pool_name in self.data_pools.items():
        if pool_name == name:
            return pool_id
    raise RuntimeError("could not get just created pool '{0}'".format(name))
def set_data_pool_name(self, name):
    """
    Record the data pool name on this object.

    :param name: data pool name to store in self.data_pool_name
    :raises RuntimeError: if this object already has an fscid (self.id)
    """
    if self.id is not None:
        # The message used to speak of the "filesystem name", which is not
        # what this setter changes.
        raise RuntimeError("can't set data pool name if the filesystem's fscid is set")
    self.data_pool_name = name
def get_daemon_names(self, state=None, status=None):
    """
    List the names of the MDS daemons in this file system's MDS map,
    ordered by rank.

    :param state: only include daemons in exactly this state; None
                  includes every daemon
    :param status: passed through to get_mds_map
    :return: list of daemon names
    """
    infos = list(self.get_mds_map(status)['info'].values())
    infos.sort(key=lambda info: info['rank'])
    return [info['name'] for info in infos
            if info['state'] == state or state is None]
def radosmo(self, *args, stdout=None, **kwargs):
    """
    Interact with the metadata pool via rados CLI. Get the stdout.

    :param args: positional arguments forwarded to radosm
    :param stdout: object that receives the command output; a new BytesIO
                   is used for each call when omitted
    :param kwargs: keyword arguments forwarded to radosm
    :return: stdout.getvalue() of the finished command
    """
    if stdout is None:
        # A BytesIO() default in the signature is created only once, so
        # every call relying on it would append to, and return, the output
        # of all earlier calls.
        stdout = BytesIO()
    return self.radosm(*args, **kwargs, stdout=stdout).stdout.getvalue()
def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None):
    """
    Block until the MDS reaches a particular state, or a failure condition
    is met.

    When neither ``rank`` nor ``mds_id`` is given, succeed when exactly one
    MDS is in the goal state, or fail when any MDS is in the reject state.

    :param goal_state: Return once the MDS is in this state
    :param reject: Fail if the MDS enters this state before the goal state
    :param timeout: Fail if this many seconds pass before reaching goal
    :param mds_id: if set (and ``rank`` is None), watch only this daemon
    :param rank: if set, watch only this rank; takes precedence over mds_id
    :return: number of seconds waited (a float)
    :raises RuntimeError: when the reject state is seen or the timeout
                          expires
    """

    started_at = time.time()
    while True:
        status = self.status()
        if rank is not None:
            try:
                mds_info = status.get_rank(self.id, rank)
                current_state = mds_info['state'] if mds_info else None
                log.debug("Looked up MDS state for mds.{0}: {1}".format(rank, current_state))
            except Exception:
                # Only ordinary lookup errors are treated as "rank may be
                # failed"; KeyboardInterrupt/SystemExit must propagate
                # instead of being folded into the wait loop.
                mdsmap = self.get_mds_map(status=status)
                if rank in mdsmap['failed']:
                    log.debug("Waiting for rank {0} to come back.".format(rank))
                    current_state = None
                else:
                    raise
        elif mds_id is not None:
            # mds_info is None if no daemon with this ID exists in the map
            mds_info = status.get_mds(mds_id)
            current_state = mds_info['state'] if mds_info else None
            log.debug("Looked up MDS state for {0}: {1}".format(mds_id, current_state))
        else:
            # In general, look for a single MDS
            states = [m['state'] for m in status.get_ranks(self.id)]
            if states.count(goal_state) == 1:
                current_state = goal_state
            elif reject in states:
                current_state = reject
            else:
                current_state = None
            log.debug("mapped states {0} to {1}".format(states, current_state))

        elapsed = time.time() - started_at
        if current_state == goal_state:
            log.debug("reached state '{0}' in {1}s".format(current_state, elapsed))
            return elapsed
        elif reject is not None and current_state == reject:
            raise RuntimeError("MDS in reject state {0}".format(current_state))
        elif timeout is not None and elapsed > timeout:
            log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id)))
            raise RuntimeError(
                "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format(
                    elapsed, goal_state, current_state
                ))
        else:
            time.sleep(1)
def read_layout(self, ino_no, pool=None):
    """
    Read the 'layout' xattr of an inode's first data object and return it
    decoded as ``file_layout_t``, i.e. a dict like:
    ::
        {
            "stripe_unit": 4194304,
            "stripe_count": 1,
            "object_size": 4194304,
            "pool_id": 1,
            "pool_ns": "",
        }

    :param ino_no: integer inode number
    :param pool: name of the pool to read the layout from; passed through
                 unchanged to ``_read_data_xattr``
    """
    xattr_name, dencoder_type = "layout", "file_layout_t"
    return self._read_data_xattr(ino_no, xattr_name, dencoder_type, pool)
def list_dirfrag(self, dir_ino):
    """
    List the omap keys of the first dirfrag object (``<ino hex>.00000000``)
    of a directory inode, using ``listomapkeys`` via ``self.radosmo``.

    :param dir_ino: integer inode number of the directory
    :return: a list of 0 or more strings
    :raises ObjectNotFound: if the listomapkeys command fails
    """
    obj_name = f"{dir_ino:x}.00000000"

    try:
        listing = self.radosmo(["listomapkeys", obj_name], stdout=StringIO())
    except CommandFailedError as err:
        log.error(str(err))
        raise ObjectNotFound(obj_name)

    # No output at all means the object has no keys.
    if not listing:
        return []
    return listing.strip().split("\n")
def erase_metadata_objects(self, prefix):
    """
    Remove every object whose name starts with ``prefix``, taking the
    names from ``self.radosmo(["ls"])`` and deleting with ``self.radosm``.

    The whole listing is fetched and scanned, so the cost is O(N) in the
    number of objects listed: only suitable for toy test filesystems.

    :param prefix: object-name prefix selecting what to delete
    """
    listing = self.radosmo(["ls"], stdout=StringIO())
    for name in listing.strip().split("\n"):
        if name.startswith(prefix):
            self.radosm(["rm", name])
def _run_tool(self, tool, args, rank=None, quiet=False):
    """
    Run one of the cephfs offline tools on ``self.tool_remote`` and return
    its stripped stdout.

    :param tool: executable name, joined onto ``self._prefix``
    :param args: list of arguments appended after the debug/rank options
    :param rank: if not None, passed to the tool as ``--rank <rank>``
    :param quiet: use ``--debug-mds=1`` instead of ``--debug-mds=4``
    :return: the tool's stdout with surrounding whitespace stripped
    """
    # Tests frequently have [client] configuration that jacks up
    # the objecter log level (unlikely to be interesting here)
    # and does not set the mds log level (very interesting here)
    mds_debug = '--debug-mds=1' if quiet else '--debug-mds=4'
    base_args = [os.path.join(self._prefix, tool), mds_debug, '--debug-objecter=1']

    if rank is not None:
        base_args.extend(["--rank", str(rank)])

    started = datetime.datetime.now()
    r = self.tool_remote.sh(script=base_args + args, stdout=StringIO()).strip()
    duration = datetime.datetime.now() - started
    log.debug("Ran {0} in time {1}, result:\n{2}".format(
        base_args + args, duration, r
    ))
    return r
def data_scan(self, args, quiet=False, worker_count=1):
    """
    Invoke cephfs-data-scan with the passed arguments, and return its stdout

    :param args: list whose first token is the data-scan command, followed
                 by that command's own arguments
    :param quiet: forwarded to ``_run_tool``
    :param worker_count: if greater than 1, multiple workers will be run
                         in parallel and the return value will be None
    """
    sharded = worker_count > 1
    workers = []

    for idx in range(worker_count):
        if sharded:
            # The worker options go directly after the command token and
            # before the command's own arguments.
            worker_args = [args[0], "--worker_n", str(idx),
                           "--worker_m", str(worker_count)] + args[1:]
        else:
            worker_args = args

        workers.append(Greenlet.spawn(
            self._run_tool, "cephfs-data-scan", worker_args, None, quiet))

    # Wait for every worker before deciding what to return.
    for worker in workers:
        worker.get()

    return workers[0].value if worker_count == 1 else None
def wait_until_scrub_complete(self, result=None, tag=None, rank=0, sleep=30,
                              timeout=300, reverse=False):
    """
    Poll ``scrub status`` on a rank until scrubbing is seen to be finished.

    :param result: substring looked for in the reported ``status`` string;
                   defaults to "no active scrubs running"
    :param tag: if given, the scrub with this tag is also tracked: once it
                is no longer listed under ``scrubs`` the wait ends
    :param rank: rank to query
    :param sleep: seconds between polls
    :param timeout: overall budget in seconds; ``timeout // sleep`` polls
                    are attempted
    :param reverse: if True, finish when ``result`` is *absent* from the
                    status string instead of present
    :return: True once completion is detected, False if the polling loop
             ends without detecting it
    """
    # time out after "timeout" seconds and assume as done
    if result is None:
        result = "no active scrubs running"
    with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
        while proceed():
            out_json = self.rank_tell(["scrub", "status"], rank=rank)
            assert out_json is not None
            if not reverse:
                if result in out_json['status']:
                    log.info("all active scrubs completed")
                    return True
            else:
                if result not in out_json['status']:
                    log.info("all active scrubs completed")
                    return True

            if tag is not None:
                # Look the tag up tolerantly: a missing entry must reach
                # the "completed" branch below rather than raise KeyError.
                status = (out_json.get('scrubs') or {}).get(tag)
                if status is not None:
                    log.info(f"scrub status for tag:{tag} - {status}")
                else:
                    log.info(f"scrub has completed for tag:{tag}")
                    return True

    # timed out waiting for scrub to complete
    return False
teuthology.orchestra.run import CommandFailedError +from tasks.ceph_manager import get_valgrind_args +from tasks.cephfs.mount import CephFSMount + +log = logging.getLogger(__name__) + +# Refer mount.py for docstrings. +class FuseMount(CephFSMount): + def __init__(self, ctx, client_config, test_dir, client_id, + client_remote, client_keyring_path=None, cephfs_name=None, + cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None): + super(FuseMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + self.client_config = client_config if client_config else {} + self.fuse_daemon = None + self._fuse_conn = None + self.id = None + self.inst = None + self.addr = None + + def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + if createfs: + # TODO: don't call setupfs() from within mount(), since it's + # absurd. The proper order should be: create FS first and then + # call mount(). + self.setupfs(name=self.cephfs_name) + + try: + return self._mount(mntopts, check_status) + except RuntimeError: + # Catch exceptions by the mount() logic (i.e. not remote command + # failures) and ensure the mount is not left half-up. + # Otherwise we might leave a zombie mount point that causes + # anyone traversing cephtest/ to get hung up on. 
def _mount(self, mntopts, check_status):
    """
    Create the mountpoint, start ceph-fuse inside the client's network
    namespace and wait for its FUSE connection to appear under
    /sys/fs/fuse/connections.

    On success ``self.fuse_daemon``, ``self._fuse_conn`` and
    ``self.mounted`` are set and ``gather_mount_info()`` has been called.

    :param mntopts: extra options appended to the ceph-fuse command line
    :param check_status: if True, a failing ceph-fuse command raises
                         CommandFailedError; if False the failure is
                         returned instead
    :return: None on success, or ``(exception, stdout, stderr)`` when
             ceph-fuse failed and ``check_status`` is False
    :raises RuntimeError: if no new fuse connection shows up within
                          ``mount_timeout`` seconds (client config,
                          default 30), or more than one new one appears
    """
    log.info("Client client.%s config is %s" % (self.client_id,
                                                self.client_config))

    daemon_signal = 'kill'
    if self.client_config.get('coverage') or \
       self.client_config.get('valgrind') is not None:
        daemon_signal = 'term'

    # Use 0000 mode to prevent undesired modifications to the mountpoint on
    # the local file system.
    script = f'mkdir -m 0000 -p -v {self.hostfs_mntpt}'.split()
    stderr = StringIO()
    try:
        # Capture stderr in the buffer that the handler below inspects;
        # handing the command a different StringIO would leave that buffer
        # empty and the 'file exists' check could never match.
        self.client_remote.run(args=script, timeout=(15*60),
                               stderr=stderr)
    except CommandFailedError:
        if 'file exists' not in stderr.getvalue().lower():
            raise

    run_cmd = [
        'sudo',
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=self.test_dir),
        'daemon-helper',
        daemon_signal,
    ]

    fuse_cmd = [
        'ceph-fuse', "-f",
        "--admin-socket", "/var/run/ceph/$cluster-$name.$pid.asok",
    ]
    if self.client_id is not None:
        fuse_cmd += ['--id', self.client_id]
    if self.client_keyring_path and self.client_id is not None:
        fuse_cmd += ['-k', self.client_keyring_path]
    if self.cephfs_mntpt is not None:
        fuse_cmd += ["--client_mountpoint=" + self.cephfs_mntpt]
    if self.cephfs_name is not None:
        fuse_cmd += ["--client_fs=" + self.cephfs_name]
    if mntopts:
        fuse_cmd += mntopts
    fuse_cmd.append(self.hostfs_mntpt)

    if self.client_config.get('valgrind') is not None:
        run_cmd = get_valgrind_args(
            self.test_dir,
            'client.{id}'.format(id=self.client_id),
            run_cmd,
            self.client_config.get('valgrind'),
            cd=False
        )

    netns_prefix = ['sudo', 'nsenter',
                    '--net=/var/run/netns/{0}'.format(self.netns_name)]
    run_cmd = netns_prefix + run_cmd

    run_cmd.extend(fuse_cmd)

    def list_connections():
        """Return the fuse connection numbers currently listed in /sys."""
        conn_dir = "/sys/fs/fuse/connections"

        self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
                               check_status=False)
        self.client_remote.run(
            args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
            check_status=False, timeout=(30))

        try:
            ls_str = self.client_remote.sh("ls " + conn_dir,
                                           stdout=StringIO(),
                                           timeout=(15*60)).strip()
        except CommandFailedError:
            return []

        if ls_str:
            return [int(n) for n in ls_str.split("\n")]
        else:
            return []

    # Before starting ceph-fuse process, note the contents of
    # /sys/fs/fuse/connections
    pre_mount_conns = list_connections()
    log.info("Pre-mount connections: {0}".format(pre_mount_conns))

    mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()
    self.fuse_daemon = self.client_remote.run(
        args=run_cmd,
        logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
        stdin=run.PIPE,
        stdout=mountcmd_stdout,
        stderr=mountcmd_stderr,
        wait=False
    )

    # Wait for the connection reference to appear in /sys
    mount_wait = self.client_config.get('mount_wait', 0)
    if mount_wait > 0:
        log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait))
        time.sleep(mount_wait)
    timeout = int(self.client_config.get('mount_timeout', 30))
    waited = 0

    post_mount_conns = list_connections()
    while len(post_mount_conns) <= len(pre_mount_conns):
        if self.fuse_daemon.finished:
            # Did mount fail?  Raise the CommandFailedError instead of
            # hitting the "failed to populate /sys/" timeout
            try:
                self.fuse_daemon.wait()
            except CommandFailedError as e:
                log.info('mount command failed.')
                if check_status:
                    raise
                else:
                    return (e, mountcmd_stdout.getvalue(),
                            mountcmd_stderr.getvalue())
        time.sleep(1)
        waited += 1
        if waited > timeout:
            raise RuntimeError(
                "Fuse mount failed to populate/sys/ after {} "
                "seconds".format(waited))
        else:
            post_mount_conns = list_connections()

    log.info("Post-mount connections: {0}".format(post_mount_conns))

    # Record our fuse connection number so that we can use it when
    # forcing an unmount
    new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
    if len(new_conns) == 0:
        raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
    elif len(new_conns) > 1:
        raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
    else:
        self._fuse_conn = new_conns[0]

    self.gather_mount_info()

    self.mounted = True
def wait_until_mounted(self):
    """
    Poll ``check_mounted_state()`` every 5 seconds until it reports the
    mount, asserting in between that the ceph-fuse daemon has not exited.
    Then mark the mount as mounted and open up the mountpoint's
    permissions (``chmod 1777``), trying up to 10 times.
    """
    while True:
        if self.check_mounted_state():
            break
        # Even if it's not mounted, it should at least
        # be running: catch simple failures where it has terminated.
        assert not self.fuse_daemon.poll()
        time.sleep(5)

    self.mounted = True

    # Now that we're mounted, set permissions so that the rest of the test
    # will have unrestricted access to the filesystem mount.
    for _attempt in range(10):
        err_buf = StringIO()
        try:
            self.client_remote.run(args=['sudo', 'chmod', '1777',
                                         self.hostfs_mntpt],
                                   timeout=(15*60),
                                   stderr=err_buf, omit_sudo=False)
        except run.CommandFailedError:
            message = err_buf.getvalue().lower()
            if "read-only file system" in message:
                # Nothing more to do on a read-only mount.
                break
            if "permission denied" not in message:
                raise
            # Permission denied: wait and try again.
            time.sleep(5)
        else:
            break
def umount_wait(self, force=False, require_clean=False, timeout=900):
    """
    Unmount and wait for the ceph-fuse daemon process to exit, then run
    cleanup().

    :param force: Complete cleanly even if the MDS is offline. Mutually
                  exclusive with require_clean.
    :param require_clean: if True, a CommandFailedError raised while
                          waiting for the daemon is re-raised instead of
                          being ignored
    :param timeout: passed to run.wait() as the limit on how long to wait
                    for the daemon
    """
    if not (self.is_mounted() and self.fuse_daemon):
        log.debug('ceph-fuse client.{id} is not mounted at {remote} '
                  '{mnt}'.format(id=self.client_id,
                                 remote=self.client_remote,
                                 mnt=self.hostfs_mntpt))
        self.cleanup()
        return

    if force:
        assert not require_clean  # mutually exclusive

        # When we expect to be forcing, kill the ceph-fuse process directly.
        # This should avoid hitting the more aggressive fallback killing
        # in umount() which can affect other mounts too.
        self.fuse_daemon.stdin.close()

        # However, we will still hit the aggressive wait if there is an ongoing
        # mount -o remount (especially if the remount is stuck because MDSs
        # are unavailable)

    # cleanup is set to False since cleanup must happen after umount is
    # complete; otherwise the following call to run.wait hangs.
    self.umount(cleanup=False)

    try:
        # Permit a timeout, so that we do not block forever
        run.wait([self.fuse_daemon], timeout)

    except MaxWhileTries:
        log.error("process failed to terminate after unmount. This probably"
                  " indicates a bug within ceph-fuse.")
        raise
    except CommandFailedError:
        if require_clean:
            raise

    self.mounted = False
    self.cleanup()
def find_admin_socket(self):
    """
    Locate this client's admin socket by running a small Python script
    through ``self.run_python`` (with sudo).

    The script globs ``self._asok_path()``. For a non-glob path it expects
    exactly one match; otherwise it picks the socket whose embedded pid
    belongs to a live process with ``self.mountpoint`` in its command
    line, and raises RuntimeError if there is none.

    :return: whatever ``run_python`` returns for the script, i.e. the
             path the script printed
    """
    # Backslashes are doubled here and the generated pattern is a raw
    # string, so neither this file nor the generated script contains the
    # invalid escape sequences "\." and "\d".
    pyscript = """
import glob
import re
import os
import subprocess

def _find_admin_socket(client_name):
    asok_path = "{asok_path}"
    files = glob.glob(asok_path)
    mountpoint = "{mountpoint}"

    # Given a non-glob path, it better be there
    if "*" not in asok_path:
        assert(len(files) == 1)
        return files[0]

    for f in files:
        pid = re.match(r".*\\.(\\d+)\\.asok$", f).group(1)
        if os.path.exists("/proc/{{0}}".format(pid)):
            with open("/proc/{{0}}/cmdline".format(pid), 'r') as proc_f:
                contents = proc_f.read()
                if mountpoint in contents:
                    return f
    raise RuntimeError("Client socket {{0}} not found".format(client_name))

print(_find_admin_socket("{client_name}"))
""".format(
        asok_path=self._asok_path(),
        client_name="client.{0}".format(self.client_id),
        mountpoint=self.mountpoint)

    asok_path = self.run_python(pyscript, sudo=True)
    log.info("Found client admin socket at {0}".format(asok_path))
    return asok_path
up the CephFS client ID for this mount + """ + return self.admin_socket(['mds_sessions'])['id'] + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self.inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self.addr + + def get_client_pid(self): + """ + return pid of ceph-fuse process + """ + status = self.admin_socket(['status']) + return status['metadata']['pid'] + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + status = self.admin_socket(['status']) + return status['osd_epoch'], status['osd_epoch_barrier'] + + def get_dentry_count(self): + """ + Return 2-tuple of dentry_count, dentry_pinned_count + """ + status = self.admin_socket(['status']) + return status['dentry_count'], status['dentry_pinned_count'] + + def set_cache_size(self, size): + return self.admin_socket(['config', 'set', 'client_cache_size', str(size)]) + + def get_op_read_count(self): + return self.admin_socket(['perf', 'dump', 'objecter'])['objecter']['osdop_read'] diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py new file mode 100644 index 000000000..f4640e3fd --- /dev/null +++ b/qa/tasks/cephfs/kernel_mount.py @@ -0,0 +1,324 @@ +import errno +import json +import logging +import os +import re + +from io import StringIO +from textwrap import dedent + +from teuthology.orchestra.run import CommandFailedError +from teuthology.orchestra import run +from teuthology.contextutil import MaxWhileTries + +from tasks.cephfs.mount import CephFSMount + +log = logging.getLogger(__name__) + + +UMOUNT_TIMEOUT = 300 + + +class KernelMount(CephFSMount): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, hostfs_mntpt=None, + cephfs_name=None, cephfs_mntpt=None, brxnet=None, config={}): + super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + 
client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + self.rbytes = config.get('rbytes', False) + self.inst = None + self.addr = None + + def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + # TODO: don't call setupfs() from within mount(), since it's + # absurd. The proper order should be: create FS first and then + # call mount(). + if createfs: + self.setupfs(name=self.cephfs_name) + if not self.cephfs_mntpt: + self.cephfs_mntpt = '/' + + stderr = StringIO() + try: + self.client_remote.run(args=['mkdir', '-p', self.hostfs_mntpt], + timeout=(5*60), stderr=stderr) + except CommandFailedError: + if 'file exists' not in stderr.getvalue().lower(): + raise + + retval = self._run_mount_cmd(mntopts, check_status) + if retval: + return retval + + stderr = StringIO() + try: + self.client_remote.run( + args=['sudo', 'chmod', '1777', self.hostfs_mntpt], + stderr=stderr, timeout=(5*60)) + except CommandFailedError: + # the client does not have write permissions in the caps it holds + # for the Ceph FS that was just mounted. 
+ if 'permission denied' in stderr.getvalue().lower(): + pass + + + self.mounted = True + + def _run_mount_cmd(self, mntopts, check_status): + opts = 'norequire_active_mds' + if self.client_id: + opts += ',name=' + self.client_id + if self.client_keyring_path and self.client_id: + opts += ',secret=' + self.get_key_from_keyfile() + if self.config_path: + opts += ',conf=' + self.config_path + if self.cephfs_name: + opts += ",mds_namespace=" + self.cephfs_name + if self.rbytes: + opts += ",rbytes" + else: + opts += ",norbytes" + if mntopts: + opts += ',' + ','.join(mntopts) + + mount_dev = ':' + self.cephfs_mntpt + prefix = ['sudo', 'adjust-ulimits', 'ceph-coverage', + self.test_dir + '/archive/coverage', + 'nsenter', + '--net=/var/run/netns/{0}'.format(self.netns_name)] + cmdargs = prefix + ['/bin/mount', '-t', 'ceph', mount_dev, + self.hostfs_mntpt, '-v', '-o', opts] + + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + try: + self.client_remote.run(args=cmdargs, timeout=(30*60), + stdout=mountcmd_stdout, + stderr=mountcmd_stderr) + except CommandFailedError as e: + log.info('mount command failed') + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + log.info('mount command passed') + + def umount(self, force=False): + if not self.is_mounted(): + self.cleanup() + return + + log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) + + try: + cmd=['sudo', 'umount', self.hostfs_mntpt] + if force: + cmd.append('-f') + self.client_remote.run(args=cmd, timeout=(15*60), omit_sudo=False) + except Exception as e: + self.client_remote.run( + args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', + run.Raw(';'), 'ps', 'auxf'], + timeout=(15*60), omit_sudo=False) + raise e + + self.mounted = False + self.cleanup() + + def umount_wait(self, force=False, require_clean=False, timeout=900): + """ + Unlike the fuse client, the kernel client's umount is immediate + """ + if not self.is_mounted(): + 
def read_debug_file(self, filename):
    """
    Read the debug file "filename" from this mount's debugfs directory.

    :param filename: path relative to the directory returned by
                     self._get_debug_dir()
    :return: the file contents (str) on success. When ``dd`` fails, an
             errno value (int) is returned instead: errno.ENOENT for
             "no such file or directory", errno.ENOTDIR for "not a
             directory", errno.EACCES for "permission denied". Callers
             must therefore check the type/value, not just ``is None``.
    :raises CommandFailedError: if ``dd`` fails for any other reason
    """

    path = os.path.join(self._get_debug_dir(), filename)

    stdout = StringIO()
    stderr = StringIO()
    try:
        self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60),
                               stdout=stdout, stderr=stderr)
    except CommandFailedError:
        # Read and lower-case stderr once, then map known messages to errno.
        errmsg = stderr.getvalue().lower()
        known_failures = (
            ('no such file or directory', errno.ENOENT),
            ('not a directory', errno.ENOTDIR),
            ('permission denied', errno.EACCES),
        )
        for needle, code in known_failures:
            if needle in errmsg:
                return code
        raise
    return stdout.getvalue()
@property
def _global_addr(self):
    """
    Address part of this client's instance string, cached in self.addr.

    :return: e.g. "10.72.47.117:0/1148470933", or None when the debugfs
             "status" file could not be read.
    """
    if self.addr is not None:
        return self.addr

    # The first line of the "status" file's output will be something
    # like:
    #   "instance: client.4297 (0)10.72.47.117:0/1148470933"
    # What we need here is only the string "10.72.47.117:0/1148470933"
    status = self.read_debug_file("status")
    # read_debug_file() reports a missing/unreadable file as an errno
    # integer rather than None, so accept only real text here; feeding an
    # int to re.findall() would raise TypeError.
    if not isinstance(status, str):
        return None

    instance = re.findall(r'instance:.*', status)[0]
    self.addr = instance.split()[2].split(')')[1]
    return self.addr
def __init__(self, ctx, test_dir, client_id, client_remote,
             client_keyring_path=None, hostfs_mntpt=None,
             cephfs_name=None, cephfs_mntpt=None, brxnet=None):
    """
    :param test_dir: Global teuthology test dir
    :param client_id: Client ID, the 'foo' in client.foo
    :param client_keyring_path: path to keyring for given client_id
    :param client_remote: Remote instance for the host where client will
                          run
    :param hostfs_mntpt: Path to directory on the FS on which Ceph FS will
                         be mounted
    :param cephfs_name: Name of Ceph FS to be mounted
    :param cephfs_mntpt: Path to directory inside Ceph FS that will be
                         mounted as root
    :param brxnet: network (CIDR string) used for the 'ceph-brx' bridge;
                   '192.168.0.0/16' is used when None
    """
    self.mounted = False
    self.ctx = ctx
    self.test_dir = test_dir

    self._verify_attrs(client_id=client_id,
                       client_keyring_path=client_keyring_path,
                       hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name,
                       cephfs_mntpt=cephfs_mntpt)

    self.client_id = client_id
    self.client_keyring_path = client_keyring_path
    self.client_remote = client_remote
    if hostfs_mntpt:
        self.hostfs_mntpt = hostfs_mntpt
    else:
        self.hostfs_mntpt = os.path.join(self.test_dir, f'mnt.{self.client_id}')
    # Always record the directory name: the mountpoint property rebuilds
    # hostfs_mntpt from it, so it must exist for the default path too.
    self.hostfs_mntpt_dirname = os.path.basename(self.hostfs_mntpt)
    self.cephfs_name = cephfs_name
    self.cephfs_mntpt = cephfs_mntpt

    self.fs = None

    # The netns name is derived lazily from the mountpoint; nsid stays -1
    # until a netns has been set up for this mount.
    self._netns_name = None
    self.nsid = -1
    if brxnet is None:
        self.ceph_brx_net = '192.168.0.0/16'
    else:
        self.ceph_brx_net = brxnet

    self.test_files = ['a', 'b', 'c']

    self.background_procs = []
@staticmethod
def cleanup_stale_netnses_and_bridge(remote):
    """
    Best-effort removal of 'ceph-ns-*' network namespaces and of the
    'ceph-brx' bridge on the given remote. Failures of the individual
    delete commands are ignored.

    :param remote: object whose run() executes commands on the host
    """
    def run_ignoring_errors(cmd):
        # A namespace/bridge that cannot be deleted must not stop cleanup.
        try:
            remote.run(args=cmd, timeout=(5*60), omit_sudo=False)
        except Exception:
            pass

    listing = remote.run(args=['ip', 'netns', 'list'],
                         stdout=StringIO(), timeout=(5*60))
    listing_text = listing.stdout.getvalue().strip()

    # Names of the namespaces created by these tests
    stale_names = [
        match.split()[0]
        for match in re.findall(r'ceph-ns-[^()\s][-.\w]+[^():\s]', listing_text)
    ]

    # Remove the stale netnses
    for stale in stale_names:
        run_ignoring_errors(['sudo', 'ip', 'netns', 'delete', '{0}'.format(stale)])

    # Remove the stale 'ceph-brx'
    run_ignoring_errors(['sudo', 'ip', 'link', 'delete', 'ceph-brx'])
def _setup_brx_and_nat(self):
    """
    Create the 'ceph-brx' bridge if it does not exist yet, enable IPv4
    forwarding and add the iptables FORWARD/MASQUERADE rules for it.

    :raises RuntimeError: if an existing ceph-brx uses a different
                          address/mask than self.ceph_brx_net requires,
                          or if `route` prints no default route
    """
    # The ip for ceph-brx is the second-to-last address of the network,
    # i.e. the one just before the broadcast address.
    ip = IP(self.ceph_brx_net)[-2]
    mask = self.ceph_brx_net.split('/')[1]
    brd = IP(self.ceph_brx_net).broadcast()

    brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(),
                                 stdout=StringIO(), timeout=(5*60))
    brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue())
    if brx:
        # If the 'ceph-brx' already exists, then check whether
        # the new net is conflicting with it
        _ip, _mask = brx[0].split()[1].split('/', 1)
        if _ip != "{}".format(ip) or _mask != mask:
            raise RuntimeError("Conflict with existing ceph-brx {0}, new {1}/{2}".format(brx[0].split()[1], ip, mask))

    # Setup the ceph-brx and always use the last valid IP
    if not brx:
        log.info("Setuping the 'ceph-brx' with {0}/{1}".format(ip, mask))

        self.run_shell_payload(f"""
            set -e
            sudo ip link add name ceph-brx type bridge
            sudo ip addr flush dev ceph-brx
            sudo ip link set ceph-brx up
            sudo ip addr add {ip}/{mask} brd {brd} dev ceph-brx
        """, timeout=(5*60), omit_sudo=False, cwd='/')

    args = "echo 1 | sudo tee /proc/sys/net/ipv4/ip_forward"
    self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False)

    # Setup the NAT
    p = self.client_remote.run(args=['route'], stderr=StringIO(),
                               stdout=StringIO(), timeout=(5*60))
    p = re.findall(r'default .*', p.stdout.getvalue())
    # re.findall() returns a list, and "[] == False" is never true, so
    # test for emptiness; otherwise p[0] below raises IndexError.
    if not p:
        raise RuntimeError("No default gw found")
    # The 8th whitespace-separated field of the default route line is
    # passed to iptables as the -o/-i interface.
    gw = p[0].split()[7]

    self.run_shell_payload(f"""
        set -e
        sudo iptables -A FORWARD -o {gw} -i ceph-brx -j ACCEPT
        sudo iptables -A FORWARD -i {gw} -o ceph-brx -j ACCEPT
        sudo iptables -t nat -A POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE
    """, timeout=(5*60), omit_sudo=False, cwd='/')
def _cleanup_brx_and_nat(self):
    """
    Remove the 'ceph-brx' bridge and its iptables rules once no 'brx.*'
    veth peer is left on the host. Does nothing if the bridge has no
    address or if other netnses are still attached.

    :raises RuntimeError: if `route` prints no default route, so the
                          interface used in the iptables rules is unknown
    """
    brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(),
                                 stdout=StringIO(), timeout=(5*60))
    brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue())
    if not brx:
        return

    # If we are the last netns, will delete the ceph-brx
    args = ['sudo', 'ip', 'link', 'show']
    p = self.client_remote.run(args=args, stdout=StringIO(),
                               timeout=(5*60), omit_sudo=False)
    _list = re.findall(r'brx\.', p.stdout.getvalue().strip())
    if _list:
        return

    log.info("Removing the 'ceph-brx'")

    self.run_shell_payload("""
        set -e
        sudo ip link set ceph-brx down
        sudo ip link delete ceph-brx
    """, timeout=(5*60), omit_sudo=False, cwd='/')

    # Drop the iptables NAT rules
    ip = IP(self.ceph_brx_net)[-2]
    mask = self.ceph_brx_net.split('/')[1]

    p = self.client_remote.run(args=['route'], stderr=StringIO(),
                               stdout=StringIO(), timeout=(5*60))
    p = re.findall(r'default .*', p.stdout.getvalue())
    # re.findall() returns a list, and "[] == False" is never true, so
    # test for emptiness; otherwise p[0] below raises IndexError.
    if not p:
        raise RuntimeError("No default gw found")
    # The 8th whitespace-separated field of the default route line is
    # passed to iptables as the -o/-i interface.
    gw = p[0].split()[7]
    self.run_shell_payload(f"""
        set -e
        sudo iptables -D FORWARD -o {gw} -i ceph-brx -j ACCEPT
        sudo iptables -D FORWARD -i {gw} -o ceph-brx -j ACCEPT
        sudo iptables -t nat -D POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE
    """, timeout=(5*60), omit_sudo=False, cwd='/')
+ """ + if self.nsid == -1: + return + + log.info("Suspending the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + + args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'down'] + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + def resume_netns(self): + """ + Resume the netns veth interface. + """ + if self.nsid == -1: + return + + log.info("Resuming the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + + args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'up'] + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): + """ + kwargs expects its members to be same as the arguments accepted by + self.update_attrs(). + """ + raise NotImplementedError() + + def mount_wait(self, **kwargs): + """ + Accepts arguments same as self.mount(). + """ + self.mount(**kwargs) + self.wait_until_mounted() + + def umount(self): + raise NotImplementedError() + + def umount_wait(self, force=False, require_clean=False, timeout=None): + """ + + :param force: Expect that the mount will not shutdown cleanly: kill + it hard. + :param require_clean: Wait for the Ceph client associated with the + mount (e.g. ceph-fuse) to terminate, and + raise if it doesn't do so cleanly. + :param timeout: amount of time to be waited for umount command to finish + :return: + """ + raise NotImplementedError() + + def _verify_attrs(self, **kwargs): + """ + Verify that client_id, client_keyring_path, client_remote, hostfs_mntpt, + cephfs_name, cephfs_mntpt are either type str or None. + """ + for k, v in kwargs.items(): + if v is not None and not isinstance(v, str): + raise RuntimeError('value of attributes should be either str ' + f'or None. 
def update_attrs(self, client_id=None, client_keyring_path=None,
                 client_remote=None, hostfs_mntpt=None, cephfs_name=None,
                 cephfs_mntpt=None):
    """
    Overwrite the mount attributes for which a truthy value was passed.

    Nothing happens (not even validation) when every argument is falsy.
    All arguments except client_remote are validated with
    self._verify_attrs() before any attribute is changed.
    """
    # Insertion order doubles as the order in which attributes are set.
    requested = {
        'client_id': client_id,
        'client_keyring_path': client_keyring_path,
        'client_remote': client_remote,
        'hostfs_mntpt': hostfs_mntpt,
        'cephfs_name': cephfs_name,
        'cephfs_mntpt': cephfs_mntpt,
    }
    if not any(requested.values()):
        return

    # client_remote is not a str/None attribute, so it is not verified.
    to_verify = {name: value for name, value in requested.items()
                 if name != 'client_remote'}
    self._verify_attrs(**to_verify)

    for name, value in requested.items():
        if value:
            setattr(self, name, value)
def get_key_from_keyfile(self):
    """
    Read the keyring at self.client_keyring_path on the remote and return
    the value of its ``key = ...`` entry.

    :return: the key string with surrounding whitespace stripped, or None
             if the keyring contains no ``key`` entry
    """
    # XXX: don't call run_shell(), since CephFS might be unmounted.
    keyring = self.client_remote.run(
        args=['sudo', 'cat', self.client_keyring_path], stdout=StringIO(),
        omit_sudo=False).stdout.getvalue()

    for line in keyring.split('\n'):
        # Split on the first '=' only, so '=' characters inside the value
        # are kept.
        name, sep, value = line.partition('=')
        # Require the entry name to be exactly "key"; a plain substring
        # search would also match section headers such as
        # "[client.monkey]" and return the header as the secret.
        if sep and name.strip() == 'key':
            return value.strip()
    return None
def create_file(self, filename='testfile', dirname=None, user=None,
                check_status=True):
    """
    Touch a file on the remote host.

    An absolute filename is used as is. Otherwise the file is placed in
    dirname (itself taken relative to the mountpoint unless absolute), or
    directly in the mountpoint when no dirname is given.

    :param user: if set, run the touch command as this user through sudo
    :param check_status: forwarded to client_remote.run()
    :return: whatever client_remote.run() returns
    """
    assert(self.is_mounted())

    if os.path.isabs(filename):
        target = filename
    elif not dirname:
        target = os.path.join(self.hostfs_mntpt, filename)
    elif os.path.isabs(dirname):
        target = os.path.join(dirname, filename)
    else:
        target = os.path.join(self.hostfs_mntpt, dirname, filename)

    touch_cmd = 'touch ' + target
    if user:
        command = ['sudo', '-u', user, '-s', '/bin/bash', '-c', touch_cmd]
    else:
        command = touch_cmd

    return self.client_remote.run(args=command, check_status=check_status)
def run_shell(self, args, timeout=900, **kwargs):
    """
    Run a command on the client's remote host.

    :param args: the command, either a list of arguments or a str that is
                 split on whitespace
    :param timeout: forwarded to client_remote.run()
    :param kwargs: 'omit_sudo' (default False), 'sudo' (default False;
                   prefixes the command with 'sudo'), 'cwd' (default
                   self.mountpoint), 'stdout' and 'stderr' (default fresh
                   StringIO objects) are consumed here; everything else is
                   passed to client_remote.run() unchanged
    :return: whatever client_remote.run() returns
    """
    args = args.split() if isinstance(args, str) else args
    omit_sudo = kwargs.pop('omit_sudo', False)
    sudo = kwargs.pop('sudo', False)
    cwd = kwargs.pop('cwd', self.mountpoint)
    stdout = kwargs.pop('stdout', StringIO())
    stderr = kwargs.pop('stderr', StringIO())

    if sudo:
        # Build a new list instead of inserting in place, so a list handed
        # in by the caller is not modified (and does not collect one more
        # 'sudo' each time it is reused).
        args = ['sudo', *args]

    return self.client_remote.run(args=args, cwd=cwd, timeout=timeout,
                                  stdout=stdout, stderr=stderr,
                                  omit_sudo=omit_sudo, **kwargs)
def _verify(self, proc, retval=None, errmsg=None):
    """
    Assert that a finished process failed in the expected way.

    :param proc: process object providing returncode and a stderr buffer
    :param retval: expected return code; not checked when falsy
    :param errmsg: string expected in the lower-cased stderr; not checked
                   when falsy
    """
    if retval:
        retval_msg = (f'expected return value: {retval}\n'
                      f'received return value: {proc.returncode}\n')
        assert proc.returncode == retval, retval_msg

    if not errmsg:
        return

    # stderr is compared in lower case, so errmsg should be lower case too
    lowered = proc.stderr.getvalue().lower()
    errmsg_msg = ('didn\'t find given string in stderr -\n'
                  f'expected string: {errmsg}\n'
                  f'received error message: {lowered}\n'
                  'note: received error message is converted to lowercase')
    assert errmsg in lowered, errmsg_msg
+ """ + proc = self.run_shell(args=args, wait=wait, stdin=stdin, cwd=cwd, + check_status=False) + self._verify(proc, retval, errmsg) + return proc + + def negtestcmd_as_user(self, args, user, retval=None, errmsg=None, + stdin=None, cwd=None, wait=True): + proc = self.run_as_user(args=args, user=user, wait=wait, stdin=stdin, + cwd=cwd, check_status=False) + self._verify(proc, retval, errmsg) + return proc + + def negtestcmd_as_root(self, args, retval=None, errmsg=None, stdin=None, + cwd=None, wait=True): + proc = self.run_as_root(args=args, wait=wait, stdin=stdin, cwd=cwd, + check_status=False) + self._verify(proc, retval, errmsg) + return proc + + def open_no_data(self, basename): + """ + A pure metadata operation + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + p = self._run_python(dedent( + """ + f = open("{path}", 'w') + """.format(path=path) + )) + p.wait() + + def open_background(self, basename="background_file", write=True): + """ + Open a file for writing, then block such that the client + will hold a capability. + + Don't return until the remote process has got as far as opening + the file, then return the RemoteProcess instance. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + if write: + pyscript = dedent(""" + import time + + with open("{path}", 'w') as f: + f.write('content') + f.flush() + f.write('content2') + while True: + time.sleep(1) + """).format(path=path) + else: + pyscript = dedent(""" + import time + + with open("{path}", 'r') as f: + while True: + time.sleep(1) + """).format(path=path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + + # This wait would not be sufficient if the file had already + # existed, but it's simple and in practice users of open_background + # are not using it on existing files. 
def wait_for_visible(self, basename="background_file", timeout=30):
    """
    Poll with ``stat``, once per second, until basename exists in the
    mountpoint.

    :param basename: file name relative to the mountpoint
    :param timeout: number of attempts (one per second) before giving up
    :raises RuntimeError: if the file did not show up in time
    """
    waited = 0
    while waited < timeout:
        target = os.path.join(self.hostfs_mntpt, basename)
        # check_status=False: a failing stat just means "not there yet"
        probe = self.client_remote.run(args=['stat', target],
                                       check_status=False)
        if probe.exitstatus != 0:
            time.sleep(1)
            waited += 1
            continue

        log.debug("File {0} became visible from {1} after {2}s".format(
            basename, self.client_id, waited))
        return

    raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format(
        waited, basename, self.client_id))
pyscript = dedent(script_builder).format(path=path) + + log.info("lock_background file {0}".format(basename)) + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def lock_and_release(self, basename="background_file"): + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script = """ + import time + import fcntl + import struct + f1 = open("{path}-1", 'w') + fcntl.flock(f1, fcntl.LOCK_EX) + f2 = open("{path}-2", 'w') + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + """ + pyscript = dedent(script).format(path=path) + + log.info("lock_and_release file {0}".format(basename)) + return self._run_python(pyscript) + + def check_filelock(self, basename="background_file", do_flock=True): + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script_builder = """ + import fcntl + import errno + import struct""" + if do_flock: + script_builder += """ + f1 = open("{path}-1", 'r') + try: + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("flock on file {path}-1 not found")""" + script_builder += """ + f2 = open("{path}-2", 'r') + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("posix lock on file {path}-2 not found") + """ + pyscript = dedent(script_builder).format(path=path) + + log.info("check lock on file {0}".format(basename)) + self.client_remote.run(args=[ + 'python3', '-c', pyscript + ]) + + def write_background(self, basename="background_file", loop=False): + """ + Open a file for writing, complete as soon as you can + :param basename: + :return: + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + pyscript = dedent(""" + import os + 
import time + + fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0o644) + try: + while True: + os.write(fd, b'content') + time.sleep(1) + if not {loop}: + break + except IOError as e: + pass + os.close(fd) + """).format(path=path, loop=str(loop)) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def write_n_mb(self, filename, n_mb, seek=0, wait=True): + """ + Write the requested number of megabytes to a file + """ + assert(self.is_mounted()) + + return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename), + "bs=1M", "conv=fdatasync", + "count={0}".format(int(n_mb)), + "seek={0}".format(int(seek)) + ], wait=wait) + + def write_test_pattern(self, filename, size): + log.info("Writing {0} bytes to {1}".format(size, filename)) + return self.run_python(dedent(""" + import zlib + path = "{path}" + with open(path, 'w') as f: + for i in range(0, {size}): + val = zlib.crc32(str(i).encode('utf-8')) & 7 + f.write(chr(val)) + """.format( + path=os.path.join(self.hostfs_mntpt, filename), + size=size + ))) + + def validate_test_pattern(self, filename, size): + log.info("Validating {0} bytes from {1}".format(size, filename)) + # Use sudo because cephfs-data-scan may recreate the file with owner==root + return self.run_python(dedent(""" + import zlib + path = "{path}" + with open(path, 'r') as f: + bytes = f.read() + if len(bytes) != {size}: + raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format( + len(bytes), {size} + )) + for i, b in enumerate(bytes): + val = zlib.crc32(str(i).encode('utf-8')) & 7 + if b != chr(val): + raise RuntimeError("Bad data at offset {{0}}".format(i)) + """.format( + path=os.path.join(self.hostfs_mntpt, filename), + size=size + )), sudo=True) + + def open_n_background(self, fs_path, count): + """ + Open N files for writing, hold them open in a background process + + :param fs_path: Path relative to CephFS root, e.g. 
"foo/bar" + :return: a RemoteProcess + """ + assert(self.is_mounted()) + + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import sys + import time + import os + + n = {count} + abs_path = "{abs_path}" + + if not os.path.exists(abs_path): + os.makedirs(abs_path) + + handles = [] + for i in range(0, n): + fname = "file_"+str(i) + path = os.path.join(abs_path, fname) + handles.append(open(path, 'w')) + + while True: + time.sleep(1) + """).format(abs_path=abs_path, count=count) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def create_n_files(self, fs_path, count, sync=False, dirsync=False, unlink=False, finaldirsync=False): + """ + Create n files. + + :param sync: sync the file after writing + :param dirsync: sync the containing directory after closing the file + :param unlink: unlink the file after closing + :param finaldirsync: sync the containing directory after closing the last file + """ + + assert(self.is_mounted()) + + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(f""" + import os + + n = {count} + path = "{abs_path}" + + dpath = os.path.dirname(path) + fnameprefix = os.path.basename(path) + os.makedirs(dpath, exist_ok=True) + + try: + dirfd = os.open(dpath, os.O_DIRECTORY) + + for i in range(n): + fpath = os.path.join(dpath, f"{{fnameprefix}}_{{i}}") + with open(fpath, 'w') as f: + f.write(f"{{i}}") + if {sync}: + f.flush() + os.fsync(f.fileno()) + if {unlink}: + os.unlink(fpath) + if {dirsync}: + os.fsync(dirfd) + if {finaldirsync}: + os.fsync(dirfd) + finally: + os.close(dirfd) + """) + + self.run_python(pyscript) + + def teardown(self): + for p in self.background_procs: + log.info("Terminating background process") + self._kill_background(p) + + self.background_procs = [] + + def _kill_background(self, p): + if p.stdin: + p.stdin.close() + try: + p.wait() + except (CommandFailedError, ConnectionLostError): + pass + + def kill_background(self, p): + 
""" + For a process that was returned by one of the _background member functions, + kill it hard. + """ + self._kill_background(p) + self.background_procs.remove(p) + + def send_signal(self, signal): + signal = signal.lower() + if signal.lower() not in ['sigstop', 'sigcont', 'sigterm', 'sigkill']: + raise NotImplementedError + + self.client_remote.run(args=['sudo', 'kill', '-{0}'.format(signal), + self.client_pid], omit_sudo=False) + + def get_global_id(self): + raise NotImplementedError() + + def get_global_inst(self): + raise NotImplementedError() + + def get_global_addr(self): + raise NotImplementedError() + + def get_osd_epoch(self): + raise NotImplementedError() + + def get_op_read_count(self): + raise NotImplementedError() + + def lstat(self, fs_path, follow_symlinks=False, wait=True): + return self.stat(fs_path, follow_symlinks=False, wait=True) + + def stat(self, fs_path, follow_symlinks=True, wait=True, **kwargs): + """ + stat a file, and return the result as a dictionary like this: + { + "st_ctime": 1414161137.0, + "st_mtime": 1414161137.0, + "st_nlink": 33, + "st_gid": 0, + "st_dev": 16777218, + "st_size": 1190, + "st_ino": 2, + "st_uid": 0, + "st_mode": 16877, + "st_atime": 1431520593.0 + } + + Raises exception on absent file. 
+ """ + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + if follow_symlinks: + stat_call = "os.stat('" + abs_path + "')" + else: + stat_call = "os.lstat('" + abs_path + "')" + + pyscript = dedent(""" + import os + import stat + import json + import sys + + try: + s = {stat_call} + except OSError as e: + sys.exit(e.errno) + + attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"] + print(json.dumps( + dict([(a, getattr(s, a)) for a in attrs]), + indent=2)) + """).format(stat_call=stat_call) + proc = self._run_python(pyscript, **kwargs) + if wait: + proc.wait() + return json.loads(proc.stdout.getvalue().strip()) + else: + return proc + + def touch(self, fs_path): + """ + Create a dentry if it doesn't already exist. This python + implementation exists because the usual command line tool doesn't + pass through error codes like EIO. + + :param fs_path: + :return: + """ + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + pyscript = dedent(""" + import sys + import errno + + try: + f = open("{path}", "w") + f.close() + except IOError as e: + sys.exit(errno.EIO) + """).format(path=abs_path) + proc = self._run_python(pyscript) + proc.wait() + + def path_to_ino(self, fs_path, follow_symlinks=True): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + if follow_symlinks: + pyscript = dedent(""" + import os + import stat + + print(os.stat("{path}").st_ino) + """).format(path=abs_path) + else: + pyscript = dedent(""" + import os + import stat + + print(os.lstat("{path}").st_ino) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return int(proc.stdout.getvalue().strip()) + + def path_to_nlink(self, fs_path): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import os + import stat + + print(os.stat("{path}").st_nlink) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return 
int(proc.stdout.getvalue().strip()) + + def ls(self, path=None, **kwargs): + """ + Wrap ls: return a list of strings + """ + cmd = ["ls"] + if path: + cmd.append(path) + + ls_text = self.run_shell(cmd, **kwargs).stdout.getvalue().strip() + + if ls_text: + return ls_text.split("\n") + else: + # Special case because otherwise split on empty string + # gives you [''] instead of [] + return [] + + def setfattr(self, path, key, val, **kwargs): + """ + Wrap setfattr. + + :param path: relative to mount point + :param key: xattr name + :param val: xattr value + :return: None + """ + self.run_shell(["setfattr", "-n", key, "-v", val, path], **kwargs) + + def getfattr(self, path, attr, **kwargs): + """ + Wrap getfattr: return the values of a named xattr on one file, or + None if the attribute is not found. + + :return: a string + """ + p = self.run_shell(["getfattr", "--only-values", "-n", attr, path], wait=False, **kwargs) + try: + p.wait() + except CommandFailedError as e: + if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue(): + return None + else: + raise + + return str(p.stdout.getvalue()) + + def df(self): + """ + Wrap df: return a dict of usage fields in bytes + """ + + p = self.run_shell(["df", "-B1", "."]) + lines = p.stdout.getvalue().strip().split("\n") + fs, total, used, avail = lines[1].split()[:4] + log.warning(lines) + + return { + "total": int(total), + "used": int(used), + "available": int(avail) + } + + def dir_checksum(self, path=None, follow_symlinks=False): + cmd = ["find"] + if follow_symlinks: + cmd.append("-L") + if path: + cmd.append(path) + cmd.extend(["-type", "f", "-exec", "md5sum", "{}", "+"]) + checksum_text = self.run_shell(cmd).stdout.getvalue().strip() + checksum_sorted = sorted(checksum_text.split('\n'), key=lambda v: v.split()[1]) + return hashlib.md5(('\n'.join(checksum_sorted)).encode('utf-8')).hexdigest() diff --git a/qa/tasks/cephfs/test_acls.py b/qa/tasks/cephfs/test_acls.py new file mode 100644 index 000000000..4f704c076 
--- /dev/null +++ b/qa/tasks/cephfs/test_acls.py @@ -0,0 +1,27 @@ +import logging + +from io import BytesIO +from tasks.cephfs.xfstests_dev import XFSTestsDev + +log = logging.getLogger(__name__) + +class TestACLs(XFSTestsDev): + + def test_acls(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + # TODO: make xfstests-dev compatible with ceph-fuse. xfstests-dev + # remounts CephFS before running tests using kernel, so ceph-fuse + # mounts are never actually testsed. + if isinstance(self.mount_a, FuseMount): + log.info('client is fuse mounted') + self.skipTest('Requires kernel client; xfstests-dev not '\ + 'compatible with ceph-fuse ATM.') + elif isinstance(self.mount_a, KernelMount): + log.info('client is kernel mounted') + + self.mount_a.client_remote.run(args=['sudo', './check', + 'generic/099'], cwd=self.repo_path, stdout=BytesIO(), + stderr=BytesIO(), timeout=30, check_status=True, + label='running tests for ACLs from xfstests-dev') diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py new file mode 100644 index 000000000..bbf069066 --- /dev/null +++ b/qa/tasks/cephfs/test_admin.py @@ -0,0 +1,912 @@ +import errno +import json +import logging +import time +import uuid +from io import StringIO +from os.path import join as os_path_join + +from teuthology.orchestra.run import CommandFailedError, Raw + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.filesystem import FileLayout, FSMissing +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.caps_helper import CapsHelper + +log = logging.getLogger(__name__) + +class TestAdminCommands(CephFSTestCase): + """ + Tests for administration command. 
+ """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 3 + + def test_fsnames_can_only_by_goodchars(self): + n = 'test_fsnames_can_only_by_goodchars' + metapoolname, datapoolname = n+'-testmetapool', n+'-testdatapool' + badname = n+'badname@#' + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + n+metapoolname) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + n+datapoolname) + + # test that fsname not with "goodchars" fails + args = ['fs', 'new', badname, metapoolname, datapoolname] + proc = self.fs.mon_manager.run_cluster_cmd(args=args,stderr=StringIO(), + check_status=False) + self.assertIn('invalid chars', proc.stderr.getvalue().lower()) + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', metapoolname, + metapoolname, + '--yes-i-really-really-mean-it-not-faking') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', datapoolname, + datapoolname, + '--yes-i-really-really-mean-it-not-faking') + + def test_fs_status(self): + """ + That `ceph fs status` command functions. 
+ """ + + s = self.fs.mon_manager.raw_cluster_cmd("fs", "status") + self.assertTrue("active" in s) + + mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json-pretty"))["mdsmap"] + self.assertEqual(mdsmap[0]["state"], "active") + + mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json"))["mdsmap"] + self.assertEqual(mdsmap[0]["state"], "active") + + def _setup_ec_pools(self, n, metadata=True, overwrites=True): + if metadata: + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8") + cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"] + self.fs.mon_manager.raw_cluster_cmd(*cmd) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") + if overwrites: + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') + + def _check_pool_application_metadata_key_value(self, pool, app, key, value): + output = self.fs.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'application', 'get', pool, app, key) + self.assertEqual(str(output.strip()), value) + + def test_add_data_pool_root(self): + """ + That a new data pool can be added and used for the root directory. + """ + + p = self.fs.add_data_pool("foo") + self.fs.set_dir_layout(self.mount_a, ".", FileLayout(pool=p)) + + def test_add_data_pool_application_metadata(self): + """ + That the application metadata set on a newly added data pool is as expected. 
+ """ + pool_name = "foo" + mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min', + str(self.fs.pg_num_min)) + # Check whether https://tracker.ceph.com/issues/43061 is fixed + mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs') + self.fs.add_data_pool(pool_name, create=False) + self._check_pool_application_metadata_key_value( + pool_name, 'cephfs', 'data', self.fs.name) + + def test_add_data_pool_subdir(self): + """ + That a new data pool can be added and used for a sub-directory. + """ + + p = self.fs.add_data_pool("foo") + self.mount_a.run_shell("mkdir subdir") + self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p)) + + def test_add_data_pool_non_alphamueric_name_as_subdir(self): + """ + That a new data pool with non-alphanumeric name can be added and used for a sub-directory. + """ + p = self.fs.add_data_pool("I-am-data_pool00.") + self.mount_a.run_shell("mkdir subdir") + self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p)) + + def test_add_data_pool_ec(self): + """ + That a new EC data pool can be added. + """ + + n = "test_add_data_pool_ec" + self._setup_ec_pools(n, metadata=False) + self.fs.add_data_pool(n+"-data", create=False) + + def test_new_default_ec(self): + """ + That a new file system warns/fails with an EC default data pool. + """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec" + self._setup_ec_pools(n) + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + def test_new_default_ec_force(self): + """ + That a new file system succeeds with an EC default data pool with --force. 
+ """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec_force" + self._setup_ec_pools(n) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + + def test_new_default_ec_no_overwrite(self): + """ + That a new file system fails with an EC default data pool without overwrite. + """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec_no_overwrite" + self._setup_ec_pools(n, overwrites=False) + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + # and even with --force ! + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + def test_fs_new_pool_application_metadata(self): + """ + That the application metadata set on the pools of a newly created filesystem are as expected. + """ + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + fs_name = "test_fs_new_pool_application" + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + mon_cmd = self.fs.mon_manager.raw_cluster_cmd + for p in pool_names: + mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min)) + mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs') + mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1]) + for i in range(2): + self._check_pool_application_metadata_key_value( + pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id(self): + """ + That a file system can be created with a specific ID. 
+ """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + for i in range(2): + self._check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id_idempotency(self): + """ + That command to create file system with specific ID is idempotent. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + + def test_fs_new_with_specific_id_fails_without_force_flag(self): + """ + That command to create file system with specific ID fails without '--force' flag. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID without --force flag") + else: + self.fail("expected creating file system with specific ID without '--force' flag to fail") + + def test_fs_new_with_specific_id_fails_already_in_use(self): + """ + That creating file system with ID already in use fails. 
+ """ + fs_name = "test_fs_specific_id" + # file system ID already in use + fscid = self.fs.status().map['filesystems'][0]['id'] + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID that is already in use") + else: + self.fail("expected creating file system with ID already in use to fail") + + +class TestDump(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_fs_dump_epoch(self): + """ + That dumping a specific epoch works. + """ + + status1 = self.fs.status() + status2 = self.fs.status(epoch=status1["epoch"]-1) + self.assertEqual(status1["epoch"], status2["epoch"]+1) + + def test_fsmap_trim(self): + """ + That the fsmap is trimmed normally. + """ + + paxos_service_trim_min = 25 + self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min) + mon_max_mdsmap_epochs = 20 + self.config_set('mon', 'mon_max_mdsmap_epochs', mon_max_mdsmap_epochs) + + status = self.fs.status() + epoch = status["epoch"] + + # for N mutations + mutations = paxos_service_trim_min + mon_max_mdsmap_epochs + b = False + for i in range(mutations): + self.fs.set_joinable(b) + b = not b + + time.sleep(10) # for tick/compaction + + try: + self.fs.status(epoch=epoch) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed") + else: + self.fail("trimming did not occur as expected") + + def test_fsmap_force_trim(self): + """ + That the fsmap is trimmed forcefully. 
+ """ + + status = self.fs.status() + epoch = status["epoch"] + + paxos_service_trim_min = 1 + self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min) + mon_mds_force_trim_to = epoch+1 + self.config_set('mon', 'mon_mds_force_trim_to', mon_mds_force_trim_to) + + # force a new fsmap + self.fs.set_joinable(False) + time.sleep(10) # for tick/compaction + + status = self.fs.status() + log.debug(f"new epoch is {status['epoch']}") + self.fs.status(epoch=epoch+1) # epoch+1 is not trimmed, may not == status["epoch"] + + try: + self.fs.status(epoch=epoch) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed") + else: + self.fail("trimming did not occur as expected") + +class TestRequiredClientFeatures(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_required_client_features(self): + """ + That `ceph fs required_client_features` command functions. + """ + + def is_required(index): + out = self.fs.mon_manager.raw_cluster_cmd('fs', 'get', self.fs.name, '--format=json-pretty') + features = json.loads(out)['mdsmap']['required_client_features'] + if "feature_{0}".format(index) in features: + return True; + return False; + + features = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'feature', 'ls', '--format=json-pretty')) + self.assertGreater(len(features), 0); + + for f in features: + self.fs.required_client_features('rm', str(f['index'])) + + for f in features: + index = f['index'] + feature = f['name'] + if feature == 'reserved': + feature = str(index) + + if index % 3 == 0: + continue; + self.fs.required_client_features('add', feature) + self.assertTrue(is_required(index)) + + if index % 2 == 0: + continue; + self.fs.required_client_features('rm', feature) + self.assertFalse(is_required(index)) + + def test_required_client_feature_add_reserved(self): + """ + That `ceph fs required_client_features X add reserved` fails. 
+ """ + + p = self.fs.required_client_features('add', 'reserved', check_status=False, stderr=StringIO()) + self.assertIn('Invalid feature name', p.stderr.getvalue()) + + def test_required_client_feature_rm_reserved(self): + """ + That `ceph fs required_client_features X rm reserved` fails. + """ + + p = self.fs.required_client_features('rm', 'reserved', check_status=False, stderr=StringIO()) + self.assertIn('Invalid feature name', p.stderr.getvalue()) + + def test_required_client_feature_add_reserved_bit(self): + """ + That `ceph fs required_client_features X add <reserved_bit>` passes. + """ + + p = self.fs.required_client_features('add', '1', stderr=StringIO()) + self.assertIn("added feature 'reserved' to required_client_features", p.stderr.getvalue()) + + def test_required_client_feature_rm_reserved_bit(self): + """ + That `ceph fs required_client_features X rm <reserved_bit>` passes. + """ + + self.fs.required_client_features('add', '1') + p = self.fs.required_client_features('rm', '1', stderr=StringIO()) + self.assertIn("removed feature 'reserved' from required_client_features", p.stderr.getvalue()) + +class TestCompatCommands(CephFSTestCase): + """ + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 3 + + def test_add_compat(self): + """ + Test adding a compat. + """ + + self.fs.fail() + self.fs.add_compat(63, 'placeholder') + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['compat']) + + def test_add_incompat(self): + """ + Test adding an incompat. + """ + + self.fs.fail() + self.fs.add_incompat(63, 'placeholder') + mdsmap = self.fs.get_mds_map() + log.info(f"{mdsmap}") + self.assertIn("feature_63", mdsmap['compat']['incompat']) + + def test_rm_compat(self): + """ + Test removing a compat. 
+ """ + + self.fs.fail() + self.fs.add_compat(63, 'placeholder') + self.fs.rm_compat(63) + mdsmap = self.fs.get_mds_map() + self.assertNotIn("feature_63", mdsmap['compat']['compat']) + + def test_rm_incompat(self): + """ + Test removing an incompat. + """ + + self.fs.fail() + self.fs.add_incompat(63, 'placeholder') + self.fs.rm_incompat(63) + mdsmap = self.fs.get_mds_map() + self.assertNotIn("feature_63", mdsmap['compat']['incompat']) + + def test_standby_compat(self): + """ + That adding a compat does not prevent standbys from joining. + """ + + self.fs.fail() + self.fs.add_compat(63, "placeholder") + self.fs.set_joinable() + self.fs.wait_for_daemons() + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['compat']) + + def test_standby_incompat_reject(self): + """ + That adding an incompat feature prevents incompatible daemons from joining. + """ + + self.fs.fail() + self.fs.add_incompat(63, "placeholder") + self.fs.set_joinable() + try: + self.fs.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + + def test_standby_incompat_upgrade(self): + """ + That an MDS can upgrade the compat of a fs. + """ + + self.fs.fail() + self.fs.rm_incompat(1) + self.fs.set_joinable() + self.fs.wait_for_daemons() + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_1", mdsmap['compat']['incompat']) + + def test_standby_replay_not_upgradeable(self): + """ + That the mons will not upgrade the MDSMap compat if standby-replay is + enabled. 
+ """ + + self.fs.fail() + self.fs.rm_incompat(1) + self.fs.set_allow_standby_replay(True) + self.fs.set_joinable() + try: + self.fs.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + + def test_standby_incompat_reject_multifs(self): + """ + Like test_standby_incompat_reject but with a second fs. + """ + + fs2 = self.mds_cluster.newfs(name="cephfs2", create=True) + fs2.fail() + fs2.add_incompat(63, 'placeholder') + fs2.set_joinable() + try: + fs2.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + # did self.fs lose MDS or standbys suicide? + self.fs.wait_for_daemons() + mdsmap = fs2.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['incompat']) + +class TestConfigCommands(CephFSTestCase): + """ + Test that daemons and clients respond to the otherwise rarely-used + runtime config modification operations. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def test_ceph_config_show(self): + """ + That I can successfully show MDS configuration. 
+ """ + + names = self.fs.get_rank_names() + for n in names: + s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n) + self.assertTrue("NAME" in s) + self.assertTrue("mon_host" in s) + + + def test_client_config(self): + """ + That I can successfully issue asok "config set" commands + + :return: + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Test only applies to FUSE clients") + + test_key = "client_cache_size" + test_val = "123" + self.mount_a.admin_socket(['config', 'set', test_key, test_val]) + out = self.mount_a.admin_socket(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + + def test_mds_config_asok(self): + test_key = "mds_max_purge_ops" + test_val = "123" + self.fs.mds_asok(['config', 'set', test_key, test_val]) + out = self.fs.mds_asok(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + def test_mds_config_tell(self): + test_key = "mds_max_purge_ops" + test_val = "123" + + self.fs.rank_tell(['injectargs', "--{0}={1}".format(test_key, test_val)]) + + # Read it back with asok because there is no `tell` equivalent + out = self.fs.rank_tell(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + +class TestMirroringCommands(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def _enable_mirroring(self, fs_name): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", fs_name) + + def _disable_mirroring(self, fs_name): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", fs_name) + + def _add_peer(self, fs_name, peer_spec, remote_fs_name): + peer_uuid = str(uuid.uuid4()) + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) + + def _remove_peer(self, fs_name, peer_uuid): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) + + def _verify_mirroring(self, fs_name, flag_str): + status = self.fs.status() + fs_map = 
status.get_fsmap_byname(fs_name) + if flag_str == 'enabled': + self.assertTrue('mirror_info' in fs_map) + elif flag_str == 'disabled': + self.assertTrue('mirror_info' not in fs_map) + else: + raise RuntimeError(f'invalid flag_str {flag_str}') + + def _get_peer_uuid(self, fs_name, peer_spec): + status = self.fs.status() + fs_map = status.get_fsmap_byname(fs_name) + mirror_info = fs_map.get('mirror_info', None) + self.assertTrue(mirror_info is not None) + for peer_uuid, remote in mirror_info['peers'].items(): + client_name = remote['remote']['client_name'] + cluster_name = remote['remote']['cluster_name'] + spec = f'{client_name}@{cluster_name}' + if spec == peer_spec: + return peer_uuid + return None + + def test_mirroring_command(self): + """basic mirroring command test -- enable, disable mirroring on a + filesystem""" + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_peer_commands(self): + """test adding and removing peers to a mirror enabled filesystem""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._add_peer(self.fs.name, "client.site-c@site-c", "fs_c") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + uuid_peer_c = self._get_peer_uuid(self.fs.name, "client.site-c@site-c") + self.assertTrue(uuid_peer_b is not None) + self.assertTrue(uuid_peer_c is not None) + self._remove_peer(self.fs.name, uuid_peer_b) + self._remove_peer(self.fs.name, uuid_peer_c) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_command_idempotency(self): + """test to check idempotency of mirroring family of commands """ + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + 
self._enable_mirroring(self.fs.name) + # add peer + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + uuid_peer_b1 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b1 is not None) + # adding the peer again should be idempotent + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + uuid_peer_b2 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b2 is not None) + self.assertTrue(uuid_peer_b1 == uuid_peer_b2) + # remove peer + self._remove_peer(self.fs.name, uuid_peer_b1) + uuid_peer_b3 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b3 is None) + # removing the peer again should be idempotent + self._remove_peer(self.fs.name, uuid_peer_b1) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + self._disable_mirroring(self.fs.name) + + def test_mirroring_disable_with_peers(self): + """test disabling mirroring for a filesystem with active peers""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is not None) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + # enable mirroring to check old peers + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + # peer should be gone + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is None) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_with_filesystem_reset(self): + """test to verify mirroring state post filesystem reset""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + 
self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is not None) + # reset filesystem + self.fs.fail() + self.fs.reset() + self.fs.wait_for_daemons() + self._verify_mirroring(self.fs.name, "disabled") + + +class TestSubCmdFsAuthorize(CapsHelper): + client_id = 'testuser' + client_name = 'client.' + client_id + + def test_single_path_r(self): + perm = 'r' + filepaths, filedata, mounts, keyring = self.setup_test_env(perm) + moncap = self.get_mon_cap_from_keyring(self.client_name) + + self.run_mon_cap_tests(moncap, keyring) + self.run_mds_cap_tests(filepaths, filedata, mounts, perm) + + def test_single_path_rw(self): + perm = 'rw' + filepaths, filedata, mounts, keyring = self.setup_test_env(perm) + moncap = self.get_mon_cap_from_keyring(self.client_name) + + self.run_mon_cap_tests(moncap, keyring) + self.run_mds_cap_tests(filepaths, filedata, mounts, perm) + + def test_single_path_rootsquash(self): + filedata, filename = 'some data on fs 1', 'file_on_fs1' + filepath = os_path_join(self.mount_a.hostfs_mntpt, filename) + self.mount_a.write_file(filepath, filedata) + + keyring = self.fs.authorize(self.client_id, ('/', 'rw', 'root_squash')) + keyring_path = self.create_keyring_file(self.mount_a.client_remote, + keyring) + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt='/') + + if filepath.find(self.mount_a.hostfs_mntpt) != -1: + # can read, but not write as root + contents = self.mount_a.read_file(filepath) + self.assertEqual(filedata, contents) + cmdargs = ['echo', 'some random data', Raw('|'), 'sudo', 'tee', filepath] + self.mount_a.negtestcmd(args=cmdargs, retval=1, errmsg='permission denied') + + def test_single_path_authorize_on_nonalphanumeric_fsname(self): + """ + That fs authorize command works on filesystems with names having [_.-] characters + """ + self.mount_a.umount_wait(require_clean=True) + 
self.mds_cluster.delete_all_filesystems() + fs_name = "cephfs-_." + self.fs = self.mds_cluster.newfs(name=fs_name) + self.fs.wait_for_daemons() + self.run_cluster_cmd(f'auth caps client.{self.mount_a.client_id} ' + f'mon "allow r" ' + f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' + f'mds allow') + self.mount_a.remount(cephfs_name=self.fs.name) + perm = 'rw' + filepaths, filedata, mounts, keyring = self.setup_test_env(perm) + self.run_mds_cap_tests(filepaths, filedata, mounts, perm) + + def test_multiple_path_r(self): + perm, paths = 'r', ('/dir1', '/dir2/dir22') + filepaths, filedata, mounts, keyring = self.setup_test_env(perm, paths) + moncap = self.get_mon_cap_from_keyring(self.client_name) + + keyring_path = self.create_keyring_file(self.mount_a.client_remote, + keyring) + for path in paths: + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt=path) + + + # actual tests... + self.run_mon_cap_tests(moncap, keyring) + self.run_mds_cap_tests(filepaths, filedata, mounts, perm) + + def test_multiple_path_rw(self): + perm, paths = 'rw', ('/dir1', '/dir2/dir22') + filepaths, filedata, mounts, keyring = self.setup_test_env(perm, paths) + moncap = self.get_mon_cap_from_keyring(self.client_name) + + keyring_path = self.create_keyring_file(self.mount_a.client_remote, + keyring) + for path in paths: + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt=path) + + + # actual tests... 
+ self.run_mon_cap_tests(moncap, keyring) + self.run_mds_cap_tests(filepaths, filedata, mounts, perm) + + def tearDown(self): + self.mount_a.umount_wait() + self.run_cluster_cmd(f'auth rm {self.client_name}') + + super(type(self), self).tearDown() + + def setup_for_single_path(self, perm): + filedata, filename = 'some data on fs 1', 'file_on_fs1' + + filepath = os_path_join(self.mount_a.hostfs_mntpt, filename) + self.mount_a.write_file(filepath, filedata) + + keyring = self.fs.authorize(self.client_id, ('/', perm)) + keyring_path = self.create_keyring_file(self.mount_a.client_remote, + keyring) + + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt='/') + + return filepath, filedata, keyring + + def setup_for_multiple_paths(self, perm, paths): + filedata, filename = 'some data on fs 1', 'file_on_fs1' + + self.mount_a.run_shell('mkdir -p dir1/dir12/dir13 dir2/dir22/dir23') + + filepaths = [] + for path in paths: + filepath = os_path_join(self.mount_a.hostfs_mntpt, path[1:], filename) + self.mount_a.write_file(filepath, filedata) + filepaths.append(filepath.replace(path, '')) + filepaths = tuple(filepaths) + + keyring = self.fs.authorize(self.client_id, (paths[0], perm, paths[1], + perm)) + + return filepaths, filedata, keyring + + def setup_test_env(self, perm, paths=()): + filepaths, filedata, keyring = self.setup_for_multiple_paths(perm, paths) if paths \ + else self.setup_for_single_path(perm) + + if not isinstance(filepaths, tuple): + filepaths = (filepaths, ) + if not isinstance(filedata, tuple): + filedata = (filedata, ) + mounts = (self.mount_a, ) + + return filepaths, filedata, mounts, keyring + +class TestAdminCommandIdempotency(CephFSTestCase): + """ + Tests for administration command idempotency. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_rm_idempotency(self): + """ + That a removing a fs twice is idempotent. 
+ """ + + data_pools = self.fs.get_data_pool_names(refresh=True) + self.fs.fail() + self.fs.rm() + try: + self.fs.get_mds_map() + except FSMissing: + pass + else: + self.fail("get_mds_map should raise") + p = self.fs.rm() + self.assertIn("does not exist", p.stderr.getvalue()) + self.fs.remove_pools(data_pools) + + +class TestAdminCommandDumpTree(CephFSTestCase): + """ + Tests for administration command subtrees. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_dump_subtrees(self): + """ + Dump all the subtrees to make sure the MDS daemon won't crash. + """ + + subtrees = self.fs.mds_asok(['get', 'subtrees']) + log.info(f"dumping {len(subtrees)} subtrees:") + for subtree in subtrees: + log.info(f" subtree: '{subtree['dir']['path']}'") + self.fs.mds_asok(['dump', 'tree', subtree['dir']['path']]) + + log.info("dumping 2 special subtrees:") + log.info(" subtree: '/'") + self.fs.mds_asok(['dump', 'tree', '/']) + log.info(" subtree: '~mdsdir'") + self.fs.mds_asok(['dump', 'tree', '~mdsdir']) diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py new file mode 100644 index 000000000..00c86b68b --- /dev/null +++ b/qa/tasks/cephfs/test_auto_repair.py @@ -0,0 +1,88 @@ + +""" +Exercise the MDS's auto repair functions +""" + +import logging +import time + +from teuthology.orchestra.run import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestMDSAutoRepair(CephFSTestCase): + def test_backtrace_repair(self): + """ + MDS should verify/fix backtrace on fetch dirfrag + """ + + self.mount_a.run_shell(["mkdir", "testdir1"]) + self.mount_a.run_shell(["touch", "testdir1/testfile"]) + dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1")) + + # drop inodes caps + self.mount_a.umount_wait() + + # flush journal entries 
to dirfrag objects, and expire journal + self.fs.mds_asok(['flush', 'journal']) + + # Restart the MDS to drop the metadata cache (because we expired the journal, + # nothing gets replayed into cache on restart) + self.fs.rank_fail() + self.fs.wait_for_daemons() + + # remove testdir1's backtrace + self.fs.radosm(["rmxattr", dir_objname, "parent"]) + + # readdir (fetch dirfrag) should fix testdir1's backtrace + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "testdir1"]) + + # flush journal entries to dirfrag objects + self.fs.mds_asok(['flush', 'journal']) + + # check if backtrace exists + self.fs.radosm(["getxattr", dir_objname, "parent"]) + + def test_mds_readonly(self): + """ + test if MDS behave correct when it's readonly + """ + # operation should successd when MDS is not readonly + self.mount_a.run_shell(["touch", "test_file1"]) + writer = self.mount_a.write_background(loop=True) + + time.sleep(10) + self.assertFalse(writer.finished) + + # force MDS to read-only mode + self.fs.mds_asok(['force_readonly']) + time.sleep(10) + + # touching test file should fail + try: + self.mount_a.run_shell(["touch", "test_file1"]) + except CommandFailedError: + pass + else: + self.assertTrue(False) + + # background writer also should fail + self.assertTrue(writer.finished) + + # The MDS should report its readonly health state to the mon + self.wait_for_health("MDS_READ_ONLY", timeout=30) + + # restart mds to make it writable + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + self.wait_for_health_clear(timeout=30) diff --git a/qa/tasks/cephfs/test_backtrace.py b/qa/tasks/cephfs/test_backtrace.py new file mode 100644 index 000000000..af246a1e3 --- /dev/null +++ b/qa/tasks/cephfs/test_backtrace.py @@ -0,0 +1,78 @@ + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +class TestBacktrace(CephFSTestCase): + def test_backtrace(self): + """ + That the 'parent' and 'layout' xattrs on the head objects of files + are updated correctly. 
+ """ + + old_data_pool_name = self.fs.get_data_pool_name() + old_pool_id = self.fs.get_data_pool_id() + + # Create a file for subsequent checks + self.mount_a.run_shell(["mkdir", "parent_a"]) + self.mount_a.run_shell(["touch", "parent_a/alpha"]) + file_ino = self.mount_a.path_to_ino("parent_a/alpha") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']]) + layout = self.fs.read_layout(file_ino) + self.assertDictEqual(layout, { + "stripe_unit": 4194304, + "stripe_count": 1, + "object_size": 4194304, + "pool_id": old_pool_id, + "pool_ns": "", + }) + self.assertEqual(backtrace['pool'], old_pool_id) + + # That backtrace is written after parentage changes + self.mount_a.run_shell(["mkdir", "parent_b"]) + self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"]) + + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']]) + + # Create a new data pool + new_pool_name = "data_new" + new_pool_id = self.fs.add_data_pool(new_pool_name) + + # That an object which has switched pools gets its backtrace updated + self.mount_a.setfattr("./parent_b/alpha", + "ceph.file.layout.pool", new_pool_name) + self.fs.mds_asok(["flush", "journal"]) + backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) + self.assertEqual(backtrace_old_pool['pool'], new_pool_id) + backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) + self.assertEqual(backtrace_new_pool['pool'], new_pool_id) + new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) + self.assertEqual(new_pool_layout['pool_id'], new_pool_id) + self.assertEqual(new_pool_layout['pool_ns'], '') + + # That subsequent linkage changes are only written to new pool backtrace + 
self.mount_a.run_shell(["mkdir", "parent_c"]) + self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"]) + self.fs.mds_asok(["flush", "journal"]) + backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) + self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']]) + backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) + self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']]) + + # That layout is written to new pool after change to other field in layout + self.mount_a.setfattr("./parent_c/alpha", + "ceph.file.layout.object_size", "8388608") + + self.fs.mds_asok(["flush", "journal"]) + new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) + self.assertEqual(new_pool_layout['object_size'], 8388608) + + # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough, + # we don't update the layout in all the old pools whenever it changes + old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name) + self.assertEqual(old_pool_layout['object_size'], 4194304) diff --git a/qa/tasks/cephfs/test_cap_flush.py b/qa/tasks/cephfs/test_cap_flush.py new file mode 100644 index 000000000..c472e85bd --- /dev/null +++ b/qa/tasks/cephfs/test_cap_flush.py @@ -0,0 +1,58 @@ + +import os +import time +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +class TestCapFlush(CephFSTestCase): + @for_teuthology + def test_replay_create(self): + """ + MDS starts to handle client caps when it enters clientreplay stage. + When handling a client cap in clientreplay stage, it's possible that + corresponding inode does not exist because the client request which + creates inode hasn't been replayed. 
+ """ + + dir_path = os.path.join(self.mount_a.mountpoint, "testdir") + py_script = dedent(""" + import os + os.mkdir("{0}") + fd = os.open("{0}", os.O_RDONLY) + os.fchmod(fd, 0o777) + os.fsync(fd) + """).format(dir_path) + self.mount_a.run_python(py_script) + + self.fs.mds_asok(["flush", "journal"]) + + # client will only get unsafe replay + self.fs.mds_asok(["config", "set", "mds_log_pause", "1"]) + + file_name = "testfile" + file_path = dir_path + "/" + file_name + + # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty + py_script = dedent(""" + import os + os.chdir("{0}") + os.setgid(65534) + os.setuid(65534) + fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0o644) + os.fchmod(fd, 0o640) + """).format(dir_path, file_name) + self.mount_a.run_python(py_script, sudo=True) + + # Modify file mode by different user. ceph-fuse will send a setattr request + self.mount_a.run_shell(["chmod", "600", file_path], wait=False, sudo=True) + + time.sleep(10) + + # Restart mds. Client will re-send the unsafe request and cap flush + self.fs.rank_fail() + self.fs.wait_for_daemons() + + mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip() + # If the cap flush get dropped, mode should be 0644. + # (Ax cap stays in dirty state, which prevents setattr reply from updating file mode) + self.assertEqual(mode, "600") diff --git a/qa/tasks/cephfs/test_cephfs_shell.py b/qa/tasks/cephfs/test_cephfs_shell.py new file mode 100644 index 000000000..8995d260f --- /dev/null +++ b/qa/tasks/cephfs/test_cephfs_shell.py @@ -0,0 +1,1028 @@ +""" +Before running this testsuite, add path to cephfs-shell module to $PATH and +export $PATH. 
+""" +from io import StringIO +from os import path +import crypt +import logging +from tempfile import mkstemp as tempfile_mkstemp +import math +from time import sleep +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra.run import CommandFailedError + +log = logging.getLogger(__name__) + +def humansize(nbytes): + suffixes = ['B', 'K', 'M', 'G', 'T', 'P'] + i = 0 + while nbytes >= 1024 and i < len(suffixes)-1: + nbytes /= 1024. + i += 1 + nbytes = math.ceil(nbytes) + f = ('%d' % nbytes).rstrip('.') + return '%s%s' % (f, suffixes[i]) + +def ensure_str(s): + if isinstance(s, str): + return s + if isinstance(s, bytes): + return s.decode() + raise TypeError("not expecting type '%s'" % type(s)) + +class TestCephFSShell(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + def setUp(self): + super(TestCephFSShell, self).setUp() + + conf_contents = "[cephfs-shell]\ncolors = False\ndebug = True\n" + confpath = self.mount_a.client_remote.sh('mktemp').strip() + self.mount_a.client_remote.write_file(confpath, conf_contents) + self.default_shell_conf_path = confpath + + def run_cephfs_shell_cmd(self, cmd, mount_x=None, shell_conf_path=None, + opts=None, stdout=None, stderr=None, stdin=None, + check_status=True): + stdout = stdout or StringIO() + stderr = stderr or StringIO() + if mount_x is None: + mount_x = self.mount_a + if isinstance(cmd, list): + cmd = " ".join(cmd) + if not shell_conf_path: + shell_conf_path = self.default_shell_conf_path + + args = ["cephfs-shell", "-c", shell_conf_path] + if opts: + args += opts + args.extend(("--", cmd)) + + log.info("Running command: {}".format(" ".join(args))) + return mount_x.client_remote.run(args=args, stdout=stdout, + stderr=stderr, stdin=stdin, + check_status=check_status) + + def negtest_cephfs_shell_cmd(self, **kwargs): + """ + This method verifies that cephfs shell command fails with expected + return value and/or error message. 
+ + kwargs is expected to hold the arguments same as + run_cephfs_shell_cmd() with the following exceptions - + * It should not contain check_status (since commands are expected + to fail, check_status is hardcoded to False). + * It is optional to set expected error message and return value to + dict members 'errmsg' and 'retval' respectively. + + This method servers as shorthand for codeblocks like - + + try: + proc = self.run_cephfs_shell_cmd(args=['some', 'cmd'], + check_status=False, + stdout=stdout) + except CommandFailedError as e: + self.assertNotIn('some error message', + proc.stderr.getvalue.lower()) + + + try: + proc = self.run_cephfs_shell_cmd(args=['some', 'cmd'], + check_status=False, + stdout=stdout) + except CommandFailedError as e: + self.assertNotEqual(1, proc.returncode) + """ + retval = kwargs.pop('retval', None) + errmsg = kwargs.pop('errmsg', None) + kwargs['check_status'] = False + + proc = self.run_cephfs_shell_cmd(**kwargs) + if retval: + self.assertEqual(proc.returncode, retval) + else: + self.assertNotEqual(proc.returncode, 0) + if errmsg: + self.assertIn(errmsg, proc.stderr.getvalue().lower()) + + return proc + + def get_cephfs_shell_cmd_output(self, cmd, mount_x=None, + shell_conf_path=None, opts=None, + stdout=None, stdin=None,check_status=True): + return ensure_str(self.run_cephfs_shell_cmd( + cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path, + opts=opts, stdout=stdout, stdin=stdin, + check_status=check_status).stdout.getvalue().strip()) + + def get_cephfs_shell_cmd_error(self, cmd, mount_x=None, + shell_conf_path=None, opts=None, + stderr=None, stdin=None, check_status=True): + return ensure_str(self.run_cephfs_shell_cmd( + cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path, + opts=opts, stderr=stderr, stdin=stdin, + check_status=check_status).stderr.getvalue().strip()) + + def run_cephfs_shell_script(self, script, mount_x=None, + shell_conf_path=None, opts=None, stdout=None, + stderr=None, stdin=None, 
check_status=True): + stdout = stdout or StringIO() + stderr = stderr or StringIO() + if mount_x is None: + mount_x = self.mount_a + + scriptpath = tempfile_mkstemp(prefix='test-cephfs', text=True)[1] + with open(scriptpath, 'w') as scriptfile: + scriptfile.write(script) + # copy script to the machine running cephfs-shell. + mount_x.client_remote.put_file(scriptpath, scriptpath) + mount_x.run_shell_payload(f"chmod 755 {scriptpath}") + + args = ["cephfs-shell", '-b', scriptpath] + if shell_conf_path: + args[1:1] = ["-c", shell_conf_path] + log.info('Running script \"' + scriptpath + '\"') + return mount_x.client_remote.run(args=args, stdout=stdout, + stderr=stderr, stdin=stdin, + check_status=True) + + def get_cephfs_shell_script_output(self, script, mount_x=None, + shell_conf_path=None, opts=None, + stdout=None, stdin=None, + check_status=True): + return ensure_str(self.run_cephfs_shell_script( + script=script, mount_x=mount_x, shell_conf_path=shell_conf_path, + opts=opts, stdout=stdout, stdin=stdin, + check_status=check_status).stdout.getvalue().strip()) + + +class TestMkdir(TestCephFSShell): + def test_mkdir(self): + """ + Test that mkdir creates directory + """ + o = self.get_cephfs_shell_cmd_output("mkdir d1") + log.info("cephfs-shell output:\n{}".format(o)) + + o = self.mount_a.stat('d1') + log.info("mount_a output:\n{}".format(o)) + + def test_mkdir_with_07000_octal_mode(self): + """ + Test that mkdir fails with octal mode greater than 0777 + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m 07000 d2") + try: + self.mount_a.stat('d2') + except CommandFailedError: + pass + + def test_mkdir_with_negative_octal_mode(self): + """ + Test that mkdir fails with negative octal mode + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m -0755 d3") + try: + self.mount_a.stat('d3') + except CommandFailedError: + pass + + def test_mkdir_with_non_octal_mode(self): + """ + Test that mkdir passes with non-octal mode + """ + o = self.get_cephfs_shell_cmd_output("mkdir -m u=rwx 
d4") + log.info("cephfs-shell output:\n{}".format(o)) + + # mkdir d4 should pass + o = self.mount_a.stat('d4') + assert((o['st_mode'] & 0o700) == 0o700) + + def test_mkdir_with_bad_non_octal_mode(self): + """ + Test that mkdir failes with bad non-octal mode + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m ugx=0755 d5") + try: + self.mount_a.stat('d5') + except CommandFailedError: + pass + + def test_mkdir_path_without_path_option(self): + """ + Test that mkdir fails without path option for creating path + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir d5/d6/d7") + try: + self.mount_a.stat('d5/d6/d7') + except CommandFailedError: + pass + + def test_mkdir_path_with_path_option(self): + """ + Test that mkdir passes with path option for creating path + """ + o = self.get_cephfs_shell_cmd_output("mkdir -p d5/d6/d7") + log.info("cephfs-shell output:\n{}".format(o)) + + # mkdir d5/d6/d7 should pass + o = self.mount_a.stat('d5/d6/d7') + log.info("mount_a output:\n{}".format(o)) + +class TestRmdir(TestCephFSShell): + dir_name = "test_dir" + + def dir_does_not_exists(self): + """ + Tests that directory does not exists + """ + try: + self.mount_a.stat(self.dir_name) + except CommandFailedError as e: + if e.exitstatus == 2: + return 0 + raise + + def test_rmdir(self): + """ + Test that rmdir deletes directory + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("rmdir "+ self.dir_name) + self.dir_does_not_exists() + + def test_rmdir_non_existing_dir(self): + """ + Test that rmdir does not delete a non existing directory + """ + self.negtest_cephfs_shell_cmd(cmd="rmdir test_dir") + self.dir_does_not_exists() + + def test_rmdir_dir_with_file(self): + """ + Test that rmdir does not delete directory containing file + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("put - test_dir/dumpfile", stdin="Valid File") + self.run_cephfs_shell_cmd("rmdir" + self.dir_name) + self.mount_a.stat(self.dir_name) + + def 
test_rmdir_existing_file(self): + """ + Test that rmdir does not delete a file + """ + self.run_cephfs_shell_cmd("put - dumpfile", stdin="Valid File") + self.negtest_cephfs_shell_cmd(cmd="rmdir dumpfile") + self.mount_a.stat("dumpfile") + + def test_rmdir_p(self): + """ + Test that rmdir -p deletes all empty directories in the root directory passed + """ + self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3") + self.run_cephfs_shell_cmd("rmdir -p "+ self.dir_name) + self.dir_does_not_exists() + + def test_rmdir_p_valid_path(self): + """ + Test that rmdir -p deletes all empty directories in the path passed + """ + self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3") + self.run_cephfs_shell_cmd("rmdir -p test_dir/t1/t2/t3") + self.dir_does_not_exists() + + def test_rmdir_p_non_existing_dir(self): + """ + Test that rmdir -p does not delete an invalid directory + """ + self.negtest_cephfs_shell_cmd(cmd="rmdir -p test_dir") + self.dir_does_not_exists() + + def test_rmdir_p_dir_with_file(self): + """ + Test that rmdir -p does not delete the directory containing a file + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("put - test_dir/dumpfile", stdin="Valid File") + self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name) + self.mount_a.stat(self.dir_name) + +class TestGetAndPut(TestCephFSShell): + def test_get_with_target_name(self): + """ + Test that get passes with target name + """ + s = 'C' * 1024 + s_hash = crypt.crypt(s, '.A') + o = self.get_cephfs_shell_cmd_output("put - dump4", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + + # put - dump4 should pass + o = self.mount_a.stat('dump4') + log.info("mount_a output:\n{}".format(o)) + + o = self.get_cephfs_shell_cmd_output("get dump4 ./dump4") + log.info("cephfs-shell output:\n{}".format(o)) + + o = self.get_cephfs_shell_cmd_output("!cat dump4") + o_hash = crypt.crypt(o, '.A') + + # s_hash must be equal to o_hash + log.info("s_hash:{}".format(s_hash)) + 
log.info("o_hash:{}".format(o_hash)) + assert(s_hash == o_hash) + + def test_get_without_target_name(self): + """ + Test that get should fail when there is no target name + """ + s = 'Somedata' + # put - dump5 should pass + self.get_cephfs_shell_cmd_output("put - dump5", stdin=s) + + self.mount_a.stat('dump5') + + # get dump5 should fail as there is no local_path mentioned + with self.assertRaises(CommandFailedError): + self.get_cephfs_shell_cmd_output("get dump5") + + # stat dump would return non-zero exit code as get dump failed + # cwd=None because we want to run it at CWD, not at cephfs mntpt. + r = self.mount_a.run_shell('stat dump5', cwd=None, + check_status=False).returncode + self.assertEqual(r, 1) + + def test_get_doesnt_create_dir(self): + # if get cmd is creating subdirs on its own then dump7 will be + # stored as ./dump7/tmp/dump7 and not ./dump7, therefore + # if doing `cat ./dump7` returns non-zero exit code(i.e. 1) then + # it implies that no such file exists at that location + dir_abspath = path.join(self.mount_a.mountpoint, 'tmp') + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.client_remote.write_file(path.join(dir_abspath, 'dump7'), + 'somedata') + self.get_cephfs_shell_cmd_output("get /tmp/dump7 ./dump7") + # test that dump7 exists + self.mount_a.run_shell("cat ./dump7", cwd=None) + + def test_get_to_console(self): + """ + Test that get passes with target name + """ + s = 'E' * 1024 + s_hash = crypt.crypt(s, '.A') + o = self.get_cephfs_shell_cmd_output("put - dump6", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + + # put - dump6 should pass + o = self.mount_a.stat('dump6') + log.info("mount_a output:\n{}".format(o)) + + # get dump6 - should pass + o = self.get_cephfs_shell_cmd_output("get dump6 -") + o_hash = crypt.crypt(o, '.A') + log.info("cephfs-shell output:\n{}".format(o)) + + # s_hash must be equal to o_hash + log.info("s_hash:{}".format(s_hash)) + log.info("o_hash:{}".format(o_hash)) + assert(s_hash == 
o_hash) + + def test_put_without_target_name(self): + """ + put - should fail as the cmd expects both arguments are mandatory. + """ + with self.assertRaises(CommandFailedError): + self.get_cephfs_shell_cmd_output("put -") + + def test_put_validate_local_path(self): + """ + This test is intended to make sure local_path is validated before + trying to put the file from local fs to cephfs and the command + put ./dumpXYZ dump8 would fail as dumpXYX doesn't exist. + """ + with self.assertRaises(CommandFailedError): + o = self.get_cephfs_shell_cmd_output("put ./dumpXYZ dump8") + log.info("cephfs-shell output:\n{}".format(o)) + +class TestSnapshots(TestCephFSShell): + def test_snap(self): + """ + Test that snapshot creation and deletion work + """ + sd = self.fs.get_config('client_snapdir') + sdn = "data_dir/{}/snap1".format(sd) + + # create a data dir and dump some files into it + self.get_cephfs_shell_cmd_output("mkdir data_dir") + s = 'A' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_a", stdin=s) + s = 'B' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_b", stdin=s) + s = 'C' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_c", stdin=s) + s = 'D' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_d", stdin=s) + s = 'E' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_e", stdin=s) + + o = self.get_cephfs_shell_cmd_output("ls -l /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + + # create the snapshot - must pass + o = self.get_cephfs_shell_cmd_output("snap create snap1 /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + self.assertEqual("", o) + o = self.mount_a.stat(sdn) + log.info("mount_a output:\n{}".format(o)) + self.assertIn('st_mode', o) + + # create the same snapshot again - must fail with an error message + self.negtest_cephfs_shell_cmd(cmd="snap create snap1 /data_dir", + errmsg="snapshot 'snap1' already exists") + o = 
self.mount_a.stat(sdn) + log.info("mount_a output:\n{}".format(o)) + self.assertIn('st_mode', o) + + # delete the snapshot - must pass + o = self.get_cephfs_shell_cmd_output("snap delete snap1 /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + self.assertEqual("", o) + try: + o = self.mount_a.stat(sdn) + except CommandFailedError: + # snap dir should not exist anymore + pass + log.info("mount_a output:\n{}".format(o)) + self.assertNotIn('st_mode', o) + + # delete the same snapshot again - must fail with an error message + self.negtest_cephfs_shell_cmd(cmd="snap delete snap1 /data_dir", + errmsg="'snap1': no such snapshot") + try: + o = self.mount_a.stat(sdn) + except CommandFailedError: + pass + log.info("mount_a output:\n{}".format(o)) + self.assertNotIn('st_mode', o) + +class TestCD(TestCephFSShell): + CLIENTS_REQUIRED = 1 + + def test_cd_with_no_args(self): + """ + Test that when cd is issued without any arguments, CWD is changed + to root directory. + """ + path = 'dir1/dir2/dir3' + self.mount_a.run_shell_payload(f"mkdir -p {path}") + expected_cwd = '/' + + script = 'cd {}\ncd\ncwd\n'.format(path) + output = self.get_cephfs_shell_script_output(script) + self.assertEqual(output, expected_cwd) + + def test_cd_with_args(self): + """ + Test that when cd is issued with an argument, CWD is changed + to the path passed in the argument. 
+ """ + path = 'dir1/dir2/dir3' + self.mount_a.run_shell_payload(f"mkdir -p {path}") + expected_cwd = '/dir1/dir2/dir3' + + script = 'cd {}\ncwd\n'.format(path) + output = self.get_cephfs_shell_script_output(script) + self.assertEqual(output, expected_cwd) + +class TestDU(TestCephFSShell): + CLIENTS_REQUIRED = 1 + + def test_du_works_for_regfiles(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + + size = humansize(self.mount_a.stat(regfile_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", regfilename) + + du_output = self.get_cephfs_shell_cmd_output('du ' + regfilename) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_non_empty_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + regfilename = 'some_regfile' + regfile_abspath = path.join(dir_abspath, regfilename) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + + # XXX: we stat `regfile_abspath` here because ceph du reports a non-empty + # directory's size as sum of sizes of all files under it. 
+ size = humansize(self.mount_a.stat(regfile_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", dirname) + + sleep(10) + du_output = self.get_cephfs_shell_cmd_output('du ' + dirname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_empty_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + + size = humansize(self.mount_a.stat(dir_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", dirname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + dirname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_hardlinks(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + hlinkname = 'some_hardlink' + hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname) + self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}") + + size = humansize(self.mount_a.stat(hlink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", hlinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + hlinkname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_softlinks_to_files(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + slinkname = 'some_softlink' + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + self.mount_a.run_shell_payload(f"ln -s {regfile_abspath} {slink_abspath}") + + size = humansize(self.mount_a.lstat(slink_abspath)['st_size']) + expected_output = r'{}{}{}'.format((size), " +", slinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_softlinks_to_dirs(self): + dirname = 
'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + slinkname = 'some_softlink' + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink_abspath}") + + size = humansize(self.mount_a.lstat(slink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", slinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname) + self.assertRegex(du_output, expected_output) + + # NOTE: tests using these are pretty slow since to this methods sleeps for + # 15 seconds + def _setup_files(self, return_path_to_files=False, path_prefix='./'): + dirname = 'dir1' + regfilename = 'regfile' + hlinkname = 'hlink' + slinkname = 'slink1' + slink2name = 'slink2' + + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname) + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + slink2_abspath = path.join(self.mount_a.mountpoint, slink2name) + + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.run_shell_payload(f"touch {regfile_abspath}") + self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}") + self.mount_a.run_shell_payload(f"ln -s {regfile_abspath} {slink_abspath}") + self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink2_abspath}") + + dir2_name = 'dir2' + dir21_name = 'dir21' + regfile121_name = 'regfile121' + dir2_abspath = path.join(self.mount_a.mountpoint, dir2_name) + dir21_abspath = path.join(dir2_abspath, dir21_name) + regfile121_abspath = path.join(dir21_abspath, regfile121_name) + self.mount_a.run_shell_payload(f"mkdir -p {dir21_abspath}") + self.mount_a.run_shell_payload(f"touch {regfile121_abspath}") + + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + 
self.mount_a.client_remote.write_file(regfile121_abspath, 'somemoredata') + + # TODO: is there a way to trigger/force update ceph.dir.rbytes? + # wait so that attr ceph.dir.rbytes gets a chance to be updated. + sleep(20) + + expected_patterns = [] + path_to_files = [] + + def append_expected_output_pattern(f): + if f == '/': + expected_patterns.append(r'{}{}{}'.format(size, " +", '.' + f)) + else: + expected_patterns.append(r'{}{}{}'.format(size, " +", + path_prefix + path.relpath(f, self.mount_a.mountpoint))) + + for f in [dir_abspath, regfile_abspath, regfile121_abspath, + hlink_abspath, slink_abspath, slink2_abspath]: + size = humansize(self.mount_a.stat(f, follow_symlinks= + False)['st_size']) + append_expected_output_pattern(f) + + # get size for directories containig regfiles within + for f in [dir2_abspath, dir21_abspath]: + size = humansize(self.mount_a.stat(regfile121_abspath, + follow_symlinks=False)['st_size']) + append_expected_output_pattern(f) + + # get size for CephFS root + size = 0 + for f in [regfile_abspath, regfile121_abspath, slink_abspath, + slink2_abspath]: + size += self.mount_a.stat(f, follow_symlinks=False)['st_size'] + size = humansize(size) + append_expected_output_pattern('/') + + if return_path_to_files: + for p in [dir_abspath, regfile_abspath, dir2_abspath, + dir21_abspath, regfile121_abspath, hlink_abspath, + slink_abspath, slink2_abspath]: + path_to_files.append(path.relpath(p, self.mount_a.mountpoint)) + + return (expected_patterns, path_to_files) + else: + return expected_patterns + + def test_du_works_recursively_with_no_path_in_args(self): + expected_patterns_in_output = self._setup_files() + du_output = self.get_cephfs_shell_cmd_output('du -r') + + for expected_output in expected_patterns_in_output: + self.assertRegex(du_output, expected_output) + + def test_du_with_path_in_args(self): + expected_patterns_in_output, path_to_files = self._setup_files(True, + path_prefix='') + + args = ['du', '/'] + for p in path_to_files: + 
args.append(p) + du_output = self.get_cephfs_shell_cmd_output(args) + + for expected_output in expected_patterns_in_output: + self.assertRegex(du_output, expected_output) + + def test_du_with_no_args(self): + expected_patterns_in_output = self._setup_files() + + du_output = self.get_cephfs_shell_cmd_output('du') + + for expected_output in expected_patterns_in_output: + # Since CWD is CephFS root and being non-recursive expect only + # CWD in DU report. + if expected_output.find('/') == len(expected_output) - 1: + self.assertRegex(du_output, expected_output) + + +class TestDF(TestCephFSShell): + def validate_df(self, filename): + df_output = self.get_cephfs_shell_cmd_output('df '+filename) + log.info("cephfs-shell df output:\n{}".format(df_output)) + + shell_df = df_output.splitlines()[1].split() + + block_size = int(self.mount_a.df()["total"]) // 1024 + log.info("cephfs df block size output:{}\n".format(block_size)) + + st_size = int(self.mount_a.stat(filename)["st_size"]) + log.info("cephfs stat used output:{}".format(st_size)) + log.info("cephfs available:{}\n".format(block_size - st_size)) + + self.assertTupleEqual((block_size, st_size, block_size - st_size), + (int(shell_df[0]), int(shell_df[1]) , int(shell_df[2]))) + + def test_df_with_no_args(self): + expected_output = '' + df_output = self.get_cephfs_shell_cmd_output('df') + assert df_output == expected_output + + def test_df_for_valid_directory(self): + dir_name = 'dir1' + mount_output = self.mount_a.run_shell_payload(f"mkdir {dir_name}") + log.info("cephfs-shell mount output:\n{}".format(mount_output)) + self.validate_df(dir_name) + + def test_df_for_invalid_directory(self): + dir_abspath = path.join(self.mount_a.mountpoint, 'non-existent-dir') + self.negtest_cephfs_shell_cmd(cmd='df ' + dir_abspath, + errmsg='error in stat') + + def test_df_for_valid_file(self): + s = 'df test' * 14145016 + o = self.get_cephfs_shell_cmd_output("put - dumpfile", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + 
self.validate_df("dumpfile") + + +class TestQuota(TestCephFSShell): + dir_name = 'testdir' + + def create_dir(self): + mount_output = self.get_cephfs_shell_cmd_output('mkdir ' + self.dir_name) + log.info("cephfs-shell mount output:\n{}".format(mount_output)) + + def set_and_get_quota_vals(self, input_val, check_status=True): + self.run_cephfs_shell_cmd(['quota', 'set', '--max_bytes', + input_val[0], '--max_files', input_val[1], + self.dir_name], check_status=check_status) + + quota_output = self.get_cephfs_shell_cmd_output(['quota', 'get', self.dir_name], + check_status=check_status) + + quota_output = quota_output.split() + return quota_output[1], quota_output[3] + + def test_set(self): + self.create_dir() + set_values = ('6', '2') + self.assertTupleEqual(self.set_and_get_quota_vals(set_values), set_values) + + def test_replace_values(self): + self.test_set() + set_values = ('20', '4') + self.assertTupleEqual(self.set_and_get_quota_vals(set_values), set_values) + + def test_set_invalid_dir(self): + set_values = ('5', '5') + try: + self.assertTupleEqual(self.set_and_get_quota_vals( + set_values, False), set_values) + raise Exception("Something went wrong!! Values set for non existing directory") + except IndexError: + # Test should pass as values cannot be set for non existing directory + pass + + def test_set_invalid_values(self): + self.create_dir() + set_values = ('-6', '-5') + try: + self.assertTupleEqual(self.set_and_get_quota_vals(set_values, + False), set_values) + raise Exception("Something went wrong!! Invalid values set") + except IndexError: + # Test should pass as invalid values cannot be set + pass + + def test_exceed_file_limit(self): + self.test_set() + dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name) + self.mount_a.run_shell_payload(f"touch {dir_abspath}/file1") + file2 = path.join(dir_abspath, "file2") + try: + self.mount_a.run_shell_payload(f"touch {file2}") + raise Exception("Something went wrong!! 
File creation should have failed") + except CommandFailedError: + # Test should pass as file quota set to 2 + # Additional condition to confirm file creation failure + if not path.exists(file2): + return 0 + raise + + def test_exceed_write_limit(self): + self.test_set() + dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name) + filename = 'test_file' + file_abspath = path.join(dir_abspath, filename) + try: + # Write should fail as bytes quota is set to 6 + self.mount_a.client_remote.write_file(file_abspath, 'Disk raise Exception') + raise Exception("Write should have failed") + except CommandFailedError: + # Test should pass only when write command fails + path_exists = path.exists(file_abspath) + if not path_exists: + # Testing with teuthology: No file is created. + return 0 + elif path_exists and not path.getsize(file_abspath): + # Testing on Fedora 30: When write fails, empty file gets created. + return 0 + else: + raise + + +class TestXattr(TestCephFSShell): + dir_name = 'testdir' + + def create_dir(self): + self.run_cephfs_shell_cmd('mkdir ' + self.dir_name) + + def set_get_list_xattr_vals(self, input_val, negtest=False): + setxattr_output = self.get_cephfs_shell_cmd_output( + ['setxattr', self.dir_name, input_val[0], input_val[1]]) + log.info("cephfs-shell setxattr output:\n{}".format(setxattr_output)) + + getxattr_output = self.get_cephfs_shell_cmd_output( + ['getxattr', self.dir_name, input_val[0]]) + log.info("cephfs-shell getxattr output:\n{}".format(getxattr_output)) + + listxattr_output = self.get_cephfs_shell_cmd_output( + ['listxattr', self.dir_name]) + log.info("cephfs-shell listxattr output:\n{}".format(listxattr_output)) + + return listxattr_output, getxattr_output + + def test_set(self): + self.create_dir() + set_values = ('user.key', '2') + self.assertTupleEqual(self.set_get_list_xattr_vals(set_values), set_values) + + def test_reset(self): + self.test_set() + set_values = ('user.key', '4') + 
self.assertTupleEqual(self.set_get_list_xattr_vals(set_values), set_values) + + def test_non_existing_dir(self): + input_val = ('user.key', '9') + self.negtest_cephfs_shell_cmd(cmd=['setxattr', self.dir_name, input_val[0], + input_val[1]]) + self.negtest_cephfs_shell_cmd(cmd=['getxattr', self.dir_name, input_val[0]]) + self.negtest_cephfs_shell_cmd(cmd=['listxattr', self.dir_name]) + +class TestLS(TestCephFSShell): + dir_name = ('test_dir') + hidden_dir_name = ('.test_hidden_dir') + + def test_ls(self): + """ Test that ls prints files in CWD. """ + self.run_cephfs_shell_cmd(f'mkdir {self.dir_name}') + + ls_output = self.get_cephfs_shell_cmd_output("ls") + log.info(f"output of ls command:\n{ls_output}") + + self.assertIn(self.dir_name, ls_output) + + def test_ls_a(self): + """ Test ls -a prints hidden files in CWD.""" + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}') + + ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a']) + log.info(f"output of ls -a command:\n{ls_a_output}") + + self.assertIn(self.hidden_dir_name, ls_a_output) + + def test_ls_does_not_print_hidden_dir(self): + """ Test ls command does not print hidden directory """ + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}') + + ls_output = self.get_cephfs_shell_cmd_output("ls") + log.info(f"output of ls command:\n{ls_output}") + + self.assertNotIn(self.hidden_dir_name, ls_output) + + def test_ls_a_prints_non_hidden_dir(self): + """ Test ls -a command prints non hidden directory """ + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name} {self.dir_name}') + + ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a']) + log.info(f"output of ls -a command:\n{ls_a_output}") + + self.assertIn(self.dir_name, ls_a_output) + + def test_ls_H_prints_human_readable_file_size(self): + """ Test "ls -lH" prints human readable file size.""" + + file_sizes = ['1','1K', '1M', '1G'] + file_names = ['dump1', 'dump2', 'dump3', 'dump4'] + + + for (file_size, file_name) in zip(file_sizes, 
file_names): + temp_file = self.mount_a.client_remote.mktemp(file_name) + self.mount_a.run_shell_payload(f"fallocate -l {file_size} {temp_file}") + self.mount_a.run_shell_payload(f'mv {temp_file} ./') + + ls_H_output = self.get_cephfs_shell_cmd_output(['ls', '-lH']) + + ls_H_file_size = set() + for line in ls_H_output.split('\n'): + ls_H_file_size.add(line.split()[1]) + + # test that file sizes are in human readable format + self.assertEqual({'1B','1K', '1M', '1G'}, ls_H_file_size) + + def test_ls_s_sort_by_size(self): + """ Test "ls -S" sorts file listing by file_size """ + test_file1 = "test_file1.txt" + test_file2 = "test_file2.txt" + file1_content = 'A' * 102 + file2_content = 'B' * 10 + + self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content) + self.run_cephfs_shell_cmd(f"write {test_file2}", stdin=file2_content) + + ls_s_output = self.get_cephfs_shell_cmd_output(['ls', '-lS']) + + file_sizes = [] + for line in ls_s_output.split('\n'): + file_sizes.append(line.split()[1]) + + #test that file size are in ascending order + self.assertEqual(file_sizes, sorted(file_sizes)) + + +class TestMisc(TestCephFSShell): + def test_issue_cephfs_shell_cmd_at_invocation(self): + """ + Test that `cephfs-shell -c conf cmd` works. + """ + # choosing a long name since short ones have a higher probability + # of getting matched by coincidence. + dirname = 'somedirectory' + self.run_cephfs_shell_cmd(['mkdir', dirname]) + + output = self.mount_a.client_remote.sh(['cephfs-shell', 'ls']).\ + strip() + + self.assertRegex(output, dirname) + + def test_help(self): + """ + Test that help outputs commands. + """ + o = self.get_cephfs_shell_cmd_output("help all") + log.info("output:\n{}".format(o)) + +class TestShellOpts(TestCephFSShell): + """ + Contains tests for shell options from conf file and shell prompt. + """ + + def setUp(self): + super(type(self), self).setUp() + + # output of following command - + # editor - was: 'vim' + # now: '?' + # editor: '?' 
+ self.editor_val = self.get_cephfs_shell_cmd_output( + 'set editor ?, set editor').split('\n')[2] + self.editor_val = self.editor_val.split(':')[1].\ + replace("'", "", 2).strip() + + def write_tempconf(self, confcontents): + self.tempconfpath = self.mount_a.client_remote.mktemp( + suffix='cephfs-shell.conf') + self.mount_a.client_remote.write_file(self.tempconfpath, + confcontents) + + def test_reading_conf(self): + self.write_tempconf("[cephfs-shell]\neditor = ???") + + # output of following command - + # CephFS:~/>>> set editor + # editor: 'vim' + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor', shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertNotEqual(self.editor_val, final_editor_val) + + def test_reading_conf_with_dup_opt(self): + """ + Read conf without duplicate sections/options. + """ + self.write_tempconf("[cephfs-shell]\neditor = ???\neditor = " + + self.editor_val) + + # output of following command - + # CephFS:~/>>> set editor + # editor: 'vim' + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor', shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertEqual(self.editor_val, final_editor_val) + + def test_setting_opt_after_reading_conf(self): + self.write_tempconf("[cephfs-shell]\neditor = ???") + + # output of following command - + # editor - was: vim + # now: vim + # editor: vim + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor %s, set editor' % (self.editor_val), + shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split('\n')[2] + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertEqual(self.editor_val, final_editor_val) diff --git a/qa/tasks/cephfs/test_client_limits.py 
b/qa/tasks/cephfs/test_client_limits.py new file mode 100644 index 000000000..4ea6576af --- /dev/null +++ b/qa/tasks/cephfs/test_client_limits.py @@ -0,0 +1,327 @@ + +""" +Exercise the MDS's behaviour when clients and the MDCache reach or +exceed the limits of how many caps/inodes they should hold. +""" + +import logging +from textwrap import dedent +from tasks.ceph_test_case import TestTimeoutError +from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming +from tasks.cephfs.fuse_mount import FuseMount +import os + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + +# Hardcoded values from Server::recall_client_state +CAP_RECALL_RATIO = 0.8 +CAP_RECALL_MIN = 100 + + +class TestClientLimits(CephFSTestCase): + REQUIRE_KCLIENT_REMOTE = True + CLIENTS_REQUIRED = 2 + + def _test_client_pin(self, use_subdir, open_files): + """ + When a client pins an inode in its cache, for example because the file is held open, + it should reject requests from the MDS to trim these caps. The MDS should complain + to the user that it is unable to enforce its cache size limits because of this + objectionable client. + + :param use_subdir: whether to put test files in a subdir or use root + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. 
+ cache_memory_limit = "1K" + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client) + mds_max_caps_per_client = int(self.config_get('mds', "mds_max_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + path = "subdir" if use_subdir else "." + open_proc = self.mount_a.open_n_background(path, open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + (2 if use_subdir else 1), + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # MDS should not be happy about that, as the client is failing to comply + # with the SESSION_RECALL messages it is being sent + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. 
+ self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + # When the client closes the files, it should retain only as many caps as allowed + # under the SESSION_RECALL policy + log.info("Terminating process holding files open") + self.mount_a._kill_background(open_proc) + + # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, + # which depend on the caps outstanding, cache size and overall ratio + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps <= mds_min_caps_per_client: + return True + elif num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) + + @needs_trimming + def test_client_pin_root(self): + self._test_client_pin(False, 400) + + @needs_trimming + def test_client_pin(self): + self._test_client_pin(True, 800) + + @needs_trimming + def test_client_pin_mincaps(self): + self._test_client_pin(True, 200) + + def test_client_min_caps_working_set(self): + """ + When a client has inodes pinned in its cache (open files), that the MDS + will not warn about the client not responding to cache pressure when + the number of caps is below mds_min_caps_working_set. + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. 
+ cache_memory_limit = "1K" + open_files = 400 + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + self.config_set('mds', 'mds_min_caps_working_set', open_files*2) + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + self.mount_a.open_n_background("subdir", open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + 2, + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + try: + # MDS should not be happy about that but it's not sending + # MDS_CLIENT_RECALL warnings because the client's caps are below + # mds_min_caps_working_set. + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + except TestTimeoutError: + pass + else: + raise RuntimeError("expected no client recall warning") + + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. 
+ """ + + max_caps_per_client = 500 + cap_acquisition_throttle = 250 + + self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle) + + # Create 1500 files split across 6 directories, 250 each. + for i in range(1, 7): + self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir + self.mount_a.run_shell_payload("find | wc") + + # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250 + cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + + def test_client_release_bug(self): + """ + When a client has a bug (which we will simulate) preventing it from releasing caps, + the MDS should notice that releases are not being sent promptly, and generate a health + metric to that effect. + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + mount_a_client_id = self.mount_a.get_global_id() + + # Client A creates a file. 
He will hold the write caps on the file, and later (simulated bug) fail + # to comply with the MDSs request to release that cap + self.mount_a.run_shell(["touch", "file1"]) + + # Client B tries to stat the file that client A created + rproc = self.mount_b.write_background("file1") + + # After session_timeout, we should see a health warning (extra lag from + # MDS beacon period) + session_timeout = self.fs.get_var("session_timeout") + self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10) + + # Client B should still be stuck + self.assertFalse(rproc.finished) + + # Kill client A + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Client B should complete + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + rproc.wait() + + def test_client_oldest_tid(self): + """ + When a client does not advance its oldest tid, the MDS should notice that + and generate health warnings. + """ + + # num of requests client issues + max_requests = 1000 + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.set_conf('client', 'client inject fixed oldest tid', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + + self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)]) + + # Create lots of files + self.mount_a.create_n_files("testdir/file1", max_requests + 100) + + # Create a few files synchronously. This makes sure previous requests are completed + self.mount_a.create_n_files("testdir/file2", 5, True) + + # Wait for the health warnings. 
Assume mds can handle 10 request per second at least + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10) + + def _test_client_cache_size(self, mount_subdir): + """ + check if client invalidate kernel dcache according to its cache size config + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + if mount_subdir: + # fuse assigns a fix inode number (1) to root inode. But in mounting into + # subdir case, the actual inode number of root is not 1. This mismatch + # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries + # in root directory. + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.umount_wait() + self.set_conf('client', 'client mountpoint', '/subdir') + self.mount_a.mount_wait() + root_ino = self.mount_a.path_to_ino(".") + self.assertEqual(root_ino, 1); + + dir_path = os.path.join(self.mount_a.mountpoint, "testdir") + + mkdir_script = dedent(""" + import os + os.mkdir("{path}") + for n in range(0, {num_dirs}): + os.mkdir("{path}/dir{{0}}".format(n)) + """) + + num_dirs = 1000 + self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs)) + self.mount_a.run_shell(["sync"]) + + dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() + self.assertGreaterEqual(dentry_count, num_dirs) + self.assertGreaterEqual(dentry_pinned_count, num_dirs) + + cache_size = num_dirs // 10 + self.mount_a.set_cache_size(cache_size) + + def trimmed(): + dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() + log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format( + dentry_count, dentry_pinned_count + )) + if dentry_count > cache_size or dentry_pinned_count > cache_size: + return False + + return True + + self.wait_until_true(trimmed, 30) + + @needs_trimming + def test_client_cache_size(self): + self._test_client_cache_size(False) + 
self._test_client_cache_size(True) + + def test_client_max_caps(self): + """ + That the MDS will not let a client sit above mds_max_caps_per_client caps. + """ + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + mds_max_caps_per_client = 2*mds_min_caps_per_client + self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client) + + self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py new file mode 100644 index 000000000..24726b369 --- /dev/null +++ b/qa/tasks/cephfs/test_client_recovery.py @@ -0,0 +1,698 @@ + +""" +Teuthology task for exercising CephFS client recovery +""" + +import logging +from textwrap import dedent +import time +import distutils.version as version +import re +import os + +from teuthology.orchestra import run +from teuthology.orchestra.run import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.packaging import get_package_version + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestClientNetworkRecovery(CephFSTestCase): + REQUIRE_KCLIENT_REMOTE = True + REQUIRE_ONE_CLIENT_REMOTE = True + CLIENTS_REQUIRED = 2 + + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] + + # Environment references + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_network_death(self): + """ + Simulate software freeze or temporary network failure. 
+ + Check that the client blocks I/O during failure, and completes + I/O after failure. + """ + + session_timeout = self.fs.get_var("session_timeout") + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + + # We only need one client + self.mount_b.umount_wait() + + # Initially our one client session should be visible + client_id = self.mount_a.get_global_id() + ls_data = self._session_list() + self.assert_session_count(1, ls_data) + self.assertEqual(ls_data[0]['id'], client_id) + self.assert_session_state(client_id, "open") + + # ...and capable of doing I/O without blocking + self.mount_a.create_files() + + # ...but if we turn off the network + self.fs.set_clients_block(True) + + # ...and try and start an I/O + write_blocked = self.mount_a.write_background() + + # ...then it should block + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "stale") + + # ...until we re-enable I/O + self.fs.set_clients_block(False) + + # ...when it should complete promptly + a = time.time() + self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2) + write_blocked.wait() # Already know we're finished, wait() to raise exception on errors + recovery_time = time.time() - a + log.info("recovery time: {0}".format(recovery_time)) + self.assert_session_state(client_id, "open") + + +class TestClientRecovery(CephFSTestCase): + REQUIRE_KCLIENT_REMOTE = True + CLIENTS_REQUIRED = 2 + + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] + + # Environment references + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_basic(self): + # Check that two clients come up healthy and see each others' files + # ===================================================== + self.mount_a.create_files() + self.mount_a.check_files() + 
self.mount_a.umount_wait() + + self.mount_b.check_files() + + self.mount_a.mount_wait() + + # Check that the admin socket interface is correctly reporting + # two sessions + # ===================================================== + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + self.assertSetEqual( + set([l['id'] for l in ls_data]), + {self.mount_a.get_global_id(), self.mount_b.get_global_id()} + ) + + def test_restart(self): + # Check that after an MDS restart both clients reconnect and continue + # to handle I/O + # ===================================================== + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_a.create_destroy() + self.mount_b.create_destroy() + + def _session_num_caps(self, client_id): + ls_data = self.fs.mds_asok(['session', 'ls']) + return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) + + def test_reconnect_timeout(self): + # Reconnect timeout + # ================= + # Check that if I stop an MDS and a client goes away, the MDS waits + # for the reconnect period + + mount_a_client_id = self.mount_a.get_global_id() + + self.fs.fail() + + self.mount_a.umount_wait(force=True) + + self.fs.set_joinable() + + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + # Check that the MDS locally reports its state correctly + status = self.fs.mds_asok(['status']) + self.assertIn("reconnect_status", status) + + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + # The session for the dead client should have the 'reconnect' flag set + self.assertTrue(self.get_session(mount_a_client_id)['reconnecting']) + + # Wait for the reconnect state to clear, this should take the + # reconnect timeout period. 
+ in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2) + # Check that the period we waited to enter active is within a factor + # of two of the reconnect timeout. + self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout // 2, + "Should have been in reconnect phase for {0} but only took {1}".format( + self.mds_reconnect_timeout, in_reconnect_for + )) + + self.assert_session_count(1) + + # Check that the client that timed out during reconnect can + # mount again and do I/O + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + self.assert_session_count(2) + + def test_reconnect_eviction(self): + # Eviction during reconnect + # ========================= + mount_a_client_id = self.mount_a.get_global_id() + + self.fs.fail() + + # The mount goes away while the MDS is offline + self.mount_a.kill() + + # wait for it to die + time.sleep(5) + + self.fs.set_joinable() + + # Enter reconnect phase + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + self.assert_session_count(2) + + # Evict the stuck client + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + self.assert_session_count(1) + + # Observe that we proceed to active phase without waiting full reconnect timeout + evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + # Once we evict the troublemaker, the reconnect phase should complete + # in well under the reconnect timeout. 
+ self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5, + "reconnect did not complete soon enough after eviction, took {0}".format( + evict_til_active + )) + + # We killed earlier so must clean up before trying to use again + self.mount_a.kill_cleanup() + + # Bring the client back + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + def _test_stale_caps(self, write): + session_timeout = self.fs.get_var("session_timeout") + + # Capability release from stale session + # ===================================== + if write: + cap_holder = self.mount_a.open_background() + else: + self.mount_a.run_shell(["touch", "background_file"]) + self.mount_a.umount_wait() + self.mount_a.mount_wait() + cap_holder = self.mount_a.open_background(write=False) + + self.assert_session_count(2) + mount_a_gid = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + + try: + # Now, after session_timeout seconds, the waiter should + # complete their operation when the MDS marks the holder's + # session stale. 
+ cap_waiter = self.mount_b.write_background() + a = time.time() + cap_waiter.wait() + b = time.time() + + # Should have succeeded + self.assertEqual(cap_waiter.exitstatus, 0) + + if write: + self.assert_session_count(1) + else: + self.assert_session_state(mount_a_gid, "stale") + + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0, + "Capability handover took {0}, expected approx {1}".format( + cap_waited, session_timeout + )) + + self.mount_a._kill_background(cap_holder) + finally: + # teardown() doesn't quite handle this case cleanly, so help it out + self.mount_a.resume_netns() + + def test_stale_read_caps(self): + self._test_stale_caps(False) + + def test_stale_write_caps(self): + self._test_stale_caps(True) + + def test_evicted_caps(self): + # Eviction while holding a capability + # =================================== + + session_timeout = self.fs.get_var("session_timeout") + + # Take out a write capability on a file on client A, + # and then immediately kill it. 
+ cap_holder = self.mount_a.open_background() + mount_a_client_id = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + time.sleep(5) + self.assertFalse(cap_waiter.finished) + + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + # Now, because I evicted the old holder of the capability, it should + # immediately get handed over to the waiter + a = time.time() + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + # This is the check that it happened 'now' rather than waiting + # for the session timeout + self.assertLess(cap_waited, session_timeout / 2.0, + "Capability handover took {0}, expected less than {1}".format( + cap_waited, session_timeout / 2.0 + )) + + self.mount_a._kill_background(cap_holder) + finally: + self.mount_a.resume_netns() + + def test_trim_caps(self): + # Trim capability when reconnecting MDS + # =================================== + + count = 500 + # Create lots of files + for i in range(count): + self.mount_a.run_shell(["touch", "f{0}".format(i)]) + + # Populate mount_b's cache + self.mount_b.run_shell(["ls", "-l"]) + + client_id = self.mount_b.get_global_id() + num_caps = self._session_num_caps(client_id) + self.assertGreaterEqual(num_caps, count) + + # Restart MDS. 
client should trim its cache when reconnecting to the MDS + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + num_caps = self._session_num_caps(client_id) + self.assertLess(num_caps, count, + "should have less than {0} capabilities, have {1}".format( + count, num_caps + )) + + def _is_flockable(self): + a_version_str = get_package_version(self.mount_a.client_remote, "fuse") + b_version_str = get_package_version(self.mount_b.client_remote, "fuse") + flock_version_str = "2.9" + + version_regex = re.compile(r"[0-9\.]+") + a_result = version_regex.match(a_version_str) + self.assertTrue(a_result) + b_result = version_regex.match(b_version_str) + self.assertTrue(b_result) + a_version = version.StrictVersion(a_result.group()) + b_version = version.StrictVersion(b_result.group()) + flock_version=version.StrictVersion(flock_version_str) + + if (a_version >= flock_version and b_version >= flock_version): + log.info("flock locks are available") + return True + else: + log.info("not testing flock locks, machines have versions {av} and {bv}".format( + av=a_version_str,bv=b_version_str)) + return False + + def test_filelock(self): + """ + Check that file lock doesn't get lost after an MDS restart + """ + + flockable = self._is_flockable() + lock_holder = self.mount_a.lock_background(do_flock=flockable) + + self.mount_b.wait_for_visible("background_file-2") + self.mount_b.check_filelock(do_flock=flockable) + + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_b.check_filelock(do_flock=flockable) + + # Tear down the background process + self.mount_a._kill_background(lock_holder) + + def test_filelock_eviction(self): + """ + Check that file lock held by evicted client is given to + waiting client. 
+ """ + if not self._is_flockable(): + self.skipTest("flock is not available") + + lock_holder = self.mount_a.lock_background() + self.mount_b.wait_for_visible("background_file-2") + self.mount_b.check_filelock() + + lock_taker = self.mount_b.lock_and_release() + # Check the taker is waiting (doesn't get it immediately) + time.sleep(2) + self.assertFalse(lock_holder.finished) + self.assertFalse(lock_taker.finished) + + try: + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + # Evicting mount_a should let mount_b's attempt to take the lock + # succeed + self.wait_until_true(lambda: lock_taker.finished, timeout=10) + finally: + # Tear down the background process + self.mount_a._kill_background(lock_holder) + + # teardown() doesn't quite handle this case cleanly, so help it out + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Bring the client back + self.mount_a.mount_wait() + + def test_dir_fsync(self): + self._test_fsync(True); + + def test_create_fsync(self): + self._test_fsync(False); + + def _test_fsync(self, dirfsync): + """ + That calls to fsync guarantee visibility of metadata to another + client immediately after the fsyncing client dies. 
+ """ + + # Leave this guy out until he's needed + self.mount_b.umount_wait() + + # Create dir + child dentry on client A, and fsync the dir + path = os.path.join(self.mount_a.mountpoint, "subdir") + self.mount_a.run_python( + dedent(""" + import os + import time + + path = "{path}" + + print("Starting creation...") + start = time.time() + + os.mkdir(path) + dfd = os.open(path, os.O_DIRECTORY) + + fd = open(os.path.join(path, "childfile"), "w") + print("Finished creation in {{0}}s".format(time.time() - start)) + + print("Starting fsync...") + start = time.time() + if {dirfsync}: + os.fsync(dfd) + else: + os.fsync(fd) + print("Finished fsync in {{0}}s".format(time.time() - start)) + """.format(path=path,dirfsync=str(dirfsync))) + ) + + # Immediately kill the MDS and then client A + self.fs.fail() + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay + self.fs.set_joinable() + log.info("Waiting for reconnect...") + self.fs.wait_for_state("up:reconnect") + log.info("Waiting for active...") + self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout) + log.info("Reached active...") + + # Is the child dentry visible from mount B? 
+ self.mount_b.mount_wait() + self.mount_b.run_shell(["ls", "subdir/childfile"]) + + def test_unmount_for_evicted_client(self): + """Test if client hangs on unmount after evicting the client.""" + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + self.mount_a.umount_wait(require_clean=True, timeout=30) + + def test_stale_renew(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + session_timeout = self.fs.get_var("session_timeout") + + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["touch", "testdir/file1"]) + # populate readdir cache + self.mount_a.run_shell(["ls", "testdir"]) + self.mount_b.run_shell(["ls", "testdir"]) + + # check if readdir cache is effective + initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency']) + self.mount_b.run_shell(["ls", "testdir"]) + current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency']) + self.assertEqual(current_readdirs, initial_readdirs); + + mount_b_gid = self.mount_b.get_global_id() + # stop ceph-fuse process of mount_b + self.mount_b.suspend_netns() + + self.assert_session_state(mount_b_gid, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + + self.mount_a.run_shell(["touch", "testdir/file2"]) + self.assert_session_state(mount_b_gid, "stale") + + # resume ceph-fuse process of mount_b + self.mount_b.resume_netns() + # Is the new file visible from mount_b? (caps become invalid after session stale) + self.mount_b.run_shell(["ls", "testdir/file2"]) + + def test_abort_conn(self): + """ + Check that abort_conn() skips closing mds sessions. 
+ """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Testing libcephfs function") + + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + session_timeout = self.fs.get_var("session_timeout") + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + gid_str = self.mount_a.run_python(dedent(""" + import cephfs as libcephfs + cephfs = libcephfs.LibCephFS(conffile='') + cephfs.mount() + client_id = cephfs.get_instance_id() + cephfs.abort_conn() + print(client_id) + """) + ) + gid = int(gid_str); + + self.assert_session_state(gid, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assert_session_state(gid, "stale") + + def test_dont_mark_unresponsive_client_stale(self): + """ + Test that an unresponsive client holding caps is not marked stale or + evicted unless another clients wants its caps. + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + # XXX: To conduct this test we need at least two clients since a + # single client is never evcited by MDS. + SESSION_TIMEOUT = 30 + SESSION_AUTOCLOSE = 50 + time_at_beg = time.time() + mount_a_gid = self.mount_a.get_global_id() + _ = self.mount_a.client_pid + self.fs.set_var('session_timeout', SESSION_TIMEOUT) + self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE) + self.assert_session_count(2, self.fs.mds_asok(['session', 'ls'])) + + # test that client holding cap not required by any other client is not + # marked stale when it becomes unresponsive. + self.mount_a.run_shell(['mkdir', 'dir']) + self.mount_a.send_signal('sigstop') + time.sleep(SESSION_TIMEOUT + 2) + self.assert_session_state(mount_a_gid, "open") + + # test that other clients have to wait to get the caps from + # unresponsive client until session_autoclose. 
+ self.mount_b.run_shell(['stat', 'dir']) + self.assert_session_count(1, self.fs.mds_asok(['session', 'ls'])) + self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE) + + self.mount_a.send_signal('sigcont') + + def test_config_session_timeout(self): + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + session_timeout = self.fs.get_var("session_timeout") + mount_a_gid = self.mount_a.get_global_id() + + self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)]) + + self.mount_a.kill(); + + self.assert_session_count(2) + + time.sleep(session_timeout * 1.5) + self.assert_session_state(mount_a_gid, "open") + + time.sleep(session_timeout) + self.assert_session_count(1) + + self.mount_a.kill_cleanup() + + def test_reconnect_after_blocklisted(self): + """ + Test reconnect after blocklisted. + - writing to a fd that was opened before blocklist should return -EBADF + - reading/writing to a file with lost file locks should return -EIO + - readonly fd should continue to work + """ + + self.mount_a.umount_wait() + + if isinstance(self.mount_a, FuseMount): + self.mount_a.mount_wait(mntopts=['--client_reconnect_stale=1', '--fuse_disable_pagecache=1']) + else: + try: + self.mount_a.mount_wait(mntopts=['recover_session=clean']) + except CommandFailedError: + self.mount_a.kill_cleanup() + self.skipTest("Not implemented in current kernel") + + self.mount_a.wait_until_mounted() + + path = os.path.join(self.mount_a.mountpoint, 'testfile_reconnect_after_blocklisted') + pyscript = dedent(""" + import os + import sys + import fcntl + import errno + import time + + fd1 = os.open("{path}.1", os.O_RDWR | os.O_CREAT, 0O666) + fd2 = os.open("{path}.1", os.O_RDONLY) + fd3 = os.open("{path}.2", os.O_RDWR | os.O_CREAT, 0O666) + fd4 = os.open("{path}.2", os.O_RDONLY) + + os.write(fd1, b'content') + os.read(fd2, 1); + + os.write(fd3, b'content') + os.read(fd4, 1); + fcntl.flock(fd4, fcntl.LOCK_SH | fcntl.LOCK_NB) + + 
print("blocklist") + sys.stdout.flush() + + sys.stdin.readline() + + # wait for mds to close session + time.sleep(10); + + # trigger 'open session' message. kclient relies on 'session reject' message + # to detect if itself is blocklisted + try: + os.stat("{path}.1") + except: + pass + + # wait for auto reconnect + time.sleep(10); + + try: + os.write(fd1, b'content') + except OSError as e: + if e.errno != errno.EBADF: + raise + else: + raise RuntimeError("write() failed to raise error") + + os.read(fd2, 1); + + try: + os.read(fd4, 1) + except OSError as e: + if e.errno != errno.EIO: + raise + else: + raise RuntimeError("read() failed to raise error") + """).format(path=path) + rproc = self.mount_a.client_remote.run( + args=['python3', '-c', pyscript], + wait=False, stdin=run.PIPE, stdout=run.PIPE) + + rproc.stdout.readline() + + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + rproc.stdin.writelines(['done\n']) + rproc.stdin.flush() + + rproc.wait() + self.assertEqual(rproc.exitstatus, 0) diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py new file mode 100644 index 000000000..4d0194429 --- /dev/null +++ b/qa/tasks/cephfs/test_damage.py @@ -0,0 +1,564 @@ +from io import BytesIO, StringIO +import json +import logging +import errno +import re +from teuthology.contextutil import MaxWhileTries +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import wait +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +DAMAGED_ON_START = "damaged_on_start" +DAMAGED_ON_LS = "damaged_on_ls" +CRASHED = "server crashed" +NO_DAMAGE = "no damage" +READONLY = "readonly" +FAILED_CLIENT = "client failed" +FAILED_SERVER = "server failed" + +# An EIO in response to a stat from the client +EIO_ON_LS = "eio" + +# An EIO, but nothing in damage table (not ever what we expect) +EIO_NO_DAMAGE = "eio 
without damage entry" + + +log = logging.getLogger(__name__) + + +class TestDamage(CephFSTestCase): + def _simple_workload_write(self): + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.write_n_mb("subdir/sixmegs", 6) + return self.mount_a.stat("subdir/sixmegs") + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + @for_teuthology #459s + def test_object_deletion(self): + """ + That the MDS has a clean 'damaged' response to loss of any single metadata object + """ + + self._simple_workload_write() + + # Hmm, actually it would be nice to permute whether the metadata pool + # state contains sessions or not, but for the moment close this session + # to avoid waiting through reconnect on every MDS start. + self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + serialized = self.fs.radosmo(['export', '-']) + + def is_ignored(obj_id, dentry=None): + """ + A filter to avoid redundantly mutating many similar objects (e.g. + stray dirfrags) or similar dentries (e.g. stray dir dentries) + """ + if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000": + return True + + if dentry and obj_id == "100.00000000": + if re.match("stray.+_head", dentry) and dentry != "stray0_head": + return True + + return False + + def get_path(obj_id, dentry=None): + """ + What filesystem path does this object or dentry correspond to? i.e. + what should I poke to see EIO after damaging it? 
+ """ + + if obj_id == "1.00000000" and dentry == "subdir_head": + return "./subdir" + elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head": + return "./subdir/sixmegs" + + # None means ls will do an "ls -R" in hope of seeing some errors + return None + + objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n") + objects = [o for o in objects if not is_ignored(o)] + + # Find all objects with an OMAP header + omap_header_objs = [] + for o in objects: + header = self.fs.radosmo(["getomapheader", o], stdout=StringIO()) + # The rados CLI wraps the header output in a hex-printed style + header_bytes = int(re.match("header \((.+) bytes\)", header).group(1)) + if header_bytes > 0: + omap_header_objs.append(o) + + # Find all OMAP key/vals + omap_keys = [] + for o in objects: + keys_str = self.fs.radosmo(["listomapkeys", o], stdout=StringIO()) + if keys_str: + for key in keys_str.strip().split("\n"): + if not is_ignored(o, key): + omap_keys.append((o, key)) + + # Find objects that have data in their bodies + data_objects = [] + for obj_id in objects: + stat_out = self.fs.radosmo(["stat", obj_id], stdout=StringIO()) + size = int(re.match(".+, size (.+)$", stat_out).group(1)) + if size > 0: + data_objects.append(obj_id) + + # Define the various forms of damage we will inflict + class MetadataMutation(object): + def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None): + self.obj_id = obj_id_ + self.desc = desc_ + self.mutate_fn = mutate_fn_ + self.expectation = expectation_ + if ls_path is None: + self.ls_path = "." 
+ else: + self.ls_path = ls_path + + def __eq__(self, other): + return self.desc == other.desc + + def __hash__(self): + return hash(self.desc) + + junk = "deadbeef" * 10 + mutations = [] + + # Removals + for o in objects: + if o in [ + # JournalPointers are auto-replaced if missing (same path as upgrade) + "400.00000000", + # Missing dirfrags for non-system dirs result in empty directory + "10000000000.00000000", + # PurgeQueue is auto-created if not found on startup + "500.00000000", + # open file table is auto-created if not found on startup + "mds0_openfiles.0" + ]: + expectation = NO_DAMAGE + else: + expectation = DAMAGED_ON_START + + log.info("Expectation on rm '{0}' will be '{1}'".format( + o, expectation + )) + + mutations.append(MetadataMutation( + o, + "Delete {0}".format(o), + lambda o=o: self.fs.radosm(["rm", o]), + expectation + )) + + # Blatant corruptions + for obj_id in data_objects: + if obj_id == "500.00000000": + # purge queue corruption results in read-only FS + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)), + READONLY + )) + else: + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)), + DAMAGED_ON_START + )) + + # Truncations + for o in data_objects: + if o == "500.00000000": + # The PurgeQueue is allowed to be empty: Journaler interprets + # an empty header object as an empty journal. 
+ expectation = NO_DAMAGE + else: + expectation = DAMAGED_ON_START + + mutations.append( + MetadataMutation( + o, + "Truncate {0}".format(o), + lambda o=o: self.fs.radosm(["truncate", o, "0"]), + expectation + )) + + # OMAP value corruptions + for o, k in omap_keys: + if o.startswith("100."): + # Anything in rank 0's 'mydir' + expectation = DAMAGED_ON_START + else: + expectation = EIO_ON_LS + + mutations.append( + MetadataMutation( + o, + "Corrupt omap key {0}:{1}".format(o, k), + lambda o=o,k=k: self.fs.radosm(["setomapval", o, k, junk]), + expectation, + get_path(o, k) + ) + ) + + # OMAP header corruptions + for o in omap_header_objs: + if re.match("60.\.00000000", o) \ + or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]: + expectation = DAMAGED_ON_START + else: + expectation = NO_DAMAGE + + log.info("Expectation on corrupt header '{0}' will be '{1}'".format( + o, expectation + )) + + mutations.append( + MetadataMutation( + o, + "Corrupt omap header on {0}".format(o), + lambda o=o: self.fs.radosm(["setomapheader", o, junk]), + expectation + ) + ) + + results = {} + + for mutation in mutations: + log.info("Applying mutation '{0}'".format(mutation.desc)) + + # Reset MDS state + self.mount_a.umount_wait(force=True) + self.fs.fail() + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Reset RADOS pool state + self.fs.radosm(['import', '-'], stdin=BytesIO(serialized)) + + # Inject the mutation + mutation.mutate_fn() + + # Try starting the MDS + self.fs.set_joinable() + + # How long we'll wait between starting a daemon and expecting + # it to make it through startup, and potentially declare itself + # damaged to the mon cluster. 
+ startup_timeout = 60 + + if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE): + if mutation.expectation == DAMAGED_ON_START: + # The MDS may pass through active before making it to damaged + try: + self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout) + except RuntimeError: + pass + + # Wait for MDS to either come up or go into damaged state + try: + self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout) + except RuntimeError: + crashed = False + # Didn't make it to healthy or damaged, did it crash? + for daemon_id, daemon in self.fs.mds_daemons.items(): + if daemon.proc and daemon.proc.finished: + crashed = True + log.error("Daemon {0} crashed!".format(daemon_id)) + daemon.proc = None # So that subsequent stop() doesn't raise error + if not crashed: + # Didn't go health, didn't go damaged, didn't crash, so what? + raise + else: + log.info("Result: Mutation '{0}' led to crash".format(mutation.desc)) + results[mutation] = CRASHED + continue + if self.is_marked_damaged(0): + log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc)) + results[mutation] = DAMAGED_ON_START + continue + else: + log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc)) + else: + try: + self.wait_until_true(self.fs.are_daemons_healthy, 60) + except RuntimeError: + log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc)) + if self.is_marked_damaged(0): + results[mutation] = DAMAGED_ON_START + else: + results[mutation] = FAILED_SERVER + continue + log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc)) + + # MDS is up, should go damaged on ls or client mount + self.mount_a.mount_wait() + if mutation.ls_path == ".": + proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False) + else: + proc = self.mount_a.stat(mutation.ls_path, wait=False) + + if 
mutation.expectation == DAMAGED_ON_LS: + try: + self.wait_until_true(lambda: self.is_marked_damaged(0), 60) + log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc)) + results[mutation] = DAMAGED_ON_LS + except RuntimeError: + if self.fs.are_daemons_healthy(): + log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format( + mutation.desc)) + results[mutation] = NO_DAMAGE + else: + log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) + results[mutation] = FAILED_SERVER + elif mutation.expectation == READONLY: + proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False) + try: + proc.wait() + except CommandFailedError: + stderr = proc.stderr.getvalue() + log.info(stderr) + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise + else: + try: + wait([proc], 20) + log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc)) + results[mutation] = NO_DAMAGE + except MaxWhileTries: + log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc)) + results[mutation] = FAILED_CLIENT + except CommandFailedError as e: + if e.exitstatus == errno.EIO: + log.info("Result: EIO on client") + results[mutation] = EIO_ON_LS + else: + log.info("Result: unexpected error {0} on client".format(e)) + results[mutation] = FAILED_CLIENT + + if mutation.expectation == EIO_ON_LS: + # EIOs mean something handled by DamageTable: assert that it has + # been populated + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) + if len(damage) == 0: + results[mutation] = EIO_NO_DAMAGE + + failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result] + if failures: + log.error("{0} mutations had unexpected outcomes:".format(len(failures))) + for mutation, result in failures: + log.error(" 
Expected '{0}' actually '{1}' from '{2}'".format( + mutation.expectation, result, mutation.desc + )) + raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures))) + else: + log.info("All {0} mutations had expected outcomes".format(len(mutations))) + + def test_damaged_dentry(self): + # Damage to dentrys is interesting because it leaves the + # directory's `complete` flag in a subtle state where + # we have marked the dir complete in order that folks + # can access it, but in actual fact there is a dentry + # missing + self.mount_a.run_shell(["mkdir", "subdir/"]) + + self.mount_a.run_shell(["touch", "subdir/file_undamaged"]) + self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"]) + + subdir_ino = self.mount_a.path_to_ino("subdir") + + self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + # Corrupt a dentry + junk = "deadbeef" * 10 + dirfrag_obj = "{0:x}.00000000".format(subdir_ino) + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Start up and try to list it + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + dentries = self.mount_a.ls("subdir/") + + # The damaged guy should have disappeared + self.assertEqual(dentries, ["file_undamaged"]) + + # I should get ENOENT if I try and read it normally, because + # the dir is considered complete + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + raise AssertionError("Expected ENOENT") + + # The fact that there is damaged should have bee recorded + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + damage_id = damage[0]['id'] + + # If I try to create a dentry with the same name as the 
damaged guy + # then that should be forbidden + try: + self.mount_a.touch("subdir/file_to_be_damaged") + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EIO) + else: + raise AssertionError("Expected EIO") + + # Attempting that touch will clear the client's complete flag, now + # when I stat it I'll get EIO instead of ENOENT + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + if isinstance(self.mount_a, FuseMount): + self.assertEqual(e.exitstatus, errno.EIO) + else: + # Old kernel client handles this case differently + self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO]) + else: + raise AssertionError("Expected EIO") + + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "2") + + self.mount_a.umount_wait() + + # Now repair the stats + scrub_json = self.fs.run_scrub(["start", "/subdir", "repair"]) + log.info(json.dumps(scrub_json, indent=2)) + + self.assertNotEqual(scrub_json, None) + self.assertEqual(scrub_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_json["scrub_tag"]), True) + + # Check that the file count is now correct + self.mount_a.mount_wait() + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "1") + + # Clean up the omap object + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Clean up the damagetable entry + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", "{did}".format(did=damage_id)) + + # Now I should be able to create a file with the same name as the + # damaged guy if I want. 
def test_open_ino_errors(self):
    """
    That errors encountered during opening inos are properly propagated

    Two hard links are resolved through deliberately damaged metadata:
    first the primary inode's "parent" xattr is overwritten with junk,
    then the dirfrag object of the other primary's parent directory is
    removed.  After each lookup the MDS damage table is inspected and the
    reported entries are removed again.
    """

    def damage_cmd(*args):
        # The active MDS name is looked up on every call rather than cached.
        return self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", *args)

    def damage_ls():
        return json.loads(damage_cmd("ls", '--format=json-pretty'))

    def stat_expecting_eio(path):
        # If the stat fails, it must have failed with EIO.  A stat that
        # succeeds is not flagged here; the damage table checks that follow
        # each call are what the test relies on in that case.
        ran = self.mount_a.run_shell(["stat", path], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertIn("Input/output error", ran.stderr.getvalue())

    self.mount_a.run_shell(["mkdir", "dir1"])
    self.mount_a.run_shell(["touch", "dir1/file1"])
    self.mount_a.run_shell(["mkdir", "dir2"])
    self.mount_a.run_shell(["touch", "dir2/file2"])
    self.mount_a.run_shell(["mkdir", "testdir"])
    self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
    self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

    file1_ino = self.mount_a.path_to_ino("dir1/file1")
    file2_ino = self.mount_a.path_to_ino("dir2/file2")
    dir2_ino = self.mount_a.path_to_ino("dir2")

    # Ensure everything is written to backing store
    self.mount_a.umount_wait()
    self.fs.mds_asok(["flush", "journal"])

    # Drop everything from the MDS cache
    self.fs.fail()
    self.fs.journal_tool(['journal', 'reset'], 0)
    self.fs.set_joinable()
    self.fs.wait_for_daemons()

    self.mount_a.mount_wait()

    # Case 1: un-decodeable backtrace

    # Validate that the backtrace is present and decodable
    self.fs.read_backtrace(file1_ino)
    # Go corrupt the backtrace of dir1/file1 (used for resolving
    # testdir/hardlink1).
    self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")

    # Check that touching the hardlink gives EIO
    stat_expecting_eio("testdir/hardlink1")

    # Check that an entry is created in the damage table
    damage = damage_ls()
    self.assertEqual(len(damage), 1)
    self.assertEqual(damage[0]['damage_type'], "backtrace")
    self.assertEqual(damage[0]['ino'], file1_ino)

    damage_cmd("rm", str(damage[0]['id']))

    # Case 2: missing dirfrag for the target inode

    self.fs.radosm(["rm", "{0:x}.00000000".format(dir2_ino)])

    # Check that touching the hardlink gives EIO
    stat_expecting_eio("testdir/hardlink2")

    # Check that an entry is created in the damage table.  The two entries
    # may be listed in either order, so compare them as an unordered
    # collection of (type, ino) pairs.
    damage = damage_ls()
    self.assertEqual(len(damage), 2)
    self.assertCountEqual(
        [(entry['damage_type'], entry['ino']) for entry in damage],
        [("backtrace", file2_ino), ("dir_frag", dir2_ino)])

    for entry in damage:
        damage_cmd("rm", str(entry['id']))
class Workload(object):
    """
    Base class for a data-scan recovery scenario.

    A workload writes files through a client mount, optionally damages the
    pools, and later validates what can be read back.  Subclasses implement
    write() and validate(); damage() and flush() have defaults here.
    """

    def __init__(self, filesystem, mount):
        self._mount = mount
        self._filesystem = filesystem
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def assert_equal(self, a, b):
        """
        Record (rather than raise) a validation failure when a != b.

        The stored backtrace is the call stack leading to this check, so the
        log shows which validate() line failed.  A traceback taken from an
        exception raised and caught inside this method would only ever
        contain this method's own frame.
        """
        if a != b:
            error = AssertionError("{0} != {1}".format(a, b))
            # Innermost three frames: this method, its caller, and one above.
            backtrace = "".join(traceback.format_stack(limit=3))
            self._errors.append(ValidationError(error, backtrace))

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from.  By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        pool = self._filesystem.get_metadata_pool_name()
        self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._filesystem.mds_asok(["flush", "journal"])
class BacktracelessFile(Workload):
    """
    One six-megabyte file in a subdirectory whose metadata is never flushed,
    so validation looks for it under lost+found by inode number.
    """

    def write(self):
        mount = self._mount
        mount.run_shell(["mkdir", "subdir"])
        mount.write_n_mb("subdir/sixmegs", 6)
        self._initial_state = mount.stat("subdir/sixmegs")

    def flush(self):
        # Never flush metadata, so backtrace won't be written
        return None

    def validate(self):
        before = self._initial_state
        recovered_name = "{0:x}".format(before["st_ino"])

        # With no path known for the inode, it is expected to be the only
        # entry of lost+found, which in turn is the only entry of the root.
        root_entries = self._mount.ls(sudo=True)
        self.assert_equal(root_entries, ["lost+found"])
        orphan_entries = self._mount.ls("lost+found", sudo=True)
        self.assert_equal(orphan_entries, [recovered_name])
        after = self._mount.stat(f"lost+found/{recovered_name}", sudo=True)

        # Name and path may be lost, but the size should still be recovered
        self.assert_equal(after['st_size'], before['st_size'])

        return self._errors
self.os // 2 + 1, + # More than stripe_count objects will exist + self.os * self.sc + self.os * self.sc // 2 + ] + + def write(self): + # Create a dir with a striped layout set on it + self._mount.run_shell(["mkdir", "stripey"]) + + self._mount.setfattr("./stripey", "ceph.dir.layout", + "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format( + ss=self.ss, os=self.os, sc=self.sc, + pool=self._filesystem.get_data_pool_name() + )) + + # Write files, then flush metadata so that its layout gets written into an xattr + for i, n_bytes in enumerate(self.interesting_sizes): + self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + # This is really just validating the validator + self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + self._filesystem.mds_asok(["flush", "journal"]) + + # Write another file in the same way, but this time don't flush the metadata, + # so that it won't have the layout xattr + self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512) + self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512) + + self._initial_state = { + "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file") + } + + def flush(self): + # Pass because we already selectively flushed during write + pass + + def validate(self): + # The first files should have been recovered into its original location + # with the correct layout: read back correct data + for i, n_bytes in enumerate(self.interesting_sizes): + try: + self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + except CommandFailedError as e: + self._errors.append( + ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3)) + ) + + # The unflushed file should have been recovered into lost+found without + # the correct layout: read back junk + ino_name = "%x" % self._initial_state["unflushed_ino"] + self.assert_equal(self._mount.ls("lost+found", sudo=True), 
class ManyFilesWorkload(Workload):
    """
    Write `file_count` test-pattern files of 6 MiB each into one directory
    and check that each of them reads back with the expected pattern.
    """

    def __init__(self, filesystem, mount, file_count):
        super().__init__(filesystem, mount)
        self.file_count = file_count

    def write(self):
        size = 6 * 1024 * 1024
        self._mount.run_shell(["mkdir", "subdir"])
        for index in range(self.file_count):
            self._mount.write_test_pattern(f"subdir/{index}", size)

    def validate(self):
        size = 6 * 1024 * 1024
        for index in range(self.file_count):
            try:
                self._mount.validate_test_pattern(f"subdir/{index}", size)
            except CommandFailedError as err:
                # Keep going so every bad file is reported, not just the first
                failure = ValidationError(f"File {index}: {err}", traceback.format_exc(3))
                self._errors.append(failure)

        return self._errors
class NonDefaultLayout(Workload):
    """
    Check that the reconstruction copes with files that have a different
    object size in their layout
    """
    def write(self):
        # The layout has to be set while the file is still empty, hence
        # touch -> setfattr -> write data.
        self._mount.run_shell(["touch", "datafile"])
        self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608")
        self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"])
        self._initial_state = self._mount.stat("datafile")

    def validate(self):
        """
        Compare the recovered layout and size with what write() recorded.

        Returns the accumulated list of validation errors.  The caller
        treats a falsy result as success, so returning nothing would make
        every failed check pass silently.
        """
        # Check we got the layout reconstructed properly
        object_size = int(self._mount.getfattr("./datafile", "ceph.file.layout.object_size", sudo=True))
        self.assert_equal(object_size, 8388608)

        # Check we got the file size reconstructed properly
        st = self._mount.stat("datafile", sudo=True)
        self.assert_equal(st['st_size'], self._initial_state['st_size'])

        return self._errors
self.wait_until_equal( + lambda: get_state(mds_id), + "up:standby", + timeout=60) + + self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) + + # Run the recovery procedure + if False: + with self.assertRaises(CommandFailedError): + # Normal reset should fail when no objects are present, we'll use --force instead + self.fs.journal_tool(["journal", "reset"], 0) + + self.fs.journal_tool(["journal", "reset", "--force"], 0) + self.fs.data_scan(["init"]) + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers) + self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers) + + # Mark the MDS repaired + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Start the MDS + self.fs.mds_restart() + self.fs.wait_for_daemons() + log.info(str(self.mds_cluster.status())) + + # Mount a client + self.mount_a.mount_wait() + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def test_rebuild_simple(self): + self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a)) + + def test_rebuild_moved_file(self): + self._rebuild_metadata(MovedFile(self.fs, self.mount_a)) + + def test_rebuild_backtraceless(self): + self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a)) + + def test_rebuild_moved_dir(self): + self._rebuild_metadata(MovedDir(self.fs, self.mount_a)) + + def test_rebuild_missing_zeroth(self): + self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a)) + + def test_rebuild_nondefault_layout(self): + self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a)) + + 
def test_stashed_layout(self): + self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a)) + + def _dirfrag_keys(self, object_id): + keys_str = self.fs.radosmo(["listomapkeys", object_id], stdout=StringIO()) + if keys_str: + return keys_str.strip().split("\n") + else: + return [] + + def test_fragmented_injection(self): + """ + That when injecting a dentry into a fragmented directory, we put it in the right fragment. + """ + + file_count = 100 + file_names = ["%s" % n for n in range(0, file_count)] + + # Make sure and disable dirfrag auto merging and splitting + self.fs.set_ceph_conf('mds', 'mds bal merge size', 0) + self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count) + + # Create a directory of `file_count` files, each named after its + # decimal number and containing the string of its decimal number + self.mount_a.run_python(dedent(""" + import os + path = os.path.join("{path}", "subdir") + os.mkdir(path) + for n in range(0, {file_count}): + open(os.path.join(path, "%s" % n), 'w').write("%s" % n) + """.format( + path=self.mount_a.mountpoint, + file_count=file_count + ))) + + dir_ino = self.mount_a.path_to_ino("subdir") + + # Only one MDS should be active! 
+ self.assertEqual(len(self.fs.get_active_names()), 1) + + # Ensure that one directory is fragmented + mds_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id) + + # Flush journal and stop MDS + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"], mds_id) + self.fs.fail() + + # Pick a dentry and wipe out its key + # Because I did a 1 bit split, I know one frag will be named <inode>.01000000 + frag_obj_id = "{0:x}.01000000".format(dir_ino) + keys = self._dirfrag_keys(frag_obj_id) + victim_key = keys[7] # arbitrary choice + log.info("victim_key={0}".format(victim_key)) + victim_dentry = victim_key.split("_head")[0] + self.fs.radosm(["rmomapkey", frag_obj_id, victim_key]) + + # Start filesystem back up, observe that the file appears to be gone in an `ls` + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n") + self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry])))) + + # Stop the filesystem + self.mount_a.umount_wait() + self.fs.fail() + + # Run data-scan, observe that it inserts our dentry back into the correct fragment + # by checking the omap now has the dentry's key again + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) + self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()]) + self.fs.data_scan(["scan_links"]) + self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id)) + + # Start the filesystem and check that the dentry we deleted is now once again visible + # and points to the correct file data. + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "-l", "subdir/"]) # debugging + # Use sudo because cephfs-data-scan will reinsert the dentry with root ownership, it can't know the real owner. 
+ out = self.mount_a.run_shell_payload(f"cat subdir/{victim_dentry}", sudo=True).stdout.getvalue().strip() + self.assertEqual(out, victim_dentry) + + # Finally, close the loop by checking our injected dentry survives a merge + mds_id = self.fs.get_active_names()[0] + self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work + self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id) + self.fs.mds_asok(["flush", "journal"], mds_id) + frag_obj_id = "{0:x}.00000000".format(dir_ino) + keys = self._dirfrag_keys(frag_obj_id) + self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names])) + + # run scrub to update and make sure rstat.rbytes info in subdir inode and dirfrag + # are matched + out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Remove the whole 'sudbdir' directory + self.mount_a.run_shell(["rm", "-rf", "subdir/"]) + + @for_teuthology + def test_parallel_execution(self): + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7) + + def test_pg_files(self): + """ + That the pg files command tells us which files are associated with + a particular PG + """ + file_count = 20 + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.create_n_files("mydir/myfile", file_count) + + # Some files elsewhere in the system that we will ignore + # to check that the tool is filtering properly + self.mount_a.run_shell(["mkdir", "otherdir"]) + self.mount_a.create_n_files("otherdir/otherfile", file_count) + + pgs_to_files = defaultdict(list) + # Rough (slow) reimplementation of the logic + for i in range(0, file_count): + file_path = "mydir/myfile_{0}".format(i) + ino = self.mount_a.path_to_ino(file_path) + obj = "{0:x}.{1:08x}".format(ino, 0) + pgid = 
json.loads(self.fs.mon_manager.raw_cluster_cmd( + "osd", "map", self.fs.get_data_pool_name(), obj, + "--format=json-pretty" + ))['pgid'] + pgs_to_files[pgid].append(file_path) + log.info("{0}: {1}".format(file_path, pgid)) + + pg_count = self.fs.get_pool_pg_num(self.fs.get_data_pool_name()) + for pg_n in range(0, pg_count): + pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n) + out = self.fs.data_scan(["pg_files", "mydir", pg_str]) + lines = [l for l in out.split("\n") if l] + log.info("{0}: {1}".format(pg_str, lines)) + self.assertSetEqual(set(lines), set(pgs_to_files[pg_str])) + + def test_rebuild_linkage(self): + """ + The scan_links command fixes linkage errors + """ + self.mount_a.run_shell(["mkdir", "testdir1"]) + self.mount_a.run_shell(["mkdir", "testdir2"]) + dir1_ino = self.mount_a.path_to_ino("testdir1") + dir2_ino = self.mount_a.path_to_ino("testdir2") + dirfrag1_oid = "{0:x}.00000000".format(dir1_ino) + dirfrag2_oid = "{0:x}.00000000".format(dir2_ino) + + self.mount_a.run_shell(["touch", "testdir1/file1"]) + self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"]) + self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"]) + + mds_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["flush", "journal"], mds_id) + + dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid) + + # introduce duplicated primary link + file1_key = "file1_head" + self.assertIn(file1_key, dirfrag1_keys) + file1_omap_data = self.fs.radosmo(["getomapval", dirfrag1_oid, file1_key, '-']) + self.fs.radosm(["setomapval", dirfrag2_oid, file1_key], stdin=BytesIO(file1_omap_data)) + self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) + + # remove a remote link, make inode link count incorrect + link1_key = 'link1_head' + self.assertIn(link1_key, dirfrag1_keys) + self.fs.radosm(["rmomapkey", dirfrag1_oid, link1_key]) + + # increase good primary link's version + self.mount_a.run_shell(["touch", "testdir1/file1"]) + self.mount_a.umount_wait() + + 
def test_rebuild_inotable(self):
    """
    The scan_links command repair inotables

    With two active ranks, a directory is pinned to rank 1 and a file is
    created in it once that rank is allocating the inode.  Both inotable
    objects are then removed and rebuilt by `scan_links`; the first free
    range of each rebuilt table must start at or beyond the inode number
    that was observed in use.
    """
    self.fs.set_max_mds(2)
    self.fs.wait_for_daemons()

    active_mds_names = self.fs.get_active_names()
    mds0_id = active_mds_names[0]
    mds1_id = active_mds_names[1]

    self.mount_a.run_shell(["mkdir", "dir1"])
    dir_ino = self.mount_a.path_to_ino("dir1")
    self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")

    # wait for subtree migration: keep re-creating the file until its inode
    # number is >= 2 << 40, which this test takes as "allocated by mds.1".
    # The loop is bounded so that a pin that never takes effect produces a
    # clear failure instead of hanging until the job is killed.
    max_attempts = 120
    for _ in range(max_attempts):
        time.sleep(1)
        # allocate an inode from mds.1
        self.mount_a.run_shell(["touch", "dir1/file1"])
        file_ino = self.mount_a.path_to_ino("dir1/file1")
        if file_ino >= (2 << 40):
            break
        self.mount_a.run_shell(["rm", "-f", "dir1/file1"])
    else:
        self.fail("dir1 was not migrated to mds.1 after {0} attempts".format(max_attempts))

    self.mount_a.umount_wait()

    self.fs.mds_asok(["flush", "journal"], mds0_id)
    self.fs.mds_asok(["flush", "journal"], mds1_id)
    self.fs.fail()

    self.fs.radosm(["rm", "mds0_inotable"])
    self.fs.radosm(["rm", "mds1_inotable"])

    self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])

    mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
    self.assertGreaterEqual(
        mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)

    mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
    self.assertGreaterEqual(
        mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)
def get_paths_to_ino(self):
    """
    Map every path printed by `find ./` on mount_a to its inode number.

    The find output is split on whitespace, so a path containing
    whitespace would be split into several entries.
    """
    listing = self.mount_a.run_shell(["find", "./"]).stdout.getvalue()
    return {
        entry: self.mount_a.path_to_ino(entry, False)
        for entry in listing.strip().split()
    }
self.mount_a.run_shell(["touch", "parent/child/grandchild/file"]) + + inos = self.get_paths_to_ino() + tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"]) + + target_inos = [inos["./parent/child"], inos["./parent/child/file"], + inos["./parent/child/grandchild"]] + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) + + def test_random(self): + random.seed(0) + + self.populate() + inos = self.get_paths_to_ino() + target = random.sample(inos.keys(), 1)[0] + + if target != "./": + target = os.path.dirname(target) + + subtree = [path for path in inos.keys() if path.startswith(target)] + target_inos = [inos[path] for path in subtree] + tree = self.fs.mds_asok(["dump", "tree", target[1:]]) + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) + + target_depth = target.count('/') + maxdepth = max([path.count('/') for path in subtree]) - target_depth + depth = random.randint(0, maxdepth) + target_inos = [inos[path] for path in subtree \ + if path.count('/') <= depth + target_depth] + tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)]) + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py new file mode 100644 index 000000000..d2421bedc --- /dev/null +++ b/qa/tasks/cephfs/test_exports.py @@ -0,0 +1,519 @@ +import logging +import random +import time +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra.run import CommandFailedError + +log = logging.getLogger(__name__) + +class TestExports(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + + def test_session_race(self): + """ + Test session creation race. 
+ + See: https://tracker.ceph.com/issues/24072#change-113056 + """ + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + rank1 = self.fs.get_rank(rank=1, status=status) + + # Create a directory that is pre-exported to rank 1 + self.mount_a.run_shell(["mkdir", "-p", "a/aa"]) + self.mount_a.setfattr("a", "ceph.dir.pin", "1") + self._wait_subtrees([('/a', 1)], status=status, rank=1) + + # Now set the mds config to allow the race + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1) + + # Now create another directory and try to export it + self.mount_b.run_shell(["mkdir", "-p", "b/bb"]) + self.mount_b.setfattr("b", "ceph.dir.pin", "1") + + time.sleep(5) + + # Now turn off the race so that it doesn't wait again + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1) + + # Now try to create a session with rank 1 by accessing a dir known to + # be there, if buggy, this should cause the rank 1 to crash: + self.mount_b.run_shell(["ls", "a"]) + + # Check if rank1 changed (standby tookover?) 
+ new_rank1 = self.fs.get_rank(rank=1) + self.assertEqual(rank1['gid'], new_rank1['gid']) + +class TestExportPin(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p 1/2/3/4") + + def test_noop(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_negative(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-2341") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_empty_pin(self): + self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_trivial(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_export_targets(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.status = self.fs.status() + r0 = self.status.get_rank(self.fs.id, 0) + self.assertTrue(sorted(r0['export_targets']) == [1]) + + def test_redundant(self): + # redundant pin /1/2 to rank 1 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1) + + def test_reassignment(self): + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0) + + def test_phantom_rank(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "10") 
def test_getfattr(self):
    """
    That ceph.dir.pin values set with setfattr can be read back with
    getfattr.

    On non-FUSE mounts the test is skipped when the kernel major version
    is below 5 and the attribute reads back empty.
    """
    # pin /1 to rank 1 and /1/2 to rank 0
    self.mount_a.setfattr("1", "ceph.dir.pin", "1")
    self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
    self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1)

    if not isinstance(self.mount_a, FuseMount):
        release = str(self.mount_a.client_remote.sh('uname -r', wait=True))
        dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin")
        log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin)
        # Compare the numeric major version: a plain string comparison
        # would sort a release such as "10.2" before "5".
        try:
            old_kernel = int(release.strip().split('.', 1)[0]) < 5
        except ValueError:
            # Release string without a leading integer: keep the previous
            # lexical comparison so the outcome is unchanged for it.
            old_kernel = release < "5"
        if old_kernel and not dir_pin:
            self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin")
    self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1')
    self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0')
+ """ + + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status) + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + # drop cache multiple times to clear replica pins + self._wait_subtrees([], status=self.status, action=_drop) + +class TestEphemeralPins(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.config_set('mds', 'mds_export_ephemeral_random', True) + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0) + + self.mount_a.run_shell_payload(""" +set -e + +# Use up a random number of inode numbers so the ephemeral pinning is not the same every test. +mkdir .inode_number_thrash +count=$((RANDOM % 1024)) +for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done +rm -rf .inode_number_thrash +""") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True): + return self.mount_a.run_shell_payload(f""" +set -ex +mkdir -p {path} +{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""} +{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""} +{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""} +for ((i = 0; i < {count}; i++)); do + mkdir -p "{path}/$i" + echo file > "{path}/$i/file" +done +""", wait=wait) + + def test_ephemeral_pin_dist_override(self): + """ + That an ephemeral distributed pin overrides a normal export pin. 
def test_ephemeral_pin_dist_override_pin(self):
    """
    That an export pin overrides an ephemerally pinned directory.
    """

    self._setup_tree(distributed=True)
    # Only the wait matters here; the returned subtree list was never read
    # before being overwritten, so it is no longer bound to a name.
    self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
    self.mount_a.setfattr("tree", "ceph.dir.pin", "0")
    time.sleep(15)
    subtrees = self._get_subtrees(status=self.status, rank=0)
    for s in subtrees:
        # Only the /tree entry is checked; other subtrees are ignored.
        if s['dir']['path'] == '/tree':
            self.assertEqual(s['auth_first'], 0)
            self.assertFalse(s['distributed_ephemeral_pin'])
    # it has been merged into /tree
def test_ephemeral_pin_dist_override_after(self):
    """
    That a conventional export pin overrides the distributed policy _after_ distributed policy is set.
    """

    self._setup_tree(distributed=True)
    self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
    # Pin the first ten children of the tree to rank 1 ...
    pinned = [f"tree/{n}" for n in range(10)]
    for directory in pinned:
        self.mount_a.setfattr(directory, "ceph.dir.pin", "1")
    # ... and expect exactly those subtrees, all owned by rank 1.
    expected = [("/" + directory, 1) for directory in pinned]
    self._wait_subtrees(expected, timeout=60, status=self.status, rank="all", path="/tree/")
def test_ephemeral_pin_distribution(self):
    """
    That ephemerally pinned subtrees are somewhat evenly distributed.
    """

    max_mds = 3
    frags = 128

    self.fs.set_max_mds(max_mds)
    self.status = self.fs.wait_for_daemons()

    self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds)
    self._setup_tree(count=1000, distributed=True)

    subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all")
    total = len(subtrees)

    # Check if distribution is uniform: each of ranks 0..2 must be the
    # first auth of at least 15% of the subtrees.
    for rank in (0, 1, 2):
        owned = sum(1 for s in subtrees if s['auth_first'] == rank)
        self.assertGreaterEqual(owned / total, 0.15)
+ """ + + r = 0.5 + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + self._setup_tree(path="tree/new", count=count) + time.sleep(30) # for something not to happen... + subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/") + self.assertLessEqual(len(subtrees), int(.01*count*1.25)) + + def test_ephemeral_random_max_config(self): + """ + That the config mds_export_ephemeral_random_max config rejects new OOB policies. + """ + + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + try: + p = self._setup_tree(count=1, random=0.02, wait=False) + p.wait() + except CommandFailedError as e: + log.info(f"{e}") + self.assertIn("Invalid", p.stderr.getvalue()) + else: + raise RuntimeError("mds_export_ephemeral_random_max ignored!") + + def test_ephemeral_random_dist(self): + """ + That ephemeral distributed pin overrides ephemeral random pin + """ + + self._setup_tree(random=1.0, distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status) + + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path.startswith('/tree'): + self.assertFalse(s['random_ephemeral_pin']) + + def test_ephemeral_random_pin_override_before(self): + """ + That a conventional export pin overrides the random policy before creating new directories. + """ + + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=10, export=1) + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_random_pin_override_after(self): + """ + That a conventional export pin overrides the random policy after creating new directories. 
+ """ + + count = 10 + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=count) + self._wait_random_subtrees(count+1, status=self.status, rank="all") + self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1") + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_randomness(self): + """ + That the randomness is reasonable. + """ + + r = random.uniform(0.25, 0.75) # ratios don't work for small r! + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + time.sleep(30) # for max to not be exceeded + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + self.assertLessEqual(len(subtrees), int(r*count*1.50)) + + def test_ephemeral_random_cache_drop(self): + """ + That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging. + """ + + count = 100 + self._setup_tree(count=count, random=1.0) + self._wait_random_subtrees(count, status=self.status, rank="all") + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + self._wait_subtrees([], status=self.status, action=_drop) + + def test_ephemeral_random_failover(self): + """ + That the random ephemeral pins stay pinned across MDS failover. 
def test_ephemeral_pin_grow_mds(self):
    """
    That consistent hashing works to reduce the number of migrations.
    """

    self.fs.set_max_mds(2)
    self.status = self.fs.wait_for_daemons()

    self._setup_tree(random=1.0)
    subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")

    self.fs.set_max_mds(3)
    self.status = self.fs.wait_for_daemons()

    # Sleeping for a while to allow the ephemeral pin migrations to complete
    time.sleep(30)

    subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")

    def _moved(before):
        # True when some new entry has the same path but a different
        # first-auth rank than the old entry.
        return any(
            before['dir']['path'] == after['dir']['path']
            and before['auth_first'] != after['auth_first']
            for after in subtrees_new
        )

    count = sum(1 for before in subtrees_old if _moved(before))

    log.info("{0} migrations have occured due to the cluster resizing".format(count))
    # ~50% of subtrees from the two rank will migrate to another rank
    self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget
+ """ + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2 + self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py new file mode 100644 index 000000000..745489309 --- /dev/null +++ b/qa/tasks/cephfs/test_failover.py @@ -0,0 +1,800 @@ +import time +import signal +import logging +import operator +from random import randint + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount + +log = logging.getLogger(__name__) + +class TestClusterAffinity(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 4 + + def _verify_join_fs(self, target, status=None): + if status is None: + status = self.fs.wait_for_daemons(timeout=30) + log.debug("%s", status) + target = sorted(target, key=operator.itemgetter('name')) + log.info("target = %s", target) + current = list(status.get_all()) + current = sorted(current, key=operator.itemgetter('name')) + log.info("current = %s", current) + self.assertEqual(len(current), len(target)) + for i in range(len(current)): + for attr in target[i]: + self.assertIn(attr, current[i]) + 
self.assertEqual(target[i][attr], current[i][attr]) + + def _change_target_state(self, state, name, changes): + for entity in state: + if entity['name'] == name: + for k, v in changes.items(): + entity[k] = v + return + self.fail("no entity") + + def _verify_init(self): + status = self.fs.status() + log.info("status = {0}".format(status)) + target = [{'join_fscid': -1, 'name': info['name']} for info in status.get_all()] + self._verify_join_fs(target, status=status) + return (status, target) + + def _reach_target(self, target): + def takeover(): + try: + self._verify_join_fs(target) + return True + except AssertionError as e: + log.debug("%s", e) + return False + self.wait_until_true(takeover, 30) + + def test_join_fs_runtime(self): + """ + That setting mds_join_fs at runtime affects the cluster layout. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + self.config_set('mds.'+standbys[0]['name'], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0]['name'], {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + + def test_join_fs_unset(self): + """ + That unsetting mds_join_fs will cause failover if another high-affinity standby exists. 
def test_join_fs_drop(self):
    """
    That unsetting mds_join_fs will not cause failover if no high-affinity standby exists.
    """
    status, target = self._verify_init()
    # Use the first standby: giving it mds_join_fs is expected to make it
    # the active daemon.
    first_standby = list(status.get_standbys())[0]
    mds_name = first_standby['name']
    section = 'mds.' + mds_name

    self.config_set(section, 'mds_join_fs', 'cephfs')
    self._change_target_state(target, mds_name, {'join_fscid': self.fs.id, 'state': 'up:active'})
    self._reach_target(target)

    # Dropping the setting only resets join_fscid in the target; the
    # expected state stays up:active.
    self.config_rm(section, 'mds_join_fs')
    self._change_target_state(target, mds_name, {'join_fscid': -1})
    self._reach_target(target)
+ """ + # After Octopus is EOL, we can remove this setting: + self.fs.set_allow_multifs() + fs2 = self.mds_cluster.newfs(name="cephfs2") + status, target = self._verify_init() + active = self.fs.get_active_names(status=status)[0] + standbys = [info['name'] for info in status.get_standbys()] + victim = standbys.pop() + # Set a bogus fs on the others + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + self.fs.rank_fail() + self._change_target_state(target, victim, {'state': 'up:active'}) + self._reach_target(target) + status = self.fs.status() + active = self.fs.get_active_names(status=status)[0] + self.assertEqual(active, victim) + + def test_join_fs_last_resort(self): + """ + That a standby with mds_join_fs set to another fs is still used if necessary. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + # After Octopus is EOL, we can remove this setting: + self.fs.set_allow_multifs() + fs2 = self.mds_cluster.newfs(name="cephfs2") + for mds in standbys: + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + self.fs.rank_fail() + status = self.fs.status() + ranks = list(self.fs.get_ranks(status=status)) + self.assertEqual(len(ranks), 1) + self.assertIn(ranks[0]['name'], standbys) + # Note that we would expect the former active to reclaim its spot, but + # we're not testing that here. + + def test_join_fs_steady(self): + """ + That a sole MDS with mds_join_fs set will come back as active eventually even after failover. 
+ """ + status, target = self._verify_init() + active = self.fs.get_active_names(status=status)[0] + self.config_set('mds.'+active, 'mds_join_fs', 'cephfs') + self._change_target_state(target, active, {'join_fscid': self.fs.id}) + self._reach_target(target) + self.fs.rank_fail() + self._reach_target(target) + + def test_join_fs_standby_replay(self): + """ + That a standby-replay daemon with weak affinity is replaced by a stronger one. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + self.fs.set_allow_standby_replay(True) + status = self.fs.status() + standbys = [info['name'] for info in status.get_standbys()] + self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:standby-replay'}) + self._reach_target(target) + +class TestClusterResize(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 3 + + def test_grow(self): + """ + That the MDS cluster grows after increasing max_mds. + """ + + # Need all my standbys up as well as the active daemons + # self.wait_for_daemon_start() necessary? + + self.fs.grow(2) + self.fs.grow(3) + + + def test_shrink(self): + """ + That the MDS cluster shrinks automatically after decreasing max_mds. + """ + + self.fs.grow(3) + self.fs.shrink(1) + + def test_up_less_than_max(self): + """ + That a health warning is generated when max_mds is greater than active count. 
def test_down_health(self):
    """
    That marking a FS down does not generate a health warning
    """

    self.fs.set_down()
    try:
        self.wait_for_health("", 30)
    except RuntimeError as e:
        # A RuntimeError mentioning "Timed out after" is the expected
        # outcome (no warning matched); anything else propagates.
        if "Timed out after" not in str(e):
            raise
    else:
        # The wait returned, i.e. a health warning was seen.
        raise RuntimeError("got health warning?")
+ """ + + fscid = self.fs.id + + self.fs.grow(2) + + # Now add a delay which should slow down how quickly rank 1 stops + self.config_set('mds', 'ms_inject_delay_max', '5.0') + self.config_set('mds', 'ms_inject_delay_probability', '1.0') + self.fs.set_max_mds(1) + log.info("status = {0}".format(self.fs.status())) + + # Don't wait for rank 1 to stop + self.fs.set_max_mds(3) + log.info("status = {0}".format(self.fs.status())) + + # Now check that the mons didn't try to promote a standby to rank 2 + self.fs.set_max_mds(2) + status = self.fs.status() + try: + status = self.fs.wait_for_daemons(timeout=90) + ranks = set([info['rank'] for info in status.get_ranks(fscid)]) + self.assertEqual(ranks, set([0, 1])) + finally: + log.info("status = {0}".format(status)) + + def test_thrash(self): + """ + Test that thrashing max_mds does not fail. + """ + + max_mds = 2 + for i in range(0, 100): + self.fs.set_max_mds(max_mds) + max_mds = (max_mds+1)%3+1 + + self.fs.wait_for_daemons(timeout=90) + +class TestFailover(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_simple(self): + """ + That when the active MDS is killed, a standby MDS is promoted into + its rank after the grace period. + + This is just a simple unit test, the harder cases are covered + in thrashing tests. 
def test_client_abort(self):
    """
    That a client will respect fuse_require_active_mds and error out
    when the cluster appears to be unavailable.
    """

    if not isinstance(self.mount_a, FuseMount):
        self.skipTest("Requires FUSE client to inject client metadata")

    setting = self.fs.get_config("fuse_require_active_mds", service_type="mon")
    if setting.lower() != "true":
        self.skipTest("fuse_require_active_mds is not set")

    grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

    # Check it's not laggy to begin with
    [original_active] = self.fs.get_active_names()
    self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active))

    client = self.mounts[0]
    client.umount_wait()

    # Control: that we can mount and unmount usually, while the cluster is healthy
    client.mount_wait()
    client.umount_wait()

    # Stop the daemon processes
    self.fs.mds_stop()

    # Wait for everyone to go laggy
    def laggy():
        daemons = self.fs.get_mds_map()['info'].values()
        return all("laggy_since" in info for info in daemons)

    self.wait_until_true(laggy, grace * 2)
    # With every MDS laggy the mount attempt must fail.
    with self.assertRaises(CommandFailedError):
        client.mount_wait()
+ """ + + # Need all my standbys up as well as the active daemons + self.wait_for_daemon_start() + + grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) + + standbys = self.mds_cluster.get_standby_daemons() + self.assertGreaterEqual(len(standbys), 1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) + + # Kill a standby and check for warning + victim = standbys.pop() + self.fs.mds_stop(victim) + log.info("waiting for insufficient standby daemon warning") + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2) + + # restart the standby, see that he becomes a standby, check health clears + self.fs.mds_restart(victim) + self.wait_until_true( + lambda: victim in self.mds_cluster.get_standby_daemons(), + timeout=60 # Approximately long enough for MDS to start and mon to notice + ) + self.wait_for_health_clear(timeout=30) + + # Set it one greater than standbys ever seen + standbys = self.mds_cluster.get_standby_daemons() + self.assertGreaterEqual(len(standbys), 1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) + log.info("waiting for insufficient standby daemon warning") + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2) + + # Set it to 0 + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') + self.wait_for_health_clear(timeout=30) + + def test_discontinuous_mdsmap(self): + """ + That discontinuous mdsmap does not affect failover. + See http://tracker.ceph.com/issues/24856. 
+ """ + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.umount_wait() + + grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) + monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds")) + + mds_0 = self.fs.get_rank(rank=0, status=status) + self.fs.rank_freeze(True, rank=0) # prevent failover + self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status) + self.wait_until_true( + lambda: "laggy_since" in self.fs.get_rank(), + timeout=grace * 2 + ) + + self.fs.rank_fail(rank=1) + self.fs.wait_for_state('up:resolve', rank=1, timeout=30) + + # Make sure of mds_0's monitor connection gets reset + time.sleep(monc_timeout * 2) + + # Continue rank 0, it will get discontinuous mdsmap + self.fs.rank_signal(signal.SIGCONT, rank=0) + self.wait_until_true( + lambda: "laggy_since" not in self.fs.get_rank(rank=0), + timeout=grace * 2 + ) + + # mds.b will be stuck at 'reconnect' state if snapserver gets confused + # by discontinuous mdsmap + self.fs.wait_for_state('up:active', rank=1, timeout=30) + self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid']) + self.fs.rank_freeze(False, rank=0) + + def test_connect_bootstrapping(self): + self.config_set("mds", "mds_sleep_rank_change", 10000000.0) + self.config_set("mds", "mds_connect_bootstrapping", True) + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + self.fs.rank_fail(rank=0) + # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194 + self.fs.wait_for_daemons() + + +class TestStandbyReplay(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 4 + + def _confirm_no_replay(self): + status = self.fs.status() + _ = len(list(status.get_standbys())) + self.assertEqual(0, len(list(self.fs.get_replays(status=status)))) + return status + + def _confirm_single_replay(self, full=True, status=None, retries=3): + status = self.fs.wait_for_daemons(status=status) + ranks = sorted(self.fs.get_mds_map(status=status)['in']) + 
replays = list(self.fs.get_replays(status=status)) + checked_replays = set() + for rank in ranks: + has_replay = False + for replay in replays: + if replay['rank'] == rank: + self.assertFalse(has_replay) + has_replay = True + checked_replays.add(replay['gid']) + if full and not has_replay: + if retries <= 0: + raise RuntimeError("rank "+str(rank)+" has no standby-replay follower") + else: + retries = retries-1 + time.sleep(2) + self.assertEqual(checked_replays, set(info['gid'] for info in replays)) + return status + + def _check_replay_takeover(self, status, rank=0): + replay = self.fs.get_replay(rank=rank, status=status) + new_status = self.fs.wait_for_daemons() + new_active = self.fs.get_rank(rank=rank, status=new_status) + if replay: + self.assertEqual(replay['gid'], new_active['gid']) + else: + # double check takeover came from a standby (or some new daemon via restart) + found = False + for info in status.get_standbys(): + if info['gid'] == new_active['gid']: + found = True + break + if not found: + for info in status.get_all(): + self.assertNotEqual(info['gid'], new_active['gid']) + return new_status + + def test_standby_replay_singleton(self): + """ + That only one MDS becomes standby-replay. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + time.sleep(30) + self._confirm_single_replay() + + def test_standby_replay_damaged(self): + """ + That a standby-replay daemon can cause the rank to go damaged correctly. + """ + + self._confirm_no_replay() + self.config_set("mds", "mds_standby_replay_damaged", True) + self.fs.set_allow_standby_replay(True) + self.wait_until_true( + lambda: len(self.fs.get_damaged()) > 0, + timeout=30 + ) + status = self.fs.status() + self.assertListEqual([], list(self.fs.get_ranks(status=status))) + self.assertListEqual([0], self.fs.get_damaged(status=status)) + + def test_standby_replay_disable(self): + """ + That turning off allow_standby_replay fails all standby-replay daemons. 
+ """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + time.sleep(30) + self._confirm_single_replay() + self.fs.set_allow_standby_replay(False) + self._confirm_no_replay() + + def test_standby_replay_singleton_fail(self): + """ + That failures don't violate singleton constraint. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + for i in range(10): + time.sleep(randint(1, 5)) + self.fs.rank_restart(status=status) + status = self._check_replay_takeover(status) + status = self._confirm_single_replay(status=status) + + for i in range(10): + time.sleep(randint(1, 5)) + self.fs.rank_fail() + status = self._check_replay_takeover(status) + status = self._confirm_single_replay(status=status) + + def test_standby_replay_singleton_fail_multimds(self): + """ + That failures don't violate singleton constraint with multiple actives. + """ + + status = self._confirm_no_replay() + new_max_mds = randint(2, len(list(status.get_standbys()))) + self.fs.set_max_mds(new_max_mds) + self.fs.wait_for_daemons() # wait for actives to come online! + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay(full=False) + + for i in range(10): + time.sleep(randint(1, 5)) + victim = randint(0, new_max_mds-1) + self.fs.rank_restart(rank=victim, status=status) + status = self._check_replay_takeover(status, rank=victim) + status = self._confirm_single_replay(status=status, full=False) + + for i in range(10): + time.sleep(randint(1, 5)) + victim = randint(0, new_max_mds-1) + self.fs.rank_fail(rank=victim) + status = self._check_replay_takeover(status, rank=victim) + status = self._confirm_single_replay(status=status, full=False) + + def test_standby_replay_failure(self): + """ + That the failure of a standby-replay daemon happens cleanly + and doesn't interrupt anything else. 
+ """ + + status = self._confirm_no_replay() + self.fs.set_max_mds(1) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + for i in range(10): + time.sleep(randint(1, 5)) + victim = self.fs.get_replay(status=status) + self.fs.mds_restart(mds_id=victim['name']) + status = self._confirm_single_replay(status=status) + + def test_standby_replay_prepare_beacon(self): + """ + That a MDSMonitor::prepare_beacon handles standby-replay daemons + correctly without removing the standby. (Note, usually a standby-replay + beacon will just be replied to by MDSMonitor::preprocess_beacon.) + """ + + status = self._confirm_no_replay() + self.fs.set_max_mds(1) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + replays = list(status.get_replays(self.fs.id)) + self.assertEqual(len(replays), 1) + self.config_set('mds.'+replays[0]['name'], 'mds_inject_health_dummy', True) + time.sleep(10) # for something not to happen... + status = self._confirm_single_replay() + replays2 = list(status.get_replays(self.fs.id)) + self.assertEqual(replays[0]['gid'], replays2[0]['gid']) + + def test_rank_stopped(self): + """ + That when a rank is STOPPED, standby replays for + that rank get torn down + """ + + status = self._confirm_no_replay() + standby_count = len(list(status.get_standbys())) + self.fs.set_max_mds(2) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + self.fs.set_max_mds(1) # stop rank 1 + + status = self._confirm_single_replay() + # NOTE(review): assertTrue() takes (expr, msg), so the second argument here is only + # used as the failure message and the two standby counts are never compared. + # Confirm which relation between the counts was intended before tightening this. + self.assertTrue(standby_count, len(list(status.get_standbys()))) + + + class TestMultiFilesystems(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 4 + + # We'll create our own filesystems and start our own daemons + REQUIRE_FILESYSTEM = False + + def setUp(self): + super(TestMultiFilesystems, self).setUp() + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", + "--yes-i-really-mean-it") + + def _setup_two(self): 
+ fs_a = self.mds_cluster.newfs(name="alpha") + fs_b = self.mds_cluster.newfs(name="bravo") + + self.mds_cluster.mds_restart() + + # Wait for both filesystems to go healthy + fs_a.wait_for_daemons() + fs_b.wait_for_daemons() + + # Reconfigure client auth caps + for mount in self.mounts: + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(mount.client_id), + 'mds', 'allow', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + fs_a.get_data_pool_name(), fs_b.get_data_pool_name())) + + return fs_a, fs_b + + def test_clients(self): + fs_a, fs_b = self._setup_two() + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + a_created_ino = self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + b_created_ino = self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + # Check that a non-default filesystem mount survives an MDS + # failover (i.e. that map subscription is continuous, not + # just the first time), reproduces #16022 + old_fs_b_mds = fs_b.get_active_names()[0] + self.mds_cluster.mds_stop(old_fs_b_mds) + self.mds_cluster.mds_fail(old_fs_b_mds) + fs_b.wait_for_daemons() + background = self.mount_b.write_background() + # Raise exception if the write doesn't finish (i.e. 
if client + # has not kept up with MDS failure) + try: + self.wait_until_true(lambda: background.finished, timeout=30) + except RuntimeError: + # The mount is stuck, we'll have to force it to fail cleanly + background.stdin.close() + self.mount_b.umount_wait(force=True) + raise + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # See that the client's files went into the correct pool + self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024)) + self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024)) + + def test_standby(self): + fs_a, fs_b = self._setup_two() + + # Assert that the remaining two MDS daemons are now standbys + a_daemons = fs_a.get_active_names() + b_daemons = fs_b.get_active_names() + self.assertEqual(len(a_daemons), 1) + self.assertEqual(len(b_daemons), 1) + original_a = a_daemons[0] + original_b = b_daemons[0] + expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons)) + + # Need all my standbys up as well as the active daemons + self.wait_for_daemon_start() + self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons()) + + # Kill fs_a's active MDS, see a standby take over + self.mds_cluster.mds_stop(original_a) + self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30, + reject_fn=lambda v: v > 1) + # Assert that it's a *different* daemon that has now appeared in the map for fs_a + self.assertNotEqual(fs_a.get_active_names()[0], original_a) + + # Kill fs_b's active MDS, see a standby take over + self.mds_cluster.mds_stop(original_b) + self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, + reject_fn=lambda v: v > 1) + # Assert that it's a *different* daemon that has now appeared in the map for fs_b + self.assertNotEqual(fs_b.get_active_names()[0], original_b) + + # Both of the original 
active daemons should be gone, and all standbys used up + self.assertEqual(self.mds_cluster.get_standby_daemons(), set()) + + # Restart the ones I killed, see them reappear as standbys + self.mds_cluster.mds_restart(original_a) + self.mds_cluster.mds_restart(original_b) + self.wait_until_true( + lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(), + timeout=30 + ) + + def test_grow_shrink(self): + # Usual setup... + fs_a, fs_b = self._setup_two() + + # Increase max_mds on fs_b, see a standby take up the role + fs_b.set_max_mds(2) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Increase max_mds on fs_a, see a standby take up the role + fs_a.set_max_mds(2) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Shrink fs_b back to 1, see a daemon go back to standby + fs_b.set_max_mds(1) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Grow fs_a up to 3, see the former fs_b daemon join it. 
+ fs_a.set_max_mds(3) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60, + reject_fn=lambda v: v > 3 or v < 2) diff --git a/qa/tasks/cephfs/test_flush.py b/qa/tasks/cephfs/test_flush.py new file mode 100644 index 000000000..17cb84970 --- /dev/null +++ b/qa/tasks/cephfs/test_flush.py @@ -0,0 +1,112 @@ + +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO + + +class TestFlush(CephFSTestCase): + def test_flush(self): + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.run_shell(["touch", "mydir/alpha"]) + dir_ino = self.mount_a.path_to_ino("mydir") + file_ino = self.mount_a.path_to_ino("mydir/alpha") + + # Unmount the client so that it isn't still holding caps + self.mount_a.umount_wait() + + # Before flush, the dirfrag object does not exist + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + + # Before flush, the file's backtrace has not been written + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + + # Before flush, there are no dentries in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute flush + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # After flush, the dirfrag object has been created + dir_list = self.fs.list_dirfrag(dir_ino) + self.assertEqual(dir_list, ["alpha_head"]) + + # And the 'mydir' dentry is in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head']) + + # ...and the data object has its backtrace + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']]) + self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']]) + self.assertEqual(file_ino, backtrace['ino']) + + # ...and the journal is truncated to just a single subtreemap from the + # newly created segment + summary_output = 
self.fs.journal_tool(["event", "get", "summary"], 0) + try: + self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + except AssertionError: + # In some states, flushing the journal will leave you + # an extra event from locks a client held. This is + # correct behaviour: the MDS is flushing the journal, + # it's just that new events are getting added too. + # In this case, we should nevertheless see a fully + # empty journal after a second flush. + self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + UPDATE: 1 + Errors: 0 + """ + ).strip()) + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0), + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + + # Now for deletion! + # We will count the RADOS deletions and MDS file purges, to verify that + # the expected behaviour is happening as a result of the purge + initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] + initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] + + # Use a client to delete a file + self.mount_a.mount_wait() + self.mount_a.run_shell(["rm", "-rf", "mydir"]) + + # Flush the journal so that the directory inode can be purged + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # We expect the strays_enqueued counter to rise by at least two + # (presumably one stray for the file and one for its directory) + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2, + 60) + + # We expect two deletions, one of the dirfrag and one of the backtrace + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2, + 60) # timeout is fairly long to allow for tick+rados latencies + + with 
self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py new file mode 100644 index 000000000..82630e069 --- /dev/null +++ b/qa/tasks/cephfs/test_forward_scrub.py @@ -0,0 +1,300 @@ + +""" +Test that the forward scrub functionality can traverse metadata and apply +requested tags, on well formed metadata. + +This is *not* the real testing for forward scrub, which will need to test +how the functionality responds to damaged metadata. + +""" +import logging +import json + +from collections import namedtuple +from io import BytesIO +from textwrap import dedent + +from teuthology.orchestra.run import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +import struct + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class TestForwardScrub(CephFSTestCase): + MDSS_REQUIRED = 1 + + def _read_str_xattr(self, pool, obj, attr): + """ + Read a ceph-encoded string from a rados xattr + """ + output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool, + stdout=BytesIO()).stdout.getvalue() + strlen = struct.unpack('i', output[0:4])[0] + return output[4:(4 + strlen)].decode(encoding='ascii') + + def _get_paths_to_ino(self): + inos = {} + p = self.mount_a.run_shell(["find", "./"]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos[path] = self.mount_a.path_to_ino(path) + + return inos + + def test_apply_tag(self): + self.mount_a.run_shell(["mkdir", "parentdir"]) + self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) + self.mount_a.run_shell(["touch", "rfile"]) + self.mount_a.run_shell(["touch", "parentdir/pfile"]) + self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"]) + + # Build a structure mapping path to 
inode, as we will later want + # to check object by object and objects are named after ino number + inos = self._get_paths_to_ino() + + # Flush metadata: this is a friendly test of forward scrub so we're skipping + # the part where it's meant to cope with dirty metadata + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + tag = "mytag" + + # Execute tagging forward scrub + self.fs.mds_asok(["tag", "path", "/parentdir", tag]) + # Wait for completion + import time + time.sleep(10) + # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll + # watch that instead + + # Check that dirs were tagged + for dirpath in ["./parentdir", "./parentdir/childdir"]: + self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name()) + + # Check that files were tagged + for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]: + self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name()) + + # This guy wasn't in the tag path, shouldn't have been tagged + self.assertUntagged(inos["./rfile"]) + + def assertUntagged(self, ino): + file_obj_name = "{0:x}.00000000".format(ino) + with self.assertRaises(CommandFailedError): + self._read_str_xattr( + self.fs.get_data_pool_name(), + file_obj_name, + "scrub_tag" + ) + + def assertTagged(self, ino, tag, pool): + file_obj_name = "{0:x}.00000000".format(ino) + wrote = self._read_str_xattr( + pool, + file_obj_name, + "scrub_tag" + ) + self.assertEqual(wrote, tag) + + def _validate_linkage(self, expected): + inos = self._get_paths_to_ino() + try: + self.assertDictEqual(inos, expected) + except AssertionError: + log.error("Expected: {0}".format(json.dumps(expected, indent=2))) + log.error("Actual: {0}".format(json.dumps(inos, indent=2))) + raise + + def test_orphan_scan(self): + # Create some files whose metadata we will flush + self.mount_a.run_python(dedent(""" + import os + mount_point = "{mount_point}" + parent = os.path.join(mount_point, "parent") + os.mkdir(parent) + 
flushed = os.path.join(parent, "flushed") + os.mkdir(flushed) + for f in ["alpha", "bravo", "charlie"]: + open(os.path.join(flushed, f), 'w').write(f) + """.format(mount_point=self.mount_a.mountpoint))) + + inos = self._get_paths_to_ino() + + # Flush journal + # Umount before flush to avoid cap releases putting + # things we don't want in the journal later. + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + # Create a new inode that's just in the log, i.e. would + # look orphaned to backward scan if backward scan wisnae + # respectin' tha scrub_tag xattr. + self.mount_a.mount_wait() + self.mount_a.run_shell(["mkdir", "parent/unflushed"]) + self.mount_a.run_shell(["dd", "if=/dev/urandom", + "of=./parent/unflushed/jfile", + "bs=1M", "count=8"]) + inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed") + inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile") + self.mount_a.umount_wait() + + # Orphan an inode by deleting its dentry + # Our victim will be.... bravo. 
+ self.mount_a.umount_wait() + self.fs.fail() + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"]) + self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"]) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + # See that the orphaned file is indeed missing from a client's POV + self.mount_a.mount_wait() + damaged_state = self._get_paths_to_ino() + self.assertNotIn("./parent/flushed/bravo", damaged_state) + self.mount_a.umount_wait() + + # Run a tagging forward scrub + tag = "mytag123" + self.fs.mds_asok(["tag", "path", "/parent", tag]) + + # See that the orphan wisnae tagged + self.assertUntagged(inos['./parent/flushed/bravo']) + + # See that the flushed-metadata-and-still-present files are tagged + self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name()) + self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name()) + + # See that journalled-but-not-flushed file *was* tagged + self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name()) + + # Run cephfs-data-scan targeting only orphans + self.fs.fail() + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) + self.fs.data_scan([ + "scan_inodes", + "--filter-tag", tag, + self.fs.get_data_pool_name() + ]) + + # After in-place injection stats should be kosher again + self.fs.set_ceph_conf('mds', 'mds verify scatter', True) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True) + + # And we should have all the same linkage we started with, + # and no lost+found, and no extra inodes! 
+ self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + self._validate_linkage(inos) + + def _stash_inotable(self): + # Get all active ranks + ranks = self.fs.get_all_mds_rank() + + inotable_dict = {} + for rank in ranks: + inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable" + print("Trying to fetch inotable object: " + inotable_oid) + + #self.fs.get_metadata_object("InoTable", "mds0_inotable") + inotable_raw = self.fs.radosmo(['get', inotable_oid, '-']) + inotable_dict[inotable_oid] = inotable_raw + return inotable_dict + + def test_inotable_sync(self): + self.mount_a.write_n_mb("file1_sixmegs", 6) + + # Flush journal + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + inotable_copy = self._stash_inotable() + + self.mount_a.mount_wait() + + self.mount_a.write_n_mb("file2_sixmegs", 6) + self.mount_a.write_n_mb("file3_sixmegs", 6) + + inos = self._get_paths_to_ino() + + # Flush journal + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + self.mount_a.umount_wait() + + with self.assert_cluster_log("inode table repaired", invert_match=True): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.fail() + + # Truncate the journal (to ensure the inotable on disk + # is all that will be in the InoTable in memory) + + self.fs.journal_tool(["event", "splice", + "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0) + + self.fs.journal_tool(["event", "splice", + "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0) + + # Revert to old inotable. 
+ for key, value in inotable_copy.items(): + self.fs.radosm(["put", key, "-"], stdin=BytesIO(value)) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + with self.assert_cluster_log("inode table repaired"): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.fail() + table_text = self.fs.table_tool(["0", "show", "inode"]) + table = json.loads(table_text) + self.assertGreater( + table['0']['data']['inotable']['free'][0]['start'], + inos['./file3_sixmegs']) + + def test_backtrace_repair(self): + """ + That the MDS can repair an inodes backtrace in the data pool + if it is found to be damaged. + """ + # Create a file for subsequent checks + self.mount_a.run_shell(["mkdir", "parent_a"]) + self.mount_a.run_shell(["touch", "parent_a/alpha"]) + file_ino = self.mount_a.path_to_ino("parent_a/alpha") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], + [a['dname'] for a in backtrace['ancestors']]) + + # Go corrupt the backtrace + self.fs._write_data_xattr(file_ino, "parent", + "oh i'm sorry did i overwrite your xattr?") + + with self.assert_cluster_log("bad backtrace on inode"): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], + [a['dname'] for a in backtrace['ancestors']]) diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py new file mode 100644 index 000000000..1102f887b --- /dev/null 
+++ b/qa/tasks/cephfs/test_fragment.py @@ -0,0 +1,319 @@ +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra import run + +import os +import time +import logging +log = logging.getLogger(__name__) + + +class TestFragmentation(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def get_splits(self): + return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split'] + + def get_merges(self): + return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge'] + + def get_dir_ino(self, path): + dir_cache = self.fs.read_cache(path, 0) + dir_ino = None + dir_inono = self.mount_a.path_to_ino(path.strip("/")) + for ino in dir_cache: + if ino['ino'] == dir_inono: + dir_ino = ino + break + self.assertIsNotNone(dir_ino) + return dir_ino + + def _configure(self, **kwargs): + """ + Apply kwargs as MDS configuration settings, enable dirfrags + and restart the MDSs. + """ + + for k, v in kwargs.items(): + self.ceph_cluster.set_ceph_conf("mds", k, v.__str__()) + + self.mds_cluster.mds_fail_restart() + self.fs.wait_for_daemons() + + def test_oversize(self): + """ + That a directory is split when it becomes too large. 
+ """ + + split_size = 20 + merge_size = 5 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=1 + ) + + self.assertEqual(self.get_splits(), 0) + + self.mount_a.create_n_files("splitdir/file", split_size + 1) + + self.wait_until_true( + lambda: self.get_splits() == 1, + timeout=30 + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), 2) + self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*") + self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*") + self.assertEqual( + sum([len(f['dentries']) for f in frags]), + split_size + 1 + ) + + self.assertEqual(self.get_merges(), 0) + + self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")]) + + self.wait_until_true( + lambda: self.get_merges() == 1, + timeout=30 + ) + + self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1) + + def test_rapid_creation(self): + """ + That the fast-splitting limit of 1.5x normal limit is + applied when creating dentries quickly. + """ + + split_size = 100 + merge_size = 1 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=3, + mds_bal_fragment_size_max=int(split_size * 1.5 + 2) + ) + + # We test this only at a single split level. If a client was sending + # IO so fast that it hit a second split before the first split + # was complete, it could violate mds_bal_fragment_size_max -- there + # is a window where the child dirfrags of a split are unfrozen + # (so they can grow), but still have STATE_FRAGMENTING (so they + # can't be split). + + # By writing 4x the split size when the split bits are set + # to 3 (i.e. 4-ways), I am reasonably sure to see precisely + # one split. The test is to check whether that split + # happens soon enough that the client doesn't exceed + # 2x the split_size (the "immediate" split mode should + # kick in at 1.5x the split size). 
+ + self.assertEqual(self.get_splits(), 0) + self.mount_a.create_n_files("splitdir/file", split_size * 4) + self.wait_until_equal( + self.get_splits, + 1, + reject_fn=lambda s: s > 1, + timeout=30 + ) + + def test_deep_split(self): + """ + That when the directory grows many times larger than split size, + the fragments get split again. + """ + + split_size = 100 + merge_size = 1 # i.e. don't merge frag unless its empty + split_bits = 1 + + branch_factor = 2**split_bits + + # Arbitrary: how many levels shall we try fragmenting before + # ending the test? + max_depth = 5 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=split_bits + ) + + # Each iteration we will create another level of fragments. The + # placement of dentries into fragments is by hashes (i.e. pseudo + # random), so we rely on statistics to get the behaviour that + # by writing about 1.5x as many dentries as the split_size times + # the number of frags, we will get them all to exceed their + # split size and trigger a split. 
+ depth = 0 + files_written = 0 + splits_expected = 0 + while depth < max_depth: + log.info("Writing files for depth {0}".format(depth)) + target_files = branch_factor**depth * int(split_size * 1.5) + create_files = target_files - files_written + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "{0} Writing {1} files (depth={2})".format( + self.__class__.__name__, create_files, depth + )) + self.mount_a.create_n_files("splitdir/file_{0}".format(depth), + create_files) + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "{0} Done".format(self.__class__.__name__)) + + files_written += create_files + log.info("Now have {0} files".format(files_written)) + + splits_expected += branch_factor**depth + log.info("Waiting to see {0} splits".format(splits_expected)) + try: + self.wait_until_equal( + self.get_splits, + splits_expected, + timeout=30, + reject_fn=lambda x: x > splits_expected + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), branch_factor**(depth+1)) + self.assertEqual( + sum([len(f['dentries']) for f in frags]), + target_files + ) + except: + # On failures, log what fragmentation we actually ended + # up with. This block is just for logging, at the end + # we raise the exception again. + frags = self.get_dir_ino("/splitdir")['dirfrags'] + log.info("depth={0} splits_expected={1} files_written={2}".format( + depth, splits_expected, files_written + )) + log.info("Dirfrags:") + for f in frags: + log.info("{0}: {1}".format( + f['dirfrag'], len(f['dentries']) + )) + raise + + depth += 1 + + # Remember the inode number because we will be checking for + # objects later. 
+ dir_inode_no = self.mount_a.path_to_ino("splitdir") + + self.mount_a.run_shell(["rm", "-rf", "splitdir/"]) + self.mount_a.umount_wait() + + self.fs.mds_asok(['flush', 'journal']) + + def _check_pq_finished(): + num_strays = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['num_strays'] + pq_ops = self.fs.mds_asok(['perf', 'dump', 'purge_queue'])['purge_queue']['pq_executing'] + return num_strays == 0 and pq_ops == 0 + + # Wait for all strays to purge + self.wait_until_true( + lambda: _check_pq_finished(), + timeout=1200 + ) + # Check that the metadata pool objects for all the myriad + # child fragments are gone + metadata_objs = self.fs.radosmo(["ls"], stdout=StringIO()).strip() + frag_objs = [] + for o in metadata_objs.split("\n"): + if o.startswith("{0:x}.".format(dir_inode_no)): + frag_objs.append(o) + self.assertListEqual(frag_objs, []) + + def test_split_straydir(self): + """ + That stray dir is split when it becomes too large. + """ + def _count_fragmented(): + mdsdir_cache = self.fs.read_cache("~mdsdir", 1) + num = 0 + for ino in mdsdir_cache: + if ino["ino"] == 0x100: + continue + if len(ino["dirfrags"]) > 1: + log.info("straydir 0x{:X} is fragmented".format(ino["ino"])) + num += 1; + return num + + split_size = 50 + merge_size = 5 + split_bits = 1 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=split_bits, + mds_bal_fragment_size_max=(split_size * 100) + ) + + # manually split/merge + self.assertEqual(_count_fragmented(), 0) + self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray8", "0/0", "1"]) + self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray9", "0/0", "1"]) + self.wait_until_true( + lambda: _count_fragmented() == 2, + timeout=30 + ) + + time.sleep(30) + + self.fs.mds_asok(["dirfrag", "merge", "~mdsdir/stray8", "0/0"]) + self.wait_until_true( + lambda: _count_fragmented() == 1, + timeout=30 + ) + + time.sleep(30) + + # auto merge + + # merging stray dirs is driven by 
MDCache::advance_stray() + # advance stray dir 10 times + for _ in range(10): + self.fs.mds_asok(['flush', 'journal']) + + self.wait_until_true( + lambda: _count_fragmented() == 0, + timeout=30 + ) + + # auto split + + # there are 10 stray dirs. advance stray dir 20 times + self.mount_a.create_n_files("testdir1/file", split_size * 20) + self.mount_a.run_shell(["mkdir", "testdir2"]) + testdir1_path = os.path.join(self.mount_a.mountpoint, "testdir1") + for i in self.mount_a.ls(testdir1_path): + self.mount_a.run_shell(["ln", "testdir1/{0}".format(i), "testdir2/"]) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + self.mount_a.wait_until_mounted() + + # flush journal and restart mds. after restart, testdir2 is not in mds' cache + self.fs.mds_asok(['flush', 'journal']) + self.mds_cluster.mds_fail_restart() + self.fs.wait_for_daemons() + # splitting stray dirs is driven by MDCache::advance_stray() + # advance stray dir after unlink 'split_size' files. + self.fs.mds_asok(['config', 'set', 'mds_log_events_per_segment', str(split_size)]) + + self.assertEqual(_count_fragmented(), 0) + self.mount_a.run_shell(["rm", "-rf", "testdir1"]) + self.wait_until_true( + lambda: _count_fragmented() > 0, + timeout=30 + ) diff --git a/qa/tasks/cephfs/test_fstop.py b/qa/tasks/cephfs/test_fstop.py new file mode 100644 index 000000000..08617807e --- /dev/null +++ b/qa/tasks/cephfs/test_fstop.py @@ -0,0 +1,27 @@ +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestFSTop(CephFSTestCase): + def test_fstop_non_existent_cluster(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + try: + self.mount_a.run_shell(['cephfs-top', + '--cluster=hpec', + '--id=admin', + '--selftest']) + except CommandFailedError: + pass + else: + raise RuntimeError('expected cephfs-top command to fail.') + 
self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + + def test_fstop(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + self.mount_a.run_shell(['cephfs-top', + '--id=admin', + '--selftest']) + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py new file mode 100644 index 000000000..8282b4e0c --- /dev/null +++ b/qa/tasks/cephfs/test_full.py @@ -0,0 +1,398 @@ +import json +import logging +import os +from textwrap import dedent +from typing import Optional +from teuthology.orchestra.run import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +log = logging.getLogger(__name__) + + +class FullnessTestCase(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + # Subclasses define whether they're filling whole cluster or just data pool + data_only = False + + # Subclasses define how many bytes should be written to achieve fullness + pool_capacity: Optional[int] = None + fill_mb = None + + def is_full(self): + return self.fs.is_full() + + def setUp(self): + CephFSTestCase.setUp(self) + + mds_status = self.fs.rank_asok(["status"]) + + # Capture the initial OSD map epoch for later use + self.initial_osd_epoch = mds_status['osdmap_epoch_barrier'] + + def test_barrier(self): + """ + That when an OSD epoch barrier is set on an MDS, subsequently + issued capabilities cause clients to update their OSD map to that + epoch. + """ + + # script that sync up client with MDS OSD map barrier. The barrier should + # be updated by cap flush ack message. + pyscript = dedent(""" + import os + fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600) + os.fchmod(fd, 0O666) + os.fsync(fd) + os.close(fd) + """) + + # Sync up client with initial MDS OSD map barrier. 
+ path = os.path.join(self.mount_a.mountpoint, "foo") + self.mount_a.run_python(pyscript.format(path=path)) + + # Grab mounts' initial OSD epochs: later we will check that + # it hasn't advanced beyond this point. + mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch() + + # Freshly mounted at start of test, should be up to date with OSD map + self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch) + + # Set and unset a flag to cause OSD epoch to increment + self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") + self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") + + out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + new_epoch = json.loads(out)['epoch'] + self.assertNotEqual(self.initial_osd_epoch, new_epoch) + + # Do a metadata operation on clients, witness that they end up with + # the old OSD map from startup time (nothing has prompted client + # to update its map) + path = os.path.join(self.mount_a.mountpoint, "foo") + self.mount_a.run_python(pyscript.format(path=path)) + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_epoch, mount_a_initial_epoch) + self.assertEqual(mount_a_barrier, mount_a_initial_barrier) + + # Set a barrier on the MDS + self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()]) + + # Sync up client with new MDS OSD map barrier + path = os.path.join(self.mount_a.mountpoint, "baz") + self.mount_a.run_python(pyscript.format(path=path)) + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_barrier, new_epoch) + + # Some time passes here because the metadata part of the operation + # completes immediately, while the resulting OSD map update happens + # asynchronously (it's an Objecter::_maybe_request_map) as a result + # of seeing the new epoch barrier. 
+ self.wait_until_true( + lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch, + timeout=30) + + def _data_pool_name(self): + data_pool_names = self.fs.get_data_pool_names() + if len(data_pool_names) > 1: + raise RuntimeError("This test can't handle multiple data pools") + else: + return data_pool_names[0] + + def _test_full(self, easy_case): + """ + - That a client trying to write data to a file is prevented + from doing so with an -EFULL result + - That they are also prevented from creating new files by the MDS. + - That they may delete another file to get the system healthy again + + :param easy_case: if true, delete a successfully written file to + free up space. else, delete the file that experienced + the failed write. + """ + + osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd')) + + log.info("Writing {0}MB should fill this cluster".format(self.fill_mb)) + + # Fill up the cluster. This dd may or may not fail, as it depends on + # how soon the cluster recognises its own fullness + self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2) + try: + self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2) + except CommandFailedError: + log.info("Writing file B failed (full status happened already)") + assert self.is_full() + else: + log.info("Writing file B succeeded (full status will happen soon)") + self.wait_until_true(lambda: self.is_full(), + timeout=osd_mon_report_interval * 120) + + # Attempting to write more data should give me ENOSPC + with self.assertRaises(CommandFailedError) as ar: + self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2) + self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space" + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the policy of rejecting non-deletion metadata operations + # while in the full state. 
+ osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + if not self.data_only: + with self.assertRaises(CommandFailedError): + self.mount_a.write_n_mb("small_file_1", 0) + + # Clear out some space + if easy_case: + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + else: + # In the hard case it is the file that filled the system. + # Before the new #7317 (ENOSPC, epoch barrier) changes, this + # would fail because the last objects written would be + # stuck in the client cache as objecter operations. + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + + # Here we are waiting for two things to happen: + # * The MDS to purge the stray folder and execute object deletions + # * The OSDs to inform the mon that they are no longer full + self.wait_until_true(lambda: not self.is_full(), + timeout=osd_mon_report_interval * 120) + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the free space policy + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + # Now I should be able to write again + self.mount_a.write_n_mb("large_file", 50, seek=0) + + # Ensure that the MDS keeps its OSD epoch barrier across a restart + + def test_full_different_file(self): + self._test_full(True) + + def test_full_same_file(self): + self._test_full(False) + + def _remote_write_test(self, template): + """ + Run some remote python in a way that's useful for + testing free space behaviour (see test_* methods using this) + """ + file_path = os.path.join(self.mount_a.mountpoint, "full_test_file") + + # Enough to trip 
the full flag + osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd')) + mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon")) + + # Sufficient data to cause RADOS cluster to go 'full' + log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb)) + + # Long enough for RADOS cluster to notice it is full and set flag on mons + # (report_interval for mon to learn PG stats, tick interval for it to update OSD map, + # factor of 1.5 for I/O + network latency in committing OSD map and distributing it + # to the OSDs) + full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5 + + # Configs for this test should bring this setting down in order to + # run reasonably quickly + if osd_mon_report_interval > 10: + log.warning("This test may run rather slowly unless you decrease" + "osd_mon_report_interval (5 is a good setting)!") + + # set the object_size to 1MB to make the objects destributed more evenly + # among the OSDs to fix Tracker#45434 + file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576" + self.mount_a.run_python(template.format( + fill_mb=self.fill_mb, + file_path=file_path, + file_layout=file_layout, + full_wait=full_wait, + is_fuse=isinstance(self.mount_a, FuseMount) + )) + + def test_full_fclose(self): + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + print("writing some data through which we expect to succeed") + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}') + bytes += os.write(f, b'a' * 512 * 1024) + os.fsync(f) + print("fsync'ed data successfully, will 
now attempt to fill fs") + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, int({fill_mb} * 0.9)): + bytes += os.write(f, b'x' * 1024 * 1024) + print("wrote {{0}} bytes via buffered write, may repeat".format(bytes)) + print("done writing {{0}} bytes".format(bytes)) + + # OK, now we should sneak in under the full condition + # due to the time it takes the OSDs to report to the + # mons, and get a successful fsync on our full-making data + os.fsync(f) + print("successfully fsync'ed prior to getting full state reported") + + # buffered write, add more dirty data to the buffer + print("starting buffered write") + try: + for n in range(0, int({fill_mb} * 0.2)): + bytes += os.write(f, b'x' * 1024 * 1024) + print("sleeping a bit as we've exceeded 90% of our expected full ratio") + time.sleep({full_wait}) + except OSError: + pass; + + print("wrote, now waiting 30s and then doing a close we expect to fail") + + # Wait long enough for a background flush that should fail + time.sleep(30) + + if {is_fuse}: + # ...and check that the failed background flush is reflected in fclose + try: + os.close(f) + except OSError: + print("close() returned an error as expected") + else: + raise RuntimeError("close() failed to raise error") + else: + # The kernel cephfs client does not raise errors on fclose + os.close(f) + + os.unlink("{file_path}") + """) + self._remote_write_test(remote_script) + + def test_full_fsync(self): + """ + That when the full flag is encountered during asynchronous + flushes, such that an fwrite() succeeds but an fsync/fclose() + should return the ENOSPC error. 
+ """ + + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + print("writing some data through which we expect to succeed") + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}') + bytes += os.write(f, b'a' * 4096) + os.fsync(f) + print("fsync'ed data successfully, will now attempt to fill fs") + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, int({fill_mb} * 1.1)): + try: + bytes += os.write(f, b'x' * 1024 * 1024) + print("wrote bytes via buffered write, moving on to fsync") + except OSError as e: + if {is_fuse}: + print("Unexpected error %s from write() instead of fsync()" % e) + raise + else: + print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))) + full = True + break + + try: + os.fsync(f) + print("fsync'ed successfully") + except OSError as e: + print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))) + full = True + break + else: + print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0))) + + if n > {fill_mb} * 0.9: + # Be cautious in the last region where we expect to hit + # the full condition, so that we don't overshoot too dramatically + print("sleeping a bit as we've exceeded 90% of our expected full ratio") + time.sleep({full_wait}) + + if not full: + raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) + + # close() should not raise an error because we already caught it in + # fsync. 
There shouldn't have been any more writeback errors + # since then because all IOs got cancelled on the full flag. + print("calling close") + os.close(f) + print("close() did not raise error") + + os.unlink("{file_path}") + """) + + self._remote_write_test(remote_script) + + +class TestQuotaFull(FullnessTestCase): + """ + Test per-pool fullness, which indicates quota limits exceeded + """ + pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit + fill_mb = pool_capacity // (1024 * 1024) # type: ignore + + # We are only testing quota handling on the data pool, not the metadata + # pool. + data_only = True + + def setUp(self): + super(TestQuotaFull, self).setUp() + + pool_name = self.fs.get_data_pool_name() + self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name, + "max_bytes", "{0}".format(self.pool_capacity)) + + +class TestClusterFull(FullnessTestCase): + """ + Test data pool fullness, which indicates that an OSD has become too full + """ + pool_capacity = None + REQUIRE_MEMSTORE = True + + def setUp(self): + super(TestClusterFull, self).setUp() + + if self.pool_capacity is None: + TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail'] + TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024)) + +# Hide the parent class so that unittest.loader doesn't try to run it. 
+del globals()['FullnessTestCase'] diff --git a/qa/tasks/cephfs/test_journal_migration.py b/qa/tasks/cephfs/test_journal_migration.py new file mode 100644 index 000000000..67b514c22 --- /dev/null +++ b/qa/tasks/cephfs/test_journal_migration.py @@ -0,0 +1,100 @@ + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.workunit import task as workunit + +JOURNAL_FORMAT_LEGACY = 0 +JOURNAL_FORMAT_RESILIENT = 1 + + +class TestJournalMigration(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_journal_migration(self): + old_journal_version = JOURNAL_FORMAT_LEGACY + new_journal_version = JOURNAL_FORMAT_RESILIENT + + self.mount_a.umount_wait() + self.fs.mds_stop() + + # Create a filesystem using the older journal format. + self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version) + self.fs.mds_restart() + self.fs.recreate() + + # Enable standby replay, to cover the bug case #8811 where + # a standby replay might mistakenly end up trying to rewrite + # the journal at the same time as an active daemon. + self.fs.set_allow_standby_replay(True) + + status = self.fs.wait_for_daemons() + + self.assertTrue(self.fs.get_replay(status=status) is not None) + + # Do some client work so that the log is populated with something. + with self.mount_a.mounted_wait(): + self.mount_a.create_files() + self.mount_a.check_files() # sanity, this should always pass + + # Run a more substantial workunit so that the length of the log to be + # coverted is going span at least a few segments + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"], + }, + "timeout": "3h" + }) + + # Modify the ceph.conf to ask the MDS to use the new journal format. + self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version) + + # Restart the MDS. 
+ self.fs.mds_fail_restart() + + # This ensures that all daemons come up into a valid state + status = self.fs.wait_for_daemons() + + # Check that files created in the initial client workload are still visible + # in a client mount. + with self.mount_a.mounted_wait(): + self.mount_a.check_files() + + # Verify that the journal really has been rewritten. + journal_version = self.fs.get_journal_version() + if journal_version != new_journal_version: + raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format( + new_journal_version, journal_version() + )) + + # Verify that cephfs-journal-tool can now read the rewritten journal + inspect_out = self.fs.journal_tool(["journal", "inspect"], 0) + if not inspect_out.endswith(": OK"): + raise RuntimeError("Unexpected journal-tool result: '{0}'".format( + inspect_out + )) + + self.fs.journal_tool(["event", "get", "json", + "--path", "/tmp/journal.json"], 0) + p = self.fs.tool_remote.sh([ + "python3", + "-c", + "import json; print(len(json.load(open('/tmp/journal.json'))))" + ]) + event_count = int(p.strip()) + if event_count < 1000: + # Approximate value of "lots", expected from having run fsstress + raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count)) + + # Do some client work to check that writing the log is still working + with self.mount_a.mounted_wait(): + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"], + }, + "timeout": "3h" + }) + + # Check that both an active and a standby replay are still up + status = self.fs.status() + self.assertEqual(len(list(self.fs.get_replays(status=status))), 1) + self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1) diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py new file mode 100644 index 000000000..b810e1a28 --- /dev/null +++ b/qa/tasks/cephfs/test_journal_repair.py @@ -0,0 +1,428 @@ + +""" +Test our tools for 
recovering the content of damaged journals +""" + +import json +import logging +from textwrap import dedent +import time + +from teuthology.exceptions import CommandFailedError, ConnectionLostError +from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology +from tasks.workunit import task as workunit + +log = logging.getLogger(__name__) + + +class TestJournalRepair(CephFSTestCase): + MDSS_REQUIRED = 2 + + def test_inject_to_empty(self): + """ + That when some dentries in the journal but nothing is in + the backing store, we correctly populate the backing store + from the journalled dentries. + """ + + # Inject metadata operations + self.mount_a.run_shell(["touch", "rootfile"]) + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.run_shell(["touch", "subdir/subdirfile"]) + # There are several different paths for handling hardlinks, depending + # on whether an existing dentry (being overwritten) is also a hardlink + self.mount_a.run_shell(["mkdir", "linkdir"]) + + # Test inode -> remote transition for a dentry + self.mount_a.run_shell(["touch", "linkdir/link0"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link0"]) + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"]) + + # Test nothing -> remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"]) + + # Test remote -> inode transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link2"]) + self.mount_a.run_shell(["touch", "linkdir/link2"]) + + # Test remote -> diff remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link3"]) + self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"]) + + # Test an empty directory + self.mount_a.run_shell(["mkdir", "subdir/subsubdir"]) + self.mount_a.run_shell(["sync"]) + + # Before we unmount, 
make a note of the inode numbers, later we will + # check that they match what we recover from the journal + rootfile_ino = self.mount_a.path_to_ino("rootfile") + subdir_ino = self.mount_a.path_to_ino("subdir") + linkdir_ino = self.mount_a.path_to_ino("linkdir") + subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile") + subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir") + + self.mount_a.umount_wait() + + # Stop the MDS + self.fs.mds_stop() + self.fs.mds_fail() + + # Now, the journal should contain the operations, but the backing + # store shouldn't + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(subdir_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute the dentry recovery, this should populate the backing store + self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0) + + # Dentries in ROOT_INO are present + self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) + self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head']) + self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)), + sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head'])) + + # Now check the MDS can read what we wrote: truncate the journal + # and start the mds. + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + # List files + self.mount_a.mount_wait() + + # First ls -R to populate MDCache, such that hardlinks will + # resolve properly (recover_dentries does not create backtraces, + # so ordinarily hardlinks to inodes that happen not to have backtraces + # will be invisible in readdir). 
+ # FIXME: hook in forward scrub here to regenerate backtraces + proc = self.mount_a.run_shell(['ls', '-R']) + self.mount_a.umount_wait() # remount to clear client cache before our second ls + self.mount_a.mount_wait() + + proc = self.mount_a.run_shell(['ls', '-R']) + self.assertEqual(proc.stdout.getvalue().strip(), + dedent(""" + .: + linkdir + rootfile + subdir + + ./linkdir: + link0 + link1 + link2 + link3 + + ./subdir: + subdirfile + subsubdir + + ./subdir/subsubdir: + """).strip()) + + # Check the correct inos were preserved by path + self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile")) + self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir")) + self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile")) + self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir")) + + # Check that the hard link handling came out correctly + self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino) + self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino) + + # Create a new file, ensure it is not issued the same ino as one of the + # recovered ones + self.mount_a.run_shell(["touch", "afterwards"]) + new_ino = self.mount_a.path_to_ino("afterwards") + self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino]) + + # Check that we can do metadata ops in the recovered directory + self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"]) + + @for_teuthology # 308s + def test_reset(self): + """ + That after forcibly modifying the backing store, we can get back into + a good state by resetting the MDSMap. + + The scenario is that we have two active MDSs, and we lose the journals. 
Once + we have completely lost confidence in the integrity of the metadata, we want to + return the system to a single-MDS state to go into a scrub to recover what we + can. + """ + + # Set max_mds to 2 + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + active_mds_names = self.fs.get_active_names(status=status) + + # Switch off any unneeded MDS daemons + for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names): + self.mds_cluster.mds_stop(unneeded_mds) + self.mds_cluster.mds_fail(unneeded_mds) + + # Create a dir on each rank + self.mount_a.run_shell_payload("mkdir {alpha,bravo} && touch {alpha,bravo}/file") + self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0") + self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1") + + # Ensure the pinning has taken effect and the /bravo dir is now + # migrated to rank 1. + self._wait_subtrees([('/bravo', 1), ('/alpha', 0)], rank=0, status=status) + + # Do some IO (this should be split across ranks according to + # the rank-pinned dirs) + self.mount_a.create_n_files("alpha/file", 1000) + self.mount_a.create_n_files("bravo/file", 1000) + + # Flush the journals so that we have some backing store data + # belonging to one MDS, and some to the other MDS. + for mds_name in active_mds_names: + self.fs.mds_asok(["flush", "journal"], mds_name) + + # Stop (hard) the second MDS daemon + self.fs.mds_stop(active_mds_names[1]) + + # Wipe out the tables for MDS rank 1 so that it is broken and can't start + # (this is the simulated failure that we will demonstrate that the disaster + # recovery tools can get us back from) + self.fs.erase_metadata_objects(prefix="mds1_") + + # Try to access files from the client + blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False) + + # Check that this "ls -R" blocked rather than completing: indicates + # it got stuck trying to access subtrees which were on the now-dead MDS. 
+ log.info("Sleeping to check ls is blocked...") + time.sleep(60) + self.assertFalse(blocked_ls.finished) + + # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1 + # is not coming back. Kill it. + log.info("Killing mount, it's blocked on the MDS we killed") + self.mount_a.kill() + self.mount_a.kill_cleanup() + try: + # Now that the mount is dead, the ls -R should error out. + blocked_ls.wait() + except (CommandFailedError, ConnectionLostError): + # The ConnectionLostError case is for kernel client, where + # killing the mount also means killing the node. + pass + + # See that the second MDS will crash when it starts and tries to + # acquire rank 1 + damaged_id = active_mds_names[1] + self.fs.mds_restart(damaged_id) + + # The daemon taking the damaged rank should start starting, then + # restart back into standby after asking the mon to mark the rank + # damaged. + def is_marked_damaged(): + mds_map = self.fs.get_mds_map() + return 1 in mds_map['damaged'] + + self.wait_until_true(is_marked_damaged, 60) + + def get_state(): + info = self.mds_cluster.get_mds_info(damaged_id) + return info['state'] if info is not None else None + + self.wait_until_equal( + get_state, + "up:standby", + timeout=60) + + self.fs.mds_stop(damaged_id) + self.fs.mds_fail(damaged_id) + + # Now give up and go through a disaster recovery procedure + self.fs.mds_stop(active_mds_names[0]) + self.fs.mds_fail(active_mds_names[0]) + # Invoke recover_dentries quietly, because otherwise log spews millions of lines + self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True) + self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True) + self.fs.table_tool(["0", "reset", "session"]) + self.fs.journal_tool(["journal", "reset"], 0) + self.fs.erase_mds_objects(1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, + '--yes-i-really-mean-it') + + # Bring an MDS back online, mount a client, and see that we can walk the full + # 
filesystem tree again + self.fs.mds_fail_restart(active_mds_names[0]) + self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30, + reject_fn=lambda v: len(v) > 1) + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "-R"], wait=True) + + def test_table_tool(self): + active_mdss = self.fs.get_active_names() + self.assertEqual(len(active_mdss), 1) + mds_name = active_mdss[0] + + self.mount_a.run_shell(["touch", "foo"]) + self.fs.mds_asok(["flush", "journal"], mds_name) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Inode table should always be the same because initial state + # and choice of inode are deterministic. + # Should see one inode consumed + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": { + "data": { + "version": 2, + "inotable": { + "projected_free": [ + {"start": 1099511628777, + "len": 1099511626775}], + "free": [ + {"start": 1099511628777, + "len": 1099511626775}]}}, + "result": 0}} + + ) + + # Should see one session + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["sessions"]), 1) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 1, + "snapserver": {"last_snap": 1, + "last_created": 1, + "last_destroyed": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_update": [], + "pending_destroy": []}, + "result": 0} + ) + + # Reset everything + for table in ["session", "inode", "snap"]: + self.fs.table_tool(["all", "reset", table]) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Should see 0 sessions + 
session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["sessions"]), 0) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see entire inode range now marked free + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": 1099511627776, + "len": 1099511627776}], + "free": [ + {"start": 1099511627776, + "len": 1099511627776}]}}, + "result": 0}} + ) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 1, + "snapserver": {"last_snap": 1, + "last_created": 1, + "last_destroyed": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_update": [], + "pending_destroy": []}, + "result": 0} + ) + + def test_table_tool_take_inos(self): + initial_range_start = 1099511627776 + initial_range_len = 1099511627776 + # Initially a completely clear range + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 0, + "inotable": {"projected_free": [ + {"start": initial_range_start, + "len": initial_range_len}], + "free": [ + {"start": initial_range_start, + "len": initial_range_len}]}}, + "result": 0}} + ) + + # Remove some + self.assertEqual( + json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": initial_range_start + 101, + "len": initial_range_len - 101}], + "free": [ + {"start": initial_range_start + 101, + "len": initial_range_len - 101}]}}, + "result": 0}} + ) + + @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth + def test_journal_smoke(self): + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "fs/misc/trivial_sync.sh"], + }, + "timeout": "1h" + }) + + for mount in self.mounts: 
def test_version_empty(self):
    """
    That a missing balancer name is rejected by the monitor, and that a
    blank balancer name makes mantle log a failure and keep the old balancer.
    """
    self.start_mantle()
    expect = " : (2) No such file or directory"

    # No balancer argument at all: the command itself must be refused.
    ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer')
    # Use a unittest assertion rather than a bare `assert`, which is
    # stripped under `python -O` and reports no values on failure.
    self.assertEqual(ret, 22)  # EINVAL

    # A blank name is accepted by the command; the failure is expected to
    # show up in the cluster log instead.
    self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ")
    with self.assert_cluster_log(failure + " " + expect):
        pass
def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=None, mul_fs=None):
    """
    Build a predicate that validates one parsed `fs perf stats` dump.

    :param active_mds_count: number of per-rank entries expected under
        ``metrics`` (the ``delayed_ranks`` entry is counted on top).
    :param client_count: minimum number of clients that must appear in both
        ``global_metrics`` and ``client_metadata`` for every filesystem.
    :param ranks: ranks that must have a non-empty ``mds.<rank>`` entry;
        when empty, the ranks are read from the current filesystem status.
    :param mul_fs: filesystem ids to check; defaults to ``[self.fs.id]``.
    :return: callable taking the metrics dict and returning True/False.
    """
    def verify_metrics_cbk(metrics):
        mds_metrics = metrics['metrics']
        if len(mds_metrics) != active_mds_count + 1:  # n active mdss + delayed set
            return False
        fs_status = self.fs.status()

        # Resolve the filesystem list on every call and regardless of
        # `ranks`, so the client-count check below is never skipped when
        # the caller supplies explicit ranks.
        filesystems = list(mul_fs) if mul_fs else [self.fs.id]

        if ranks:
            expected_ranks = set(ranks)
        else:
            # Recomputed per poll from the fresh status and accumulated
            # over *all* filesystems (not just the last one iterated).
            expected_ranks = set()
            for fscid in filesystems:
                expected_ranks.update(
                    info['rank'] for info in fs_status.get_ranks(fscid))

        for rank in expected_ranks:
            if not mds_metrics.get("mds.{}".format(rank), None):
                return False
            # any delayed rank means the dump is not settled yet
            if len(mds_metrics['delayed_ranks']) != 0:
                return False

        for fscid in filesystems:
            key = fs_status.get_fsmap(fscid)['mdsmap']['fs_name']
            global_metrics = metrics['global_metrics'].get(key, {})
            client_metadata = metrics['client_metadata'].get(key, {})
            if len(global_metrics) < client_count or len(client_metadata) < client_count:
                return False
        return True
    return verify_metrics_cbk
_do_spread_io_all_clients(self, fscid): + # spread readdir I/O + self.mount_a.run_shell(["find", "."]) + self.mount_b.run_shell(["find", "."]) + + def _cleanup_test_dirs(self): + dirnames = self.mount_a.run_shell(["ls"]).stdout.getvalue() + for dirname in dirnames.split("\n"): + if dirname.startswith(TestMDSMetrics.TEST_DIR_PERFIX): + log.info("cleaning directory {}".format(dirname)) + self.mount_a.run_shell(["rm", "-rf", dirname]) + + def _get_metrics(self, verifier_callback, trials, *args): + metrics = None + done = False + with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed: + while proceed(): + metrics = json.loads(self._fs_perf_stats(*args)) + done = verifier_callback(metrics) + if done: + break + return done, metrics + + def _setup_fs(self, fs_name): + fs_a = self.mds_cluster.newfs(name=fs_name) + + self.mds_cluster.mds_restart() + + # Wait for filesystem to go healthy + fs_a.wait_for_daemons() + + # Reconfigure client auth caps + for mount in self.mounts: + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', f"client.{mount.client_id}", + 'mds', 'allow', + 'mon', 'allow r', + 'osd', f'allow rw pool={fs_a.get_data_pool_name()}') + + return fs_a + + # basic check to verify if we get back metrics from each active mds rank + + def test_metrics_from_rank(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_metrics_post_client_disconnection(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + self.mount_a.umount_wait() + + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED - 1), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + 
def test_metrics_mds_grow(self):
    """
    That metrics are reported for every rank after growing to two ranks.
    """
    required_clients = TestMDSMetrics.CLIENTS_REQUIRED

    # validate the single-rank starting point
    single_rank_check = self.verify_mds_metrics(client_count=required_clients)
    valid, metrics = self._get_metrics(single_rank_check, 30)
    log.debug("metrics={0}".format(metrics))
    self.assertTrue(valid)

    # grow the mds cluster
    self.fs.grow(2)

    fscid = self.fs.id
    # spread directory per rank, then generate some I/O against them
    self._spread_directory_on_all_ranks(fscid)
    self._do_spread_io(fscid)

    # wait a bit for mgr to get updated metrics
    time.sleep(5)

    # validate again, now expecting two active ranks
    two_rank_check = self.verify_mds_metrics(active_mds_count=2,
                                             client_count=required_clients)
    valid, metrics = self._get_metrics(two_rank_check, 30)
    log.debug("metrics={0}".format(metrics))
    self.assertTrue(valid)

    # cleanup test directories
    self._cleanup_test_dirs()
self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # do not give this mds any chance + delayed_rank = 1 + mds_id_rank0 = self.fs.get_rank(rank=0)['name'] + mds_id_rank1 = self.fs.get_rank(rank=1)['name'] + + self.fs.set_inter_mds_block(True, mds_id_rank0, mds_id_rank1) + + def verify_delayed_metrics(metrics): + mds_metrics = metrics['metrics'] + r = mds_metrics.get("mds.{}".format(delayed_rank), None) + if not r or not delayed_rank in mds_metrics['delayed_ranks']: + return False + return True + # validate + valid, metrics = self._get_metrics(verify_delayed_metrics, 30) + log.debug("metrics={0}".format(metrics)) + + self.assertTrue(valid) + self.fs.set_inter_mds_block(False, mds_id_rank0, mds_id_rank1) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_query_mds_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a 
bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + filtered_mds = 1 + def verify_filtered_mds_rank_metrics(metrics): + # checks if the metrics has only client_metadata and + # global_metrics filtered using --mds_rank=1 + global_metrics = metrics['global_metrics'].get(self.fs.name, {}) + client_metadata = metrics['client_metadata'].get(self.fs.name, {}) + mds_metrics = metrics['metrics'] + if len(mds_metrics) != 2 or f"mds.{filtered_mds}" not in mds_metrics: + return False + if len(global_metrics) > TestMDSMetrics.CLIENTS_REQUIRED or\ + len(client_metadata) > TestMDSMetrics.CLIENTS_REQUIRED: + return False + if len(set(global_metrics) - set(mds_metrics[f"mds.{filtered_mds}"])) or\ + len(set(client_metadata) - set(mds_metrics[f"mds.{filtered_mds}"])): + return False + return True + # initiate a new query with `--mds_rank` filter and validate if + # we get metrics *only* from that mds. 
def test_query_client_ip_filter(self):
    """
    That `--client_ip` restricts the reported client metadata to clients
    with the requested IP.
    """
    # validate the unfiltered output first
    unfiltered_check = self.verify_mds_metrics(
        client_count=TestMDSMetrics.CLIENTS_REQUIRED)
    valid, metrics = self._get_metrics(unfiltered_check, 30)
    log.debug("metrics={0}".format(metrics))
    self.assertTrue(valid)

    per_client = metrics['client_metadata'][self.fs.name]
    # pick a random client and use its IP as the filter
    chosen = random.choice(list(per_client))
    client_ip = per_client[chosen]['IP']

    ip_filter = '--client_ip={}'.format(client_ip)
    valid, metrics = self._get_metrics(
        self.verify_mds_metrics(client_count=1), 30, ip_filter)
    log.debug("metrics={0}".format(metrics))
    self.assertTrue(valid)

    # every client left in the filtered output must carry the filter IP
    for entry in metrics['client_metadata'][self.fs.name].values():
        self.assertEqual(client_ip, entry['IP'])
def test_for_invalid_mds_rank(self):
    """
    That `fs perf stats` fails with EINVAL for a malformed `--mds_rank`.
    """
    invalid_mds_rank = "1,"
    rejected = False
    # try, 'fs perf stat' command with invalid mds_rank
    try:
        self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "fs", "perf", "stats", "--mds_rank", invalid_mds_rank)
    except CommandFailedError as err:
        # any failure other than EINVAL is unexpected: let it propagate
        if err.exitstatus != errno.EINVAL:
            raise
        rejected = True
    if not rejected:
        raise RuntimeError("expected the 'fs perf stat' command to fail for invalid mds_rank")
"--client_ip", invalid_client_ip) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip") + + def test_perf_stats_stale_metrics(self): + """ + That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover + """ + # validate + valid, metrics = self._get_metrics(self.verify_mds_metrics( + active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug(f'metrics={metrics}') + self.assertTrue(valid) + + # mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their + # entries from the global_metrics. + client_a_name = f'client.{self.mount_a.get_global_id()}' + client_b_name = f'client.{self.mount_b.get_global_id()}' + + global_metrics = metrics['global_metrics'] + client_a_metrics = global_metrics[self.fs.name][client_a_name] + client_b_metrics = global_metrics[self.fs.name][client_b_name] + + # fail rank0 mds + self.fs.rank_fail(rank=0) + + # Wait for rank0 up:active state + self.fs.wait_for_state('up:active', rank=0, timeout=30) + + fscid = self.fs.id + + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io_all_clients(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + try: + valid, metrics_new = self._get_metrics(self.verify_mds_metrics( + active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug(f'metrics={metrics_new}') + self.assertTrue(valid) + + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metadata = client_metadata.get(self.fs.name, {}).get(client_b_name, {}) + + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metrics_new = global_metrics.get(self.fs.name, 
{}).get(client_b_name, {}) + + # the metrics should be different for the test to succeed. + self.assertTrue(client_a_metadata and client_b_metadata and + client_a_metrics_new and client_b_metrics_new and + (client_a_metrics_new != client_a_metrics) and + (client_b_metrics_new != client_b_metrics), + "Invalid 'ceph fs perf stats' metrics after rank0 mds failover") + except MaxWhileTries: + raise RuntimeError("Failed to fetch 'ceph fs perf stats' metrics") + finally: + # cleanup test directories + self._cleanup_test_dirs() + + def test_client_metrics_and_metadata(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + self.fs.delete_all_filesystems() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", "--yes-i-really-mean-it") + + # creating filesystem + fs_a = self._setup_fs(fs_name="fs1") + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # creating another filesystem + fs_b = self._setup_fs(fs_name="fs2") + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + fscid_list = [fs_a.id, fs_b.id] + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=fscid_list), 30) + log.debug(f"metrics={metrics}") + self.assertTrue(valid) + + client_metadata_a = metrics['client_metadata']['fs1'] + client_metadata_b = metrics['client_metadata']['fs2'] + + for i in client_metadata_a: + if not (client_metadata_a[i]['hostname']): + raise RuntimeError("hostname of fs1 not found!") + if not (client_metadata_a[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs1 not found!") + + for i in client_metadata_b: + if not (client_metadata_b[i]['hostname']): + raise 
def test_non_existing_mds_rank(self):
    """
    That filtering on an MDS rank picked at random from 1..10 never yields
    client metadata or global metrics for this filesystem.
    """
    def verify_filtered_metrics(metrics):
        # True as soon as either section is non-empty for this filesystem
        has_metadata = metrics['client_metadata'].get(self.fs.name, {})
        if has_metadata:
            return True
        return bool(metrics['global_metrics'].get(self.fs.name, {}))

    filter_rank = random.randint(1, 10)
    rank_filter = '--mds_rank={}'.format(filter_rank)
    try:
        # validate
        valid, metrics = self._get_metrics(verify_filtered_metrics, 30, rank_filter)
        log.info(f'metrics={metrics}')
        self.assertFalse(valid, "Fetched 'ceph fs perf stats' metrics using nonexistent MDS rank")
    except MaxWhileTries:
        # success: the predicate never became true within the retry budget
        pass
+ client_a_name = f'client.{self.mount_a.get_global_id()}' + + global_metrics = metrics['global_metrics'] + client_a_metrics = global_metrics.get("fs1", {}).get(client_a_name, {}) + + # fail active mds of fs_a + fs_a_mds = fs_a.get_active_names()[0] + self.mds_cluster.mds_fail(fs_a_mds) + fs_a.wait_for_state('up:active', rank=0, timeout=30) + + # spread directory per rank + self._spread_directory_on_all_ranks(fs_a.id) + + # spread some I/O + self._do_spread_io_all_clients(fs_a.id) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + try: + valid, metrics_new = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30) + log.debug(f'metrics={metrics_new}') + self.assertTrue(valid) + + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get("fs1", {}).get(client_a_name, {}) + + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get("fs1", {}).get(client_a_name, {}) + + # the metrics should be different for the test to succeed. 
class TestMetaInjection(CephFSTestCase):
    """Round-trip a file's metadata through the meta tool (`showm` / `amend`)."""

    def test_meta_injection(self):
        """
        Export the metadata of one file, amend the file with that same
        export, and check that the stored metadata object is unchanged.
        """
        conf_ori = self.fs.mds_asok(['config', 'show'])
        self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', '1'])
        try:
            self.mount_a.run_shell(["mkdir", "metadir"])
            self.mount_a.run_shell(["touch", "metadir/metafile1"])
            self.mount_a.run_shell(["touch", "metadir/metafile2"])
            self.fs.mds_asok(['flush', 'journal'])
            dirino = self.mount_a.path_to_ino("metadir")
            ino = self.mount_a.path_to_ino("metadir/metafile1")

            # export meta of ino
            self.fs.meta_tool(['showm', '-i', str(ino), '-o', '/tmp/meta_out'], 0, True)
            out = self.mount_a.run_shell(['grep', str(ino), '/tmp/meta_out']).stdout.getvalue().strip()

            # check the metadata of ino
            self.assertNotEqual(out.find(u'"ino":' + str(ino)), -1)

            # amend info of ino, capturing the stored object before and after
            self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj")
            self.fs.meta_tool(['amend', '-i', str(ino), '--in', '/tmp/meta_out', '--yes-i-really-really-mean-it'], 0, True)
            self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj_chg")

            # checkout meta_out after import it: `md5sum` prints
            # "<digest> <path>", so each split must give exactly two fields
            ori_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj"]).stdout.getvalue().strip().split()
            chg_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj_chg"]).stdout.getvalue().strip().split()
            print(ori_mds5, " ==> ", chg_mds5)
            self.assertEqual(len(ori_mds5), 2)
            self.assertEqual(len(chg_mds5), 2)
            self.assertEqual(ori_mds5[0], chg_mds5[0])

            self.mount_a.run_shell(["rm", "metadir", "-rf"])
            self.mount_a.run_shell(["rm", "/tmp/meta_obj"])
            self.mount_a.run_shell(["rm", "/tmp/meta_obj_chg"])
        finally:
            # restore config of mds_log_max_segments even when a step above
            # fails, so the override does not outlive this test
            self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', conf_ori["mds_log_max_segments"]])
def verify_peer_added(self, fs_name, fs_id, peer_spec, remote_fs_name=None):
    """
    Check through the mirror daemon's admin socket that `peer_spec` is
    registered as a peer of `fs_name`.

    :param fs_name: name of the mirrored (local) filesystem.
    :param fs_id: id of that filesystem; used to build `<fs_name>@<fs_id>`.
    :param peer_spec: peer in `<client_name>@<cluster_name>` form.
    :param remote_fs_name: remote filesystem name given when the peer was
        added; when omitted the remote name is expected to equal `fs_name`.
    """
    # verify via asok
    res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
                                     'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
    peer_uuid = self.get_peer_uuid(peer_spec)
    self.assertIn(peer_uuid, res['peers'])

    remote = res['peers'][peer_uuid]['remote']
    client_name = remote['client_name']
    cluster_name = remote['cluster_name']
    self.assertEqual(peer_spec, f'{client_name}@{cluster_name}')

    # Compare against what the caller actually asked for. The previous code
    # read `self.fs_name` here, which is not set by this class's setUp.
    expected_remote_fs = remote_fs_name if remote_fs_name else fs_name
    self.assertEqual(expected_remote_fs, remote['fs_name'])
def bootstrap_peer(self, fs_name, client_name, site_name):
    """
    Run `fs snapshot mirror peer_bootstrap create` for the given filesystem,
    client and site, and return the `token` field of its JSON output.
    """
    create_cmd = ("fs", "snapshot", "mirror", "peer_bootstrap", "create",
                  fs_name, client_name, site_name)
    raw_output = self.mgr_cluster.mon_manager.raw_cluster_cmd(*create_cmd)
    return json.loads(raw_output)['token']
expected_snap_name, + expected_snap_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['last_synced_snap']['name'] == expected_snap_name) + self.assertTrue(res[dir_name]['snaps_synced'] == expected_snap_count) + + def check_peer_status_deleted_snap(self, fs_name, fs_id, peer_spec, dir_name, + expected_delete_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['snaps_deleted'] == expected_delete_count) + + def check_peer_status_renamed_snap(self, fs_name, fs_id, peer_spec, dir_name, + expected_rename_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['snaps_renamed'] == expected_rename_count) + + def check_peer_snap_in_progress(self, fs_name, fs_id, + peer_spec, dir_name, snap_name): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue('syncing' == res[dir_name]['state']) + self.assertTrue(res[dir_name]['current_sycning_snap']['name'] == snap_name) + + def verify_snapshot(self, dir_name, snap_name): + snap_list = self.mount_b.ls(path=f'{dir_name}/.snap') + self.assertTrue(snap_name in snap_list) + + source_res = self.mount_a.dir_checksum(path=f'{dir_name}/.snap/{snap_name}', + follow_symlinks=True) + log.debug(f'source snapshot checksum {snap_name} {source_res}') + + dest_res = 
self.mount_b.dir_checksum(path=f'{dir_name}/.snap/{snap_name}', + follow_symlinks=True) + log.debug(f'destination snapshot checksum {snap_name} {dest_res}') + self.assertTrue(source_res == dest_res) + + def verify_failed_directory(self, fs_name, fs_id, peer_spec, dir_name): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue('failed' == res[dir_name]['state']) + + def get_peer_uuid(self, peer_spec): + status = self.fs.status() + fs_map = status.get_fsmap_byname(self.primary_fs_name) + peers = fs_map['mirror_info']['peers'] + for peer_uuid, mirror_info in peers.items(): + client_name = mirror_info['remote']['client_name'] + cluster_name = mirror_info['remote']['cluster_name'] + remote_peer_spec = f'{client_name}@{cluster_name}' + if peer_spec == remote_peer_spec: + return peer_uuid + return None + + def get_daemon_admin_socket(self): + """overloaded by teuthology override (fs/mirror/clients/mirror.yaml)""" + return "/var/run/ceph/cephfs-mirror.asok" + + def get_mirror_daemon_pid(self): + """pid file overloaded in fs/mirror/clients/mirror.yaml""" + return self.mount_a.run_shell(['cat', '/var/run/ceph/cephfs-mirror.pid']).stdout.getvalue().strip() + + def get_mirror_rados_addr(self, fs_name, fs_id): + """return the rados addr used by cephfs-mirror instance""" + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + return res['rados_inst'] + + def mirror_daemon_command(self, cmd_label, *args): + asok_path = self.get_daemon_admin_socket() + try: + # use mount_a's remote to execute command + p = self.mount_a.client_remote.run(args= + ['ceph', '--admin-daemon', asok_path] + list(args), + stdout=StringIO(), stderr=StringIO(), timeout=30, + check_status=True, label=cmd_label) + p.wait() + except CommandFailedError as ce: + log.warn(f'mirror daemon command with 
label "{cmd_label}" failed: {ce}') + raise + res = p.stdout.getvalue().strip() + log.debug(f'command returned={res}') + return json.loads(res) + + def get_mirror_daemon_status(self): + daemon_status = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "daemon", "status")) + log.debug(f'daemon_status: {daemon_status}') + # running a single mirror daemon is supported + status = daemon_status[0] + log.debug(f'status: {status}') + return status + + def test_basic_mirror_commands(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_peer_commands(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_disable_with_peer(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_matching_peer(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError('invalid errno when adding a matching remote peer') + else: + raise RuntimeError('adding a peer matching local spec should fail') + + # verify via asok -- nothing should get added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + 
self.assertTrue(res['peers'] == {}) + + # and explicitly specifying the spec (via filesystem name) should fail too + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError('invalid errno when adding a matching remote peer') + else: + raise RuntimeError('adding a peer matching local spec should fail') + + # verify via asok -- nothing should get added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_peer_add_existing(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # adding the same peer should be idempotent + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_peer_commands_with_mirroring_disabled(self): + # try adding peer when mirroring is not enabled + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a peer') + else: + raise RuntimeError(-errno.EINVAL, 'expected peer_add to fail') + + # try removing peer + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') + except CommandFailedError as ce: + if 
ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when removing a peer') + else: + raise RuntimeError(-errno.EINVAL, 'expected peer_remove to fail') + + def test_add_directory_with_mirroring_disabled(self): + # try adding a directory when mirroring is not enabled + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + def test_directory_commands(self): + self.mount_a.run_shell(["mkdir", "d1"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + try: + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + except CommandFailedError as ce: + if ce.exitstatus not in (errno.ENOENT, errno.EINVAL): + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-deleting a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory removal to fail') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rmdir", "d1"]) + + def test_add_relative_directory_path(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, './d1') + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding 
a relative path dir') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_add_directory_path_normalization(self): + self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/d3') + def check_add_command_failure(dir_path): + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + # everything points for /d1/d2/d3 + check_add_command_failure('/d1/d2/././././././d3') + check_add_command_failure('/d1/d2/././././././d3//////') + check_add_command_failure('/d1/d2/../d2/././././d3') + check_add_command_failure('/././././d1/./././d2/./././d3//////') + check_add_command_failure('/./d1/./d2/./d3/../../../d1/d2/d3') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rm", "-rf", "d1"]) + + def test_add_ancestor_and_child_directory(self): + self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"]) + self.mount_a.run_shell(["mkdir", "-p", "d1/d4"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/') + def check_add_command_failure(dir_path): + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + # cannot add ancestors or a subtree for an existing directory + check_add_command_failure('/') + 
check_add_command_failure('/d1') + check_add_command_failure('/d1/d2/d3') + + # obviously, one can add a non-ancestor or non-subtree + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d4/') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rm", "-rf", "d1"]) + + def test_cephfs_mirror_blocklist(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + peers_1 = set(res['peers']) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # check if the rados addr is blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + # wait enough so that the mirror daemon restarts blocklisted instances + time.sleep(40) + rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # and we should get a new rados instance + self.assertTrue(rados_inst != rados_inst_new) + + # along with peers that were added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + peers_2 = 
set(res['peers']) + # compare the peer sets -- assertTrue(a, b) only uses b as the failure message + self.assertEqual(peers_1, peers_2) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_stats(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.create_n_files('d0/file', 50, sync=True) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + # some more IO + self.mount_a.run_shell(["mkdir", "d0/d00"]) + self.mount_a.run_shell(["mkdir", "d0/d01"]) + + self.mount_a.create_n_files('d0/d00/more_file', 20, sync=True) + self.mount_a.create_n_files('d0/d01/some_more_file', 75, sync=True) + + # take another snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap1"]) + + time.sleep(60) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap1', 2) + self.verify_snapshot('d0', 'snap1') + + # delete a snapshot + self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) + + time.sleep(10) + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap0' not in snap_list) +
self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 1) + + # rename a snapshot + self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) + + time.sleep(10) + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap1' not in snap_list) + self.assertTrue('snap2' in snap_list) + self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 1) + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_cancel_sync(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + for i in range(8): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(10) + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap0' not in snap_list) 
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_restart_sync_on_blocklist(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + for i in range(8): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(10) + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # check if the rados addr is blocklisted + 
self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) + self.verify_snapshot('d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_failed_sync_with_correction(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # add a non-existent directory for synchronization + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + # wait for mirror daemon to mark the directory as failed + time.sleep(120) + self.verify_failed_directory(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0') + + # create the directory + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + # wait for correction + time.sleep(120) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_service_daemon_status(self): + """Check the directory, failure and recovery counters in the mirror daemon status""" + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + time.sleep(30) + status = self.get_mirror_daemon_status() + + # assumption for this test: mirroring enabled for a single filesystem w/ single + # peer + + # we have not added any directories + peer = status['filesystems'][0]['peers'][0] + self.assertEqual(status['filesystems'][0]['directory_count'], 0) + self.assertEqual(peer['stats']['failure_count'], 0) + self.assertEqual(peer['stats']['recovery_count'], 0) + + # add a non-existent directory
for synchronization -- check if it's reported + # in daemon stats + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + time.sleep(120) + status = self.get_mirror_daemon_status() + # we added one + peer = status['filesystems'][0]['peers'][0] + self.assertEqual(status['filesystems'][0]['directory_count'], 1) + # failure count should be reflected + self.assertEqual(peer['stats']['failure_count'], 1) + self.assertEqual(peer['stats']['recovery_count'], 0) + + # create the directory, mirror daemon would recover + self.mount_a.run_shell(["mkdir", "d0"]) + + time.sleep(120) + status = self.get_mirror_daemon_status() + peer = status['filesystems'][0]['peers'][0] + self.assertEqual(status['filesystems'][0]['directory_count'], 1) + # failure and recovery count should be reflected + self.assertEqual(peer['stats']['failure_count'], 1) + self.assertEqual(peer['stats']['recovery_count'], 1) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirroring_init_failure(self): + """Test mirror daemon init failure""" + + # disable mgr mirroring plugin as it would try to load dir map + # on mirroring enabled for a filesystem (and throw up errors in + # the logs) + self.disable_mirroring_module() + + # enable mirroring through mon interface -- this should result in the mirror daemon + # failing to enable mirroring due to absence of `cephfs_mirror` index object.
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + + # poll until the daemon reports the failed state; leave the loop with break (not + # return) so that the disable/verify steps below still run + with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: + while proceed(): + try: + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + if res.get('state') == "failed": + break + except Exception as e: + # best-effort: log and retry on the next iteration + log.debug(f'mirror status not available yet: {e}') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def test_mirroring_init_failure_with_recovery(self): + """Test if the mirror daemon can recover from an init failure""" + + # disable mgr mirroring plugin as it would try to load dir map + # on mirroring enabled for a filesystem (and throw up errors in + # the logs) + self.disable_mirroring_module() + + # enable mirroring through mon interface -- this should result in the mirror daemon + # failing to enable mirroring due to absence of `cephfs_mirror` index object. + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + # need safe_while since non-failed status pops up as mirroring is restarted + # internally in mirror daemon.
+ with safe_while(sleep=5, tries=20, action='wait for failed state') as proceed: + while proceed(): + try: + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + if res.get('state') == "failed": + # break (not return) so that the recovery check below actually runs + break + except Exception as e: + # best-effort: log and retry on the next iteration + log.debug(f'mirror status not available yet: {e}') + + # create the index object and check daemon recovery + try: + p = self.mount_a.client_remote.run(args=['rados', '-p', self.fs.metadata_pool_name, 'create', 'cephfs_mirror'], + stdout=StringIO(), stderr=StringIO(), timeout=30, + check_status=True, label="create index object") + p.wait() + except CommandFailedError as ce: + log.warning(f'mirror daemon command to create mirror index object failed: {ce}') + raise + time.sleep(30) + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + self.assertTrue(res['snap_dirs']['dir_count'] == 0) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def test_cephfs_mirror_peer_bootstrap(self): + """Test importing peer bootstrap token""" + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # create a bootstrap token for the peer + bootstrap_token = self.bootstrap_peer(self.secondary_fs_name, "client.mirror_peer_bootstrap", "site-remote") + + # import the peer via bootstrap token + self.import_peer(self.primary_fs_name, bootstrap_token) + time.sleep(10) + self.verify_peer_added(self.primary_fs_name, self.primary_fs_id,
"client.mirror_peer_bootstrap@site-remote", + self.secondary_fs_name) + + # verify via peer_list interface + peer_uuid = self.get_peer_uuid("client.mirror_peer_bootstrap@site-remote") + res = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) + self.assertTrue(peer_uuid in res) + self.assertTrue('mon_host' in res[peer_uuid] and res[peer_uuid]['mon_host'] != '') + + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote") + # disable mirroring + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_symlink_sync(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files w/ symbolic links in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.create_n_files('d0/file', 10, sync=True) + self.mount_a.run_shell(["ln", "-s", "./file_0", "d0/sym_0"]) + self.mount_a.run_shell(["ln", "-s", "./file_1", "d0/sym_1"]) + self.mount_a.run_shell(["ln", "-s", "./file_2", "d0/sym_2"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 
1) + self.verify_snapshot('d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_with_parent_snapshot(self): + """Test snapshot synchronization with parent directory snapshots""" + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/d2/d3"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + + # create snapshots in parent directories + self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) + self.mount_a.run_shell(["mkdir", "d0/d1/.snap/snap_d1"]) + self.mount_a.run_shell(["mkdir", "d0/d1/d2/.snap/snap_d2"]) + + # try syncing more snapshots + self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap1"]) + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + + self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) + time.sleep(15) + self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_remove_on_stall(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # 
simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # make sure the rados addr is blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + # now we are sure that there are no "active" mirror daemons -- add a directory path. + dir_path_p = "/d0/d1" + dir_path = "/d0/d1/d2" + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) + + time.sleep(10) + # this uses an undocumented interface to get dirpath map state + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + res = json.loads(res_json) + # there are no mirror daemons + self.assertEqual(res['state'], 'stalled') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) + + time.sleep(10) + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('invalid errno when checking dirmap status for non-existent directory') + else: + raise RuntimeError('expected dirmap to fail for non-existent directory') + + # adding a parent directory should be allowed + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) + + time.sleep(10) + # however, this directory path should get stalled too + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res = json.loads(res_json) + # there are no mirror daemons + self.assertEqual(res['state'], 'stalled') + + #
wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # wait for restart mirror on blocklist + time.sleep(60) + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res = json.loads(res_json) + # the mirror daemon is running again, so the directory should now be mapped + self.assertEqual(res['state'], 'mapped') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_incremental_sync(self): + """ Test incremental snapshot synchronization (based on mtime differences).""" + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + repo = 'ceph-qa-suite' + repo_dir = 'ceph_repo' + repo_path = f'{repo_dir}/{repo}' + + def clone_repo(): + self.mount_a.run_shell([ + 'git', 'clone', '--branch', 'giant', + f'http://github.com/ceph/{repo}', repo_path]) + + def exec_git_cmd(cmd_list): + self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list]) + + self.mount_a.run_shell(["mkdir", repo_dir]) + clone_repo() + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) + + # full copy, takes time + time.sleep(500) +
def test_cephfs_mirror_incremental_sync_with_type_mixup(self):
    """Test incremental snapshot synchronization with file type changes.

    The same filename exists as a different type in subsequent snapshots.
    This verifies that the mirror daemon can identify a file type mismatch
    and still sync the snapshots.

    One snapshot is taken per rotation of the type list, i.e. len(typs)
    snapshots in total:

                 snap_0       snap_1      snap_2
        file_x   reg          sym         dir
        file_y   dir          reg         sym
        file_z   sym          dir         reg
    """
    log.debug('reconfigure client auth caps')
    self.mds_cluster.mon_manager.raw_cluster_cmd_result(
        'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
        'mds', 'allow rw',
        'mon', 'allow r',
        'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
            self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
    log.debug(f'mounting filesystem {self.secondary_fs_name}')
    self.mount_b.umount_wait()
    self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)

    # typs[i] is the type that fnames[i] takes in the current snapshot; the
    # deque is rotated after every snapshot so each name cycles through all
    # three types.
    typs = deque(['reg', 'dir', 'sym'])
    # What `stat -c %F` prints for each of the types above.
    stat_names = {
        'reg': 'regular file',
        'dir': 'directory',
        'sym': 'symbolic link',
    }

    def cleanup_and_create_with_type(dirname, fnames):
        """Empty `dirname` and recreate each name with its current type."""
        self.mount_a.run_shell_payload(f"rm -rf {dirname}/*")
        for t, name in zip(typs, fnames):
            fname = f'{dirname}/{name}'
            log.debug(f'file: {fname} type: {t}')
            if t == 'reg':
                self.mount_a.run_shell(["touch", fname])
                self.mount_a.write_file(fname, data=fname)
            elif t == 'dir':
                self.mount_a.run_shell(["mkdir", fname])
            elif t == 'sym':
                # verify ELOOP in mirror daemon
                self.mount_a.run_shell(["ln", "-s", "..", fname])

    def verify_types(dirname, fnames, snap_name):
        """Check on the secondary mount that each name has the expected type."""
        for t, fname in zip(typs, fnames):
            actual = self.mount_b.run_shell_payload(
                f"stat -c %F {dirname}/.snap/{snap_name}/{fname}").stdout.getvalue().strip()
            # assertEqual, not the deprecated assertEquals alias (removed in
            # Python 3.12).
            self.assertEqual(stat_names[t], actual)

    self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
    self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)

    self.mount_a.run_shell(["mkdir", "d0"])
    self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')

    fnames = ['file_x', 'file_y', 'file_z']
    for turn in range(len(typs)):
        snapname = f'snap_{turn}'
        cleanup_and_create_with_type('d0', fnames)
        self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}'])
        # give the mirror daemon time to pick up and sync the snapshot
        time.sleep(30)
        self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
                               "client.mirror_remote@ceph", '/d0', snapname, turn + 1)
        verify_types('d0', fnames, snapname)
        # next type
        typs.rotate(1)

    self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
def test_cephfs_mirror_peer_add_primary(self):
    """
    Adding the primary file system as a peer of the secondary file system
    must be rejected with EINVAL.
    """
    remote_spec = "client.mirror_remote@ceph"
    primary = (self.primary_fs_name, self.primary_fs_id)

    self.enable_mirroring(*primary)
    self.peer_add(*primary, remote_spec, self.secondary_fs_name)

    # the reverse direction (secondary -> primary) is expected to fail
    try:
        self.peer_add(self.secondary_fs_name, self.secondary_fs_id,
                      remote_spec, self.primary_fs_name)
    except CommandFailedError as err:
        if err.exitstatus != errno.EINVAL:
            raise RuntimeError('invalid errno when adding a primary file system')
    else:
        raise RuntimeError('adding peer should fail')

    self.disable_mirroring(*primary)
Note that we schedule three (3) directories for mirroring to ensure + that all replayer threads (3 by default) in the mirror daemon are busy. + """ + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.run_shell(["mkdir", "d1"]) + self.mount_a.run_shell(["mkdir", "d2"]) + for i in range(4): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + self.mount_a.write_n_mb(os.path.join('d1', filename), 1024) + self.mount_a.write_n_mb(os.path.join('d2', filename), 1024) + + log.debug('enabling mirroring') + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + log.debug('adding directory paths') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take snapshots + log.debug('taking snapshots') + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"]) + + time.sleep(10) + log.debug('checking snap in progress') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + 
"client.mirror_remote@ceph", '/d1', 'snap0') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d2', 'snap0') + + log.debug('removing directories 1') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + log.debug('removing directories 2') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + log.debug('removing directories 3') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + + log.debug('removing snapshots') + self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d2/.snap/snap0"]) + + for i in range(4): + filename = f'file.{i}' + log.debug(f'deleting {filename}') + self.mount_a.run_shell(["rm", "-f", os.path.join('d0', filename)]) + self.mount_a.run_shell(["rm", "-f", os.path.join('d1', filename)]) + self.mount_a.run_shell(["rm", "-f", os.path.join('d2', filename)]) + + log.debug('creating new files...') + self.mount_a.create_n_files('d0/file', 50, sync=True) + self.mount_a.create_n_files('d1/file', 50, sync=True) + self.mount_a.create_n_files('d2/file', 50, sync=True) + + log.debug('adding directory paths') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + + log.debug('creating new snapshots...') + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"]) + + time.sleep(60) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d1', 'snap0', 1) + self.verify_snapshot('d1', 'snap0') + + 
def test_statfs_on_deleted_fs(self):
    """
    That statfs does not cause monitors to SIGSEGV after fs deletion.
    """
    statfs_cmd = "stat -f ."

    self.mount_b.umount_wait()
    # statfs works while the file system still exists
    self.mount_a.run_shell_payload(statfs_cmd)
    self.fs.delete_all_filesystems()

    # This will hang either way, run in background.
    hung_stat = self.mount_a.run_shell_payload(
        statfs_cmd, wait=False, timeout=60, check_status=False)
    time.sleep(30)
    self.assertFalse(hung_stat.finished)

    # the process is stuck in uninterruptible sleep, just kill the mount
    self.mount_a.umount_wait(force=True)
    hung_stat.wait()
def test_root_rctime(self):
    """
    Check that the root inode has a non-default rctime on startup.
    """
    now = time.time()
    root_rctime = self.mount_a.getfattr(".", "ceph.dir.rctime")
    log.info(f"rctime = {root_rctime}")
    # the rctime must be no more than 10 seconds older than "now"
    oldest_acceptable = now - 10
    self.assertGreaterEqual(float(root_rctime), oldest_acceptable)
'--yes-i-really-mean-it') + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name) + + def test_cap_revoke_nonresponder(self): + """ + Check that a client is evicted if it has not responded to cap revoke + request for configured number of seconds. + """ + session_timeout = self.fs.get_var("session_timeout") + eviction_timeout = session_timeout / 2.0 + + self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout', + str(eviction_timeout)]) + + cap_holder = self.mount_a.open_background() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + + a = time.time() + time.sleep(eviction_timeout) + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + + # check if the cap is transferred before session timeout kicked in. + # this is a good enough check to ensure that the client got evicted + # by the cap auto evicter rather than transitioning to stale state + # and then getting evicted. 
def test_filtered_df(self):
    """
    Check that the free space `df` reports on the mount is within 10% of
    the data pool's `max_avail`.
    """
    pool_name = self.fs.get_data_pool_name()
    raw_df = self.fs.get_pool_df(pool_name)
    raw_avail = float(raw_df["max_avail"])
    out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
                                              pool_name, 'size',
                                              '-f', 'json-pretty')
    # The parsed value is not used; parsing only confirms the command
    # returned valid JSON.
    json.loads(out)

    proc = self.mount_a.run_shell(['df', '.'])
    output = proc.stdout.getvalue()
    # second line of df output, fourth column; scaled by 1024 to compare
    # with max_avail
    fs_avail = float(output.split('\n')[1].split()[3]) * 1024

    ratio = raw_avail / fs_avail
    # unittest assertion instead of a bare assert: not stripped under -O
    # and reports the offending value
    self.assertTrue(0.9 < ratio < 1.1,
                    f"pool max_avail / df avail ratio {ratio} not within (0.9, 1.1)")

def test_dump_inode(self):
    """Check that `dump inode 1` reports the path "/"."""
    info = self.fs.mds_asok(['dump', 'inode', '1'])
    self.assertEqual(info['path'], "/")

def test_dump_inode_hexademical(self):
    """Check that `dump inode` accepts an inode number given in hex."""
    self.mount_a.run_shell(["mkdir", "-p", "foo"])
    ino = self.mount_a.path_to_ino("foo")
    # exact type check, as in the original `type(ino) is int`
    self.assertIs(type(ino), int)
    info = self.fs.mds_asok(['dump', 'inode', hex(ino)])
    self.assertEqual(info['path'], "/foo")
def test_filesystem_sync_stuck_for_around_5s(self):
    """
    To check whether the filesystem sync (the `sync` command) will be
    stuck to wait for the mdlog to be flushed for at most 5 seconds.

    The shared helper is called with its default file_sync=False, so it
    runs `sync` after each batch of mkdir/rm and asserts that every sync
    takes less than 4 seconds.
    """

    dir_path = "filesystem_sync_do_not_wait_mdlog_testdir"
    self._test_sync_stuck_for_around_5s(dir_path)

def test_file_sync_stuck_for_around_5s(self):
    """
    To check whether the fsync (on the test directory) will be stuck to
    wait for the mdlog to be flushed for at most 5 seconds.

    The shared helper is called with file_sync=True, so it fsyncs the
    directory via a small python3 script after each batch of mkdir/rm and
    asserts that every sync takes less than 4 seconds.
    """

    dir_path = "file_sync_do_not_wait_mdlog_testdir"
    # second positional argument is file_sync
    self._test_sync_stuck_for_around_5s(dir_path, True)
+ """ + + stop_thread = False + dir_path = "file_filesystem_sync_crash_testdir" + self.mount_a.run_shell(["mkdir", dir_path]) + + def mkdir_rmdir_thread(mount, path): + #global stop_thread + + log.info(" mkdir_rmdir_thread starting...") + num = 0 + while not stop_thread: + n = num + m = num + for __ in range(10): + mount.run_shell(["mkdir", os.path.join(path, f"{n}")]) + n += 1 + for __ in range(10): + mount.run_shell(["rm", "-rf", os.path.join(path, f"{m}")]) + m += 1 + num += 10 + log.info(" mkdir_rmdir_thread stopped") + + def filesystem_sync_thread(mount, path): + #global stop_thread + + log.info(" filesystem_sync_thread starting...") + while not stop_thread: + mount.run_shell(["sync"]) + log.info(" filesystem_sync_thread stopped") + + def file_sync_thread(mount, path): + #global stop_thread + + log.info(" file_sync_thread starting...") + pyscript = dedent(""" + import os + + path = "{path}" + dfd = os.open(path, os.O_DIRECTORY) + os.fsync(dfd) + os.close(dfd) + """.format(path=path)) + + while not stop_thread: + mount.run_shell(['python3', '-c', pyscript]) + log.info(" file_sync_thread stopped") + + td1 = Thread(target=mkdir_rmdir_thread, args=(self.mount_a, dir_path,)) + td2 = Thread(target=filesystem_sync_thread, args=(self.mount_a, dir_path,)) + td3 = Thread(target=file_sync_thread, args=(self.mount_a, dir_path,)) + + td1.start() + td2.start() + td3.start() + time.sleep(1200) # run 20 minutes + stop_thread = True + td1.join() + td2.join() + td3.join() + self.mount_a.run_shell(["rm", "-rf", dir_path]) + + +class TestCacheDrop(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + def _run_drop_cache_cmd(self, timeout=None): + result = None + args = ["cache", "drop"] + if timeout is not None: + args.append(str(timeout)) + result = self.fs.rank_tell(args) + return result + + def _setup(self, max_caps=20, threshold=400): + # create some files + self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True) + + # Reduce this so the MDS doesn't rkcall the maximum for simple 
def test_drop_cache_command(self):
    """
    Basic test for checking drop cache command.
    Confirm it halts without a timeout.
    Note that the cache size post trimming is not checked here.
    """
    min_caps = int(self.fs.get_config("mds_min_caps_per_client"))
    self._setup()
    outcome = self._run_drop_cache_cmd()

    recall_rc = outcome['client_recall']['return_code']
    self.assertEqual(recall_rc, 0)
    flush_rc = outcome['flush_journal']['return_code']
    self.assertEqual(flush_rc, 0)
    # It should take at least 1 second
    self.assertGreater(outcome['duration'], 1)
    # lower bound on trimmed entries, as in the original expression
    expected_trimmed = 1000 - 2 * min_caps
    self.assertGreaterEqual(outcome['trim_cache']['trimmed'], expected_trimmed)

def test_drop_cache_command_timeout(self):
    """
    Basic test for checking drop cache command.
    Confirm recall halts early via a timeout.
    Note that the cache size post trimming is not checked here.
    """
    recall_timeout = 10
    self._setup()
    outcome = self._run_drop_cache_cmd(timeout=recall_timeout)

    recall_rc = outcome['client_recall']['return_code']
    self.assertEqual(recall_rc, -errno.ETIMEDOUT)
    flush_rc = outcome['flush_journal']['return_code']
    self.assertEqual(flush_rc, 0)
    self.assertGreater(outcome['duration'], recall_timeout)
    # we did something, right?
    self.assertGreaterEqual(outcome['trim_cache']['trimmed'], 100)
def test_drop_cache_command_dead(self):
    """
    Check drop cache command with non-responding client using tell
    interface. Note that the cache size post trimming is not checked
    here.
    """
    self._setup()
    self.mount_a.suspend_netns()
    try:
        result = self._run_drop_cache_cmd()
        self.assertEqual(result['client_recall']['return_code'], 0)
        self.assertEqual(result['flush_journal']['return_code'], 0)
        self.assertGreater(result['duration'], 5)
        self.assertLess(result['duration'], 120)
        # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
        # cache now causes the Locker to drive eviction of stale clients (a
        # stale session will be autoclosed at mdsmap['session_timeout']). The
        # particular operation causing this is journal flush which causes the
        # MDS to wait for cap revoke.
    finally:
        # Always restore the client's network namespace, even when an
        # assertion above fails; otherwise the mount stays suspended.
        self.mount_a.resume_netns()
#TODO: add tests for capsecs 'p' and 's'.
class TestMDSCaps(TestMultiFS):
    """
    0. Have 2 FSs on Ceph cluster.
    1. Create new files on both FSs.
    2. Create a new client that has authorization for both FSs.
    3. Remount the current mounts with this new client.
    4. Test read and write on both FSs.
    """
    def test_rw_with_fsname_and_no_path_in_cap(self):
        perm = 'rw'
        filepaths, filedata, mounts = self.setup_test_env(perm, True)

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    def test_r_with_fsname_and_no_path_in_cap(self):
        perm = 'r'
        filepaths, filedata, mounts = self.setup_test_env(perm, True)

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    def test_rw_with_fsname_and_path_in_cap(self):
        perm = 'rw'
        filepaths, filedata, mounts = self.setup_test_env(perm, True, 'dir1')

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    def test_r_with_fsname_and_path_in_cap(self):
        perm = 'r'
        filepaths, filedata, mounts = self.setup_test_env(perm, True, 'dir1')

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    # XXX: this tests the backward compatibility; "allow rw path=<dir1>" is
    # treated as "allow rw fsname=* path=<dir1>"
    def test_rw_with_no_fsname_and_path_in_cap(self):
        perm = 'rw'
        filepaths, filedata, mounts = self.setup_test_env(perm, False, 'dir1')

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    # XXX: this tests the backward compatibility; "allow r path=<dir1>" is
    # treated as "allow r fsname=* path=<dir1>"
    def test_r_with_no_fsname_and_path_in_cap(self):
        perm = 'r'
        filepaths, filedata, mounts = self.setup_test_env(perm, False, 'dir1')

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    def test_rw_with_no_fsname_and_no_path(self):
        perm = 'rw'
        filepaths, filedata, mounts = self.setup_test_env(perm)

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    def test_r_with_no_fsname_and_no_path(self):
        perm = 'r'
        filepaths, filedata, mounts = self.setup_test_env(perm)

        self.run_mds_cap_tests(filepaths, filedata, mounts, perm)

    def tearDown(self):
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()

        # Name the class explicitly: super(type(self), self) resolves to this
        # very method again when called on a subclass instance and recurses
        # forever.
        super(TestMDSCaps, self).tearDown()

    def setup_test_env(self, perm, fsname=False, cephfs_mntpt='/'):
        """
        Creates the cap string, files on both the FSs and then creates the
        new client with the cap and remounts both the FSs with newly created
        client.

        Returns a (filepaths, filedata, mounts) tuple.
        """
        filenames = ('file_on_fs1', 'file_on_fs2')
        filedata = ('some data on first fs', 'some data on second fs')
        mounts = (self.mount_a, self.mount_b)
        self.setup_fs_contents(cephfs_mntpt, filenames, filedata)

        keyring_paths = self.create_client_and_keyring_file(perm, fsname,
                                                            cephfs_mntpt)
        filepaths = self.remount_with_new_client(cephfs_mntpt, filenames,
                                                 keyring_paths)

        return filepaths, filedata, mounts

    def generate_caps(self, perm, fsname, cephfs_mntpt):
        """
        Build the MON, OSD and MDS cap strings for the test client.

        The MDS cap names both file systems when fsname is true, and carries
        a path restriction when cephfs_mntpt is not '/'.

        Returns a (moncap, osdcap, mdscap) tuple.
        """
        moncap = 'allow r'
        osdcap = (f'allow {perm} tag cephfs data={self.fs1.name}, '
                  f'allow {perm} tag cephfs data={self.fs2.name}')

        if fsname:
            if cephfs_mntpt == '/':
                mdscap = (f'allow {perm} fsname={self.fs1.name}, '
                          f'allow {perm} fsname={self.fs2.name}')
            else:
                mdscap = (f'allow {perm} fsname={self.fs1.name} '
                          f'path=/{cephfs_mntpt}, '
                          f'allow {perm} fsname={self.fs2.name} '
                          f'path=/{cephfs_mntpt}')
        else:
            if cephfs_mntpt == '/':
                mdscap = f'allow {perm}'
            else:
                mdscap = f'allow {perm} path=/{cephfs_mntpt}'

        return moncap, osdcap, mdscap

    def create_client_and_keyring_file(self, perm, fsname, cephfs_mntpt):
        """
        Create the test client with the generated caps and write its keyring
        to the remote of each mount.

        Returns the list of keyring paths, ordered (mount_a, mount_b).
        """
        moncap, osdcap, mdscap = self.generate_caps(perm, fsname,
                                                    cephfs_mntpt)

        keyring = self.create_client(self.client_id, moncap, osdcap, mdscap)
        return [self.create_keyring_file(mount_x.client_remote, keyring)
                for mount_x in (self.mount_a, self.mount_b)]

    def setup_fs_contents(self, cephfs_mntpt, filenames, filedata):
        """
        Write one file per mount (mount_a, mount_b), inside directory
        cephfs_mntpt (created first) unless cephfs_mntpt is '/'.
        """
        iter_on = zip((self.mount_a, self.mount_b), filenames, filedata)

        for mount_x, filename, data in iter_on:
            if cephfs_mntpt != '/':
                mount_x.run_shell(args=['mkdir', cephfs_mntpt])
                filepath = os_path_join(mount_x.hostfs_mntpt, cephfs_mntpt,
                                        filename)
            else:
                filepath = os_path_join(mount_x.hostfs_mntpt, filename)

            mount_x.write_file(filepath, data)

    def remount_with_new_client(self, cephfs_mntpt, filenames,
                                keyring_paths):
        """
        Remount both mounts as the test client, mount_a on fs1 and mount_b on
        fs2, using cephfs_mntpt as the CephFS mount point.

        Returns the host paths of the two test files under the new mounts.
        """
        # turn a relative directory name such as 'dir1' into '/dir1'
        if isinstance(cephfs_mntpt, str) and cephfs_mntpt != '/':
            cephfs_mntpt = '/' + cephfs_mntpt

        self.mount_a.remount(client_id=self.client_id,
                             client_keyring_path=keyring_paths[0],
                             client_remote=self.mount_a.client_remote,
                             cephfs_name=self.fs1.name,
                             cephfs_mntpt=cephfs_mntpt,
                             hostfs_mntpt=self.mount_a.hostfs_mntpt,
                             wait=True)
        self.mount_b.remount(client_id=self.client_id,
                             client_keyring_path=keyring_paths[1],
                             client_remote=self.mount_b.client_remote,
                             cephfs_name=self.fs2.name,
                             cephfs_mntpt=cephfs_mntpt,
                             hostfs_mntpt=self.mount_b.hostfs_mntpt,
                             wait=True)

        return (os_path_join(self.mount_a.hostfs_mntpt, filenames[0]),
                os_path_join(self.mount_b.hostfs_mntpt, filenames[1]))
But when MON caps are not assigned and MDS and OSD + # caps are, ceph-fuse prints "no such file or directory" instead + # of "operation not permitted". + # + # Before uncommenting the following line a fix would be required + # for the latter case to change "no such file or directory" to + # "operation not permitted". + #self.assertIn('operation not permitted', retval[2].lower()) + self.fuse_errmsgs = ('operation not permitted', 'no such file or ' + 'directory') + + if 'kernel' in str(type(self.mount_a)).lower(): + self.errmsgs = self.kernel_errmsgs + elif 'fuse' in str(type(self.mount_a)).lower(): + self.errmsgs = self.fuse_errmsgs + else: + raise RuntimeError('strange, the client was neither based on ' + 'kernel nor FUSE.') + + def check_that_mount_failed_for_right_reason(self, stderr): + stderr = stderr.lower() + for errmsg in self.errmsgs: + if errmsg in stderr: + break + else: + raise AssertionError('can\'t find expected set of words in the ' + f'stderr\nself.errmsgs - {self.errmsgs}\n' + f'stderr - {stderr}') + + def test_mount_all_caps_absent(self): + # setup part... + keyring = self.fs1.authorize(self.client_id, ('/', 'rw')) + keyring_path = self.create_keyring_file(self.mount_a.client_remote, + keyring) + + # mount the FS for which client has no auth... + retval = self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + + # tests... + self.assertIsInstance(retval, tuple) + self.assertEqual(len(retval), 3) + self.assertIsInstance(retval[0], CommandFailedError) + self.check_that_mount_failed_for_right_reason(retval[2]) + + def test_mount_mon_and_osd_caps_present_mds_caps_absent(self): + # setup part... 
+ moncap = f'allow rw fsname={self.fs1.name}, allow rw fsname={self.fs2.name}' + mdscap = f'allow rw fsname={self.fs1.name}' + osdcap = (f'allow rw tag cephfs data={self.fs1.name}, allow rw tag ' + f'cephfs data={self.fs2.name}') + keyring = self.create_client(self.client_id, moncap, osdcap, mdscap) + keyring_path = self.create_keyring_file(self.mount_a.client_remote, + keyring) + + # mount the FS for which client has no auth... + retval = self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + + # tests... + self.assertIsInstance(retval, tuple) + self.assertEqual(len(retval), 3) + self.assertIsInstance(retval[0], CommandFailedError) + self.check_that_mount_failed_for_right_reason(retval[2]) diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py new file mode 100644 index 000000000..3c464e91d --- /dev/null +++ b/qa/tasks/cephfs/test_multimds_misc.py @@ -0,0 +1,223 @@ +import logging +import errno +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.contextutil import safe_while +from teuthology.orchestra.run import CommandFailedError + +log = logging.getLogger(__name__) + +class TestScrub2(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def _check_scrub_status(self, result=None, reverse=False): + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=1, + sleep=5, timeout=30, + reverse=reverse), True) + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=2, + sleep=5, timeout=30, + reverse=reverse), True) + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=0, + sleep=5, timeout=30, + reverse=reverse), True) + + def _check_task_status_na(self, timo=120): + """ check absence of scrub status in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + 
log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + if not active[0] in task_status: + return True + + def _check_task_status(self, expected_status, timo=120): + """ check scrub status for current active mds in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + try: + if task_status[active[0]].startswith(expected_status): + return True + except KeyError: + pass + + def _find_path_inos(self, root_path): + inos = [] + p = self.mount_a.run_shell(["find", root_path]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos.append(self.mount_a.path_to_ino(path)) + return inos + + def _setup_subtrees(self): + self.fs.set_max_mds(3) + self.fs.wait_for_daemons() + status = self.fs.status() + + path = 'd1/d2/d3/d4/d5/d6/d7/d8' + self.mount_a.run_shell(['mkdir', '-p', path]) + self.mount_a.run_shell(['sync', path]) + + self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "1") + self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "2") + + self._wait_subtrees([('/d1/d2', 0), ('/d1/d2/d3/d4', 1)], status, 0) + self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 1) + self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 2) + + for rank in range(3): + self.fs.rank_tell(["flush", "journal"], rank) + + def test_apply_tag(self): + self._setup_subtrees() + inos = self._find_path_inos('d1/d2/d3/') + + tag = "tag123" + out_json = self.fs.rank_tell(["tag", "path", "/d1/d2/d3", tag], 0) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def assertTagged(ino): + file_obj_name = 
"{0:x}.00000000".format(ino) + self.fs.radosm(["getxattr", file_obj_name, "scrub_tag"]) + + for ino in inos: + assertTagged(ino) + + def test_scrub_backtrace(self): + self._setup_subtrees() + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def _check_damage(mds_rank, inos): + all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) + damage = [d for d in all_damage if d['ino'] in inos and d['damage_type'] == "backtrace"] + return len(damage) >= len(inos) + + self.assertTrue(_check_damage(0, inos[0:2])) + self.assertTrue(_check_damage(1, inos[2:4])) + self.assertTrue(_check_damage(2, inos[4:6])) + + def test_scrub_non_mds0(self): + self._setup_subtrees() + + def expect_exdev(cmd, mds): + try: + self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd) + except CommandFailedError as e: + if e.exitstatus == errno.EXDEV: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + rank1 = self.fs.get_rank(rank=1) + expect_exdev(["scrub", "start", "/d1/d2/d3"], rank1["name"]) + expect_exdev(["scrub", "abort"], rank1["name"]) + expect_exdev(["scrub", "pause"], rank1["name"]) + expect_exdev(["scrub", "resume"], rank1["name"]) + + def test_scrub_abort_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], 0) + + # Abort and verify in both 
    def test_scrub_pause_and_resume_mds0(self):
        """
        Start a recursive scrub, pause it, verify the paused state, then
        resume it and verify that the paused state is gone.
        """
        self._setup_subtrees()

        inos = self._find_path_inos('d1/d2/d3/')

        # Remove the "parent" xattr from the first object of every inode found
        # under d1/d2/d3/ before scrubbing.
        # NOTE(review): presumably this is the backtrace, giving the scrub
        # something to repair -- confirm.
        for ino in inos:
            file_obj_name = "{0:x}.00000000".format(ino)
            self.fs.radosm(["rmxattr", file_obj_name, "parent"])

        out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0)
        self.assertNotEqual(out_json, None)

        # pause and verify
        res = self.fs.run_scrub(["pause"])
        self.assertEqual(res['return_code'], 0)

        self._check_scrub_status(result="PAUSED")

        checked = self._check_task_status("paused")
        self.assertTrue(checked)

        # resume and verify
        res = self.fs.run_scrub(["resume"])
        self.assertEqual(res['return_code'], 0)

        # reverse=True: wait for the "PAUSED" result to no longer be reported
        # NOTE(review): semantics of `reverse` inferred from usage -- confirm.
        self._check_scrub_status(result="PAUSED", reverse=True)

        # the scrub entry should disappear from the task status
        checked = self._check_task_status_na()
        self.assertTrue(checked)
class TestNewOps(CephFSTestCase):
    def test_newops_getvxattr(self):
        """
        Nautilus MDSs crash when they receive unknown OPs; as a workaround
        clients should avoid sending such OPs to nautilus. This test issues
        the new getvxattr op from the client under test.
        """
        client = self.mount_a
        dir_name = "newop_getvxattr_dir"

        if isinstance(client, FuseMount):
            log.info('client is fuse mounted')
        elif isinstance(client, KernelMount):
            log.info('client is kernel mounted')
            self.skipTest("Currently kclient hasn't fixed new ops issue yet.")

        log.info("Test for new getvxattr op...")
        client.run_shell(["mkdir", dir_name])

        # to test whether will nautilus crash the MDSs
        client.getfattr("./" + dir_name, "ceph.dir.pin.random")
        log.info("Test for new getvxattr op succeeds")
+import errno +import json +import time +import logging +from io import BytesIO + +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology import contextutil +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +NFS_POOL_NAME = '.nfs' # should match mgr_module.py + +# TODO Add test for cluster update when ganesha can be deployed on multiple ports. +class TestNFS(MgrTestCase): + def _cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _nfs_cmd(self, *args): + return self._cmd("nfs", *args) + + def _orch_cmd(self, *args): + return self._cmd("orch", *args) + + def _sys_cmd(self, cmd): + ret = self.ctx.cluster.run(args=cmd, check_status=False, stdout=BytesIO(), stderr=BytesIO()) + stdout = ret[0].stdout + if stdout: + return stdout.getvalue() + + def setUp(self): + super(TestNFS, self).setUp() + self._load_module('nfs') + self.cluster_id = "test" + self.export_type = "cephfs" + self.pseudo_path = "/cephfs" + self.path = "/" + self.fs_name = "nfs-cephfs" + self.expected_name = "nfs.test" + self.sample_export = { + "export_id": 1, + "path": self.path, + "cluster_id": self.cluster_id, + "pseudo": self.pseudo_path, + "access_type": "RW", + "squash": "none", + "security_label": True, + "protocols": [ + 4 + ], + "transports": [ + "TCP" + ], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name, + }, + "clients": [] + } + + def _check_nfs_server_status(self): + res = self._sys_cmd(['sudo', 'systemctl', 'status', 'nfs-server']) + if isinstance(res, bytes) and b'Active: active' in res: + self._disable_nfs() + + def _disable_nfs(self): + log.info("Disabling NFS") + self._sys_cmd(['sudo', 'systemctl', 'disable', 'nfs-server', '--now']) + + def _fetch_nfs_daemons_details(self, enable_json=False): + args = ('ps', f'--service_name={self.expected_name}') + if enable_json: + args = (*args, '--format=json') + return self._orch_cmd(*args) + + def 
    def _check_nfs_cluster_status(self, expected_status, fail_msg):
        '''
        Check the current status of the NFS service
        :param expected_status: Status to be verified
        :param fail_msg: Message to be printed if test failed
        '''
        # The ganesha daemon takes some time to be deleted/created, so poll
        # with a growing back-off: sleep 10s, 20s, ..., 120s between checks.
        # Note that the sleeps add up, so the worst case waits 780 seconds in
        # total (not two minutes) before the test is failed.
        wait_time = 10
        while wait_time <= 120:
            time.sleep(wait_time)
            if expected_status in self._fetch_nfs_daemons_details():
                return
            wait_time += 10
        self.fail(fail_msg)
+ :param cmd_func: TestNFS method + :param cmd_args: nfs command arguments to be run + ''' + cmd_func() + ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd_args) + if ret != 0: + self.fail("Idempotency test failed") + + def _test_create_cluster(self): + ''' + Test single nfs cluster deployment. + ''' + # Disable any running nfs ganesha daemon + self._check_nfs_server_status() + self._nfs_cmd('cluster', 'create', self.cluster_id) + # Check for expected status and daemon name (nfs.<cluster_id>) + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster deployment failed') + + def _test_delete_cluster(self): + ''' + Test deletion of a single nfs cluster. + ''' + self._nfs_cmd('cluster', 'rm', self.cluster_id) + self._check_nfs_cluster_status('No daemons reported', + 'NFS Ganesha cluster could not be deleted') + + def _test_list_cluster(self, empty=False): + ''' + Test listing of deployed nfs clusters. If nfs cluster is deployed then + it checks for expected cluster id. Otherwise checks nothing is listed. + :param empty: If true it denotes no cluster is deployed. + ''' + if empty: + cluster_id = '' + else: + cluster_id = self.cluster_id + nfs_output = self._nfs_cmd('cluster', 'ls') + self.assertEqual(cluster_id, nfs_output.strip()) + + def _create_export(self, export_id, create_fs=False, extra_cmd=None): + ''' + Test creation of a single export. + :param export_id: Denotes export number + :param create_fs: If false filesytem exists. Otherwise create it. + :param extra_cmd: List of extra arguments for creating export. 
+ ''' + if create_fs: + self._cmd('fs', 'volume', 'create', self.fs_name) + with contextutil.safe_while(sleep=5, tries=30) as proceed: + while proceed(): + output = self._cmd( + 'orch', 'ls', '-f', 'json', + '--service-name', f'mds.{self.fs_name}' + ) + j = json.loads(output) + if j[0]['status']['running']: + break + export_cmd = ['nfs', 'export', 'create', 'cephfs', + '--fsname', self.fs_name, '--cluster-id', self.cluster_id] + if isinstance(extra_cmd, list): + export_cmd.extend(extra_cmd) + else: + export_cmd.extend(['--pseudo-path', self.pseudo_path]) + # Runs the nfs export create command + self._cmd(*export_cmd) + # Check if user id for export is created + self._check_auth_ls(export_id, check_in=True) + res = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'get', + f'export-{export_id}', '-']) + # Check if export object is created + if res == b'': + self.fail("Export cannot be created") + + def _create_default_export(self): + ''' + Deploy a single nfs cluster and create export with default options. + ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True) + + def _delete_export(self): + ''' + Delete an export. + ''' + self._nfs_cmd('export', 'rm', self.cluster_id, self.pseudo_path) + self._check_auth_ls() + + def _test_list_export(self): + ''' + Test listing of created exports. + ''' + nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id)) + self.assertIn(self.pseudo_path, nfs_output) + + def _test_list_detailed(self, sub_vol_path): + ''' + Test listing of created exports with detailed option. 
+ :param sub_vol_path: Denotes path of subvolume + ''' + nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id, '--detailed')) + # Export-1 with default values (access type = rw and path = '\') + self.assertDictEqual(self.sample_export, nfs_output[0]) + # Export-2 with r only + self.sample_export['export_id'] = 2 + self.sample_export['pseudo'] = self.pseudo_path + '1' + self.sample_export['access_type'] = 'RO' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.2' + self.assertDictEqual(self.sample_export, nfs_output[1]) + # Export-3 for subvolume with r only + self.sample_export['export_id'] = 3 + self.sample_export['path'] = sub_vol_path + self.sample_export['pseudo'] = self.pseudo_path + '2' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.3' + self.assertDictEqual(self.sample_export, nfs_output[2]) + # Export-4 for subvolume + self.sample_export['export_id'] = 4 + self.sample_export['pseudo'] = self.pseudo_path + '3' + self.sample_export['access_type'] = 'RW' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.4' + self.assertDictEqual(self.sample_export, nfs_output[3]) + + def _get_export(self): + ''' + Returns export block in json format + ''' + return json.loads(self._nfs_cmd('export', 'info', self.cluster_id, self.pseudo_path)) + + def _test_get_export(self): + ''' + Test fetching of created export. + ''' + nfs_output = self._get_export() + self.assertDictEqual(self.sample_export, nfs_output) + + def _check_export_obj_deleted(self, conf_obj=False): + ''' + Test if export or config object are deleted successfully. 
+ :param conf_obj: It denotes config object needs to be checked + ''' + rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls']) + + if b'export-' in rados_obj_ls or (conf_obj and b'conf-nfs' in rados_obj_ls): + self.fail("Delete export failed") + + def _get_port_ip_info(self): + ''' + Return port and ip for a cluster + ''' + #{'test': {'backend': [{'hostname': 'smithi068', 'ip': '172.21.15.68', 'port': 2049}]}} + info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id))['test']['backend'][0] + return info_output["port"], info_output["ip"] + + def _test_mnt(self, pseudo_path, port, ip, check=True): + ''' + Test mounting of created exports + :param pseudo_path: It is the pseudo root name + :param port: Port of deployed nfs cluster + :param ip: IP of deployed nfs cluster + :param check: It denotes if i/o testing needs to be done + ''' + tries = 3 + while True: + try: + self.ctx.cluster.run( + args=['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}', + f'{ip}:{pseudo_path}', '/mnt']) + break + except CommandFailedError as e: + if tries: + tries -= 1 + time.sleep(2) + continue + # Check if mount failed only when non existing pseudo path is passed + if not check and e.exitstatus == 32: + return + raise + + self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt']) + + try: + self.ctx.cluster.run(args=['touch', '/mnt/test']) + out_mnt = self._sys_cmd(['ls', '/mnt']) + self.assertEqual(out_mnt, b'test\n') + finally: + self.ctx.cluster.run(args=['sudo', 'umount', '/mnt']) + + def _write_to_read_only_export(self, pseudo_path, port, ip): + ''' + Check if write to read only export fails + ''' + try: + self._test_mnt(pseudo_path, port, ip) + except CommandFailedError as e: + # Write to cephfs export should fail for test to pass + self.assertEqual( + e.exitstatus, errno.EPERM, + 'invalid error code on trying to write to read-only export') + else: + self.fail('expected write to a read-only export to fail') + + def 
test_create_and_delete_cluster(self): + ''' + Test successful creation and deletion of the nfs cluster. + ''' + self._test_create_cluster() + self._test_list_cluster() + self._test_delete_cluster() + # List clusters again to ensure no cluster is shown + self._test_list_cluster(empty=True) + + def test_create_delete_cluster_idempotency(self): + ''' + Test idempotency of cluster create and delete commands. + ''' + self._test_idempotency(self._test_create_cluster, ['nfs', 'cluster', 'create', self.cluster_id]) + self._test_idempotency(self._test_delete_cluster, ['nfs', 'cluster', 'rm', self.cluster_id]) + + def test_create_cluster_with_invalid_cluster_id(self): + ''' + Test nfs cluster deployment failure with invalid cluster id. + ''' + try: + invalid_cluster_id = '/cluster_test' # Only [A-Za-z0-9-_.] chars are valid + self._nfs_cmd('cluster', 'create', invalid_cluster_id) + self.fail(f"Cluster successfully created with invalid cluster id {invalid_cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + def test_create_and_delete_export(self): + ''' + Test successful creation and deletion of the cephfs export. + ''' + self._create_default_export() + self._test_get_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._delete_export() + # Check if rados export object is deleted + self._check_export_obj_deleted() + self._test_mnt(self.pseudo_path, port, ip, False) + self._test_delete_cluster() + + def test_create_delete_export_idempotency(self): + ''' + Test idempotency of export create and delete commands. 
    def test_create_multiple_exports(self):
        '''
        Test creating multiple exports with different access type and path.
        '''
        # Export-1 with default values (access type = rw and path = '/')
        self._create_default_export()
        # Export-2 with r only
        self._create_export(export_id='2',
                            extra_cmd=['--pseudo-path', self.pseudo_path+'1', '--readonly'])
        # Export-3 for subvolume with r only
        self._cmd('fs', 'subvolume', 'create', self.fs_name, 'sub_vol')
        fs_path = self._cmd('fs', 'subvolume', 'getpath', self.fs_name, 'sub_vol').strip()
        self._create_export(export_id='3',
                            extra_cmd=['--pseudo-path', self.pseudo_path+'2', '--readonly',
                                       '--path', fs_path])
        # Export-4 for subvolume
        self._create_export(export_id='4',
                            extra_cmd=['--pseudo-path', self.pseudo_path+'3',
                                       '--path', fs_path])
        # Check that all four exports get listed with the expected details
        self._test_list_detailed(fs_path)
        self._test_delete_cluster()
        # Check if rados ganesha conf object is deleted
        self._check_export_obj_deleted(conf_obj=True)
        # Check that the export user id (export 1, the default) is gone
        self._check_auth_ls()
+ ''' + self._create_default_export() + # unload and load module will restart the mgr + self._unload_module("cephadm") + self._load_module("cephadm") + self._orch_cmd("set", "backend", "cephadm") + # Check if ganesha daemon is running + self._check_nfs_cluster_status('running', 'Failed to redeploy NFS Ganesha cluster') + # Checks if created export is listed + self._test_list_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._delete_export() + self._test_delete_cluster() + + def test_export_create_with_non_existing_fsname(self): + ''' + Test creating export with non-existing filesystem. + ''' + try: + fs_name = 'nfs-test' + self._test_create_cluster() + self._nfs_cmd('export', 'create', 'cephfs', + '--fsname', fs_name, '--cluster-id', self.cluster_id, + '--pseudo-path', self.pseudo_path) + self.fail(f"Export created with non-existing filesystem {fs_name}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + finally: + self._test_delete_cluster() + + def test_export_create_with_non_existing_clusterid(self): + ''' + Test creating cephfs export with non-existing nfs cluster. + ''' + try: + cluster_id = 'invalidtest' + self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name, + '--cluster-id', cluster_id, '--pseudo-path', self.pseudo_path) + self.fail(f"Export created with non-existing cluster id {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_export_create_with_relative_pseudo_path_and_root_directory(self): + ''' + Test creating cephfs export with relative or '/' pseudo path. 
+ ''' + def check_pseudo_path(pseudo_path): + try: + self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name, + '--cluster-id', self.cluster_id, + '--pseudo-path', pseudo_path) + self.fail(f"Export created for {pseudo_path}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + self._test_create_cluster() + self._cmd('fs', 'volume', 'create', self.fs_name) + check_pseudo_path('invalidpath') + check_pseudo_path('/') + check_pseudo_path('//') + self._cmd('fs', 'volume', 'rm', self.fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_write_to_read_only_export(self): + ''' + Test write to readonly export. + ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly']) + port, ip = self._get_port_ip_info() + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed') + self._write_to_read_only_export(self.pseudo_path, port, ip) + self._test_delete_cluster() + + def test_cluster_info(self): + ''' + Test cluster info outputs correct ip and hostname + ''' + self._test_create_cluster() + info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id)) + print(f'info {info_output}') + info_ip = info_output[self.cluster_id].get('backend', [])[0].pop("ip") + host_details = { + self.cluster_id: { + 'backend': [ + { + "hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(), + "port": 2049 + } + ], + "virtual_ip": None, + } + } + host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split() + print(f'host_ip is {host_ip}, info_ip is {info_ip}') + self.assertDictEqual(info_output, host_details) + self.assertTrue(info_ip in host_ip) + self._test_delete_cluster() + + def test_cluster_set_reset_user_config(self): + ''' + Test cluster is created using user config and reverts back to default + config on reset. 
+ ''' + self._test_create_cluster() + + pool = NFS_POOL_NAME + user_id = 'test' + fs_name = 'user_test_fs' + pseudo_path = '/ceph' + self._cmd('fs', 'volume', 'create', fs_name) + time.sleep(20) + key = self._cmd('auth', 'get-or-create-key', f'client.{user_id}', 'mon', + 'allow r', 'osd', + f'allow rw pool={pool} namespace={self.cluster_id}, allow rw tag cephfs data={fs_name}', + 'mds', f'allow rw path={self.path}').strip() + config = f""" LOG {{ + Default_log_level = FULL_DEBUG; + }} + + EXPORT {{ + Export_Id = 100; + Transports = TCP; + Path = /; + Pseudo = {pseudo_path}; + Protocols = 4; + Access_Type = RW; + Attr_Expiration_Time = 0; + Squash = None; + FSAL {{ + Name = CEPH; + Filesystem = {fs_name}; + User_Id = {user_id}; + Secret_Access_Key = '{key}'; + }} + }}""" + port, ip = self._get_port_ip_info() + self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', 'config', + 'set', self.cluster_id, '-i', '-'], stdin=config) + time.sleep(30) + res = self._sys_cmd(['rados', '-p', pool, '-N', self.cluster_id, 'get', + f'userconf-nfs.{user_id}', '-']) + self.assertEqual(config, res.decode('utf-8')) + self._test_mnt(pseudo_path, port, ip) + self._nfs_cmd('cluster', 'config', 'reset', self.cluster_id) + rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls']) + if b'conf-nfs' not in rados_obj_ls and b'userconf-nfs' in rados_obj_ls: + self.fail("User config not deleted") + time.sleep(30) + self._test_mnt(pseudo_path, port, ip, False) + self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_cluster_set_user_config_with_non_existing_clusterid(self): + ''' + Test setting user config for non-existing nfs cluster. 
+ ''' + try: + cluster_id = 'invalidtest' + self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', + 'config', 'set', self.cluster_id, '-i', '-'], stdin='testing') + self.fail(f"User config set for non-existing cluster {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_cluster_reset_user_config_with_non_existing_clusterid(self): + ''' + Test resetting user config for non-existing nfs cluster. + ''' + try: + cluster_id = 'invalidtest' + self._nfs_cmd('cluster', 'config', 'reset', cluster_id) + self.fail(f"User config reset for non-existing cluster {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_create_export_via_apply(self): + ''' + Test creation of export via apply + ''' + self._test_create_cluster() + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps({ + "path": "/", + "pseudo": "/cephfs", + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": self.fs_name + } + })) + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._check_nfs_cluster_status( + 'running', 'NFS Ganesha cluster not running after new export was applied') + self._test_delete_cluster() + + def test_update_export(self): + ''' + Test update of export's pseudo path and access type from rw to ro + ''' + self._create_default_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + export_block = self._get_export() + new_pseudo_path = '/testing' + export_block['pseudo'] = new_pseudo_path + export_block['access_type'] = 'RO' + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block)) + if not self._check_nfs_cluster_event('restart'): + self.fail("updating export's pseudo 
path should trigger restart of NFS service") + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster not running after restart') + self._write_to_read_only_export(new_pseudo_path, port, ip) + self._test_delete_cluster() + + def test_update_export_ro_to_rw(self): + ''' + Test update of export's access level from ro to rw + ''' + self._test_create_cluster() + self._create_export( + export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly']) + port, ip = self._get_port_ip_info() + self._write_to_read_only_export(self.pseudo_path, port, ip) + export_block = self._get_export() + export_block['access_type'] = 'RW' + self.ctx.cluster.run( + args=['ceph', 'nfs', 'export', 'apply', self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block)) + if self._check_nfs_cluster_event('restart'): + self.fail("update of export's access type should not trigger NFS service restart") + self._test_mnt(self.pseudo_path, port, ip) + self._test_delete_cluster() + + def test_update_export_with_invalid_values(self): + ''' + Test update of export with invalid values + ''' + self._create_default_export() + export_block = self._get_export() + + def update_with_invalid_values(key, value, fsal=False): + export_block_new = dict(export_block) + if fsal: + export_block_new['fsal'] = dict(export_block['fsal']) + export_block_new['fsal'][key] = value + else: + export_block_new[key] = value + try: + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block_new)) + except CommandFailedError: + pass + + update_with_invalid_values('export_id', 9) + update_with_invalid_values('cluster_id', 'testing_new') + update_with_invalid_values('pseudo', 'test_relpath') + update_with_invalid_values('access_type', 'W') + update_with_invalid_values('squash', 'no_squash') + update_with_invalid_values('security_label', 'invalid') + update_with_invalid_values('protocols', [2]) + 
update_with_invalid_values('transports', ['UD']) + update_with_invalid_values('name', 'RGW', True) + update_with_invalid_values('user_id', 'testing_export', True) + update_with_invalid_values('fs_name', 'b', True) + self._test_delete_cluster() + + def test_cmds_without_reqd_args(self): + ''' + Test that cmd fails on not passing required arguments + ''' + def exec_cmd_invalid(*cmd): + try: + self._nfs_cmd(*cmd) + self.fail(f"nfs {cmd} command executed successfully without required arguments") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + exec_cmd_invalid('cluster', 'create') + exec_cmd_invalid('cluster', 'delete') + exec_cmd_invalid('cluster', 'config', 'set') + exec_cmd_invalid('cluster', 'config', 'reset') + exec_cmd_invalid('export', 'create', 'cephfs') + exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid') + exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid', 'a_fs') + exec_cmd_invalid('export', 'ls') + exec_cmd_invalid('export', 'delete') + exec_cmd_invalid('export', 'delete', 'clusterid') + exec_cmd_invalid('export', 'info') + exec_cmd_invalid('export', 'info', 'clusterid') + exec_cmd_invalid('export', 'apply') diff --git a/qa/tasks/cephfs/test_openfiletable.py b/qa/tasks/cephfs/test_openfiletable.py new file mode 100644 index 000000000..eff6b5093 --- /dev/null +++ b/qa/tasks/cephfs/test_openfiletable.py @@ -0,0 +1,85 @@ +import time +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class OpenFileTable(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def _check_oft_counter(self, name, count): + perf_dump = self.fs.mds_asok(['perf', 'dump']) + if perf_dump['oft'][name] == count: + return True + return False + + def test_max_items_per_obj(self): + """ + The maximum number of openfiles omap objects keys are now equal to + osd_deep_scrub_large_omap_object_key_threshold option. 
+ """ + self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "5") + + self.fs.mds_restart() + self.fs.wait_for_daemons() + + # Write some bytes to a file + size_mb = 1 + + # Hold the file open + file_count = 8 + for i in range(0, file_count): + filename = "open_file{}".format(i) + p = self.mount_a.open_background(filename) + self.mount_a.write_n_mb(filename, size_mb) + + time.sleep(10) + + """ + With osd_deep_scrub_large_omap_object_key_threshold value as 5 and + opening 8 files we should have a new rados object with name + mds0_openfiles.1 to hold the extra keys. + """ + + self.fs.radosm(["stat", "mds0_openfiles.1"]) + + # Now close the file + self.mount_a.kill_background(p) + + def test_perf_counters(self): + """ + Opening a file should increment omap_total_updates by 1. + """ + + self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "1") + self.fs.mds_restart() + self.fs.wait_for_daemons() + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_updates_0 = perf_dump['oft']['omap_total_updates'] + log.info("omap_total_updates_0:{}".format(omap_total_updates_0)) + + # Open the file + p = self.mount_a.open_background("omap_counter_test_file") + self.wait_until_true(lambda: self._check_oft_counter('omap_total_updates', 2), timeout=120) + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_updates_1 = perf_dump['oft']['omap_total_updates'] + log.info("omap_total_updates_1:{}".format(omap_total_updates_1)) + + self.assertTrue((omap_total_updates_1 - omap_total_updates_0) == 2) + + # Now close the file + self.mount_a.kill_background(p) + # Ensure that the file does not exist any more + self.wait_until_true(lambda: self._check_oft_counter('omap_total_removes', 1), timeout=120) + self.wait_until_true(lambda: self._check_oft_counter('omap_total_kv_pairs', 1), timeout=120) + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_removes = perf_dump['oft']['omap_total_removes'] + omap_total_kv_pairs = 
perf_dump['oft']['omap_total_kv_pairs'] + log.info("omap_total_removes:{}".format(omap_total_removes)) + log.info("omap_total_kv_pairs:{}".format(omap_total_kv_pairs)) + self.assertTrue(omap_total_removes == 1) + self.assertTrue(omap_total_kv_pairs == 1) diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py new file mode 100644 index 000000000..9912debed --- /dev/null +++ b/qa/tasks/cephfs/test_pool_perm.py @@ -0,0 +1,109 @@ +from textwrap import dedent +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import os + + +class TestPoolPerm(CephFSTestCase): + def test_pool_perm(self): + self.mount_a.run_shell(["touch", "test_file"]) + + file_path = os.path.join(self.mount_a.mountpoint, "test_file") + + remote_script = dedent(""" + import os + import errno + + fd = os.open("{path}", os.O_RDWR) + try: + if {check_read}: + ret = os.read(fd, 1024) + else: + os.write(fd, b'content') + except OSError as e: + if e.errno != errno.EPERM: + raise + else: + raise RuntimeError("client does not check permission of data pool") + """) + + client_name = "client.{0}".format(self.mount_a.client_id) + + # set data pool read only + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow r pool={0}'.format(self.fs.get_data_pool_name())) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + # write should fail + self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False))) + + # set data pool write only + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow w pool={0}'.format(self.fs.get_data_pool_name())) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + # read should fail + self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True))) + + def test_forbidden_modification(self): + """ + That a 
client who does not have the capability for setting + layout pools is prevented from doing so. + """ + + # Set up + client_name = "client.{0}".format(self.mount_a.client_id) + new_pool_name = "data_new" + self.fs.add_data_pool(new_pool_name) + + self.mount_a.run_shell(["touch", "layoutfile"]) + self.mount_a.run_shell(["mkdir", "layoutdir"]) + + # Set MDS 'rw' perms: missing 'p' means no setting pool layouts + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r', + 'osd', + 'allow rw pool={0},allow rw pool={1}'.format( + self.fs.get_data_pool_names()[0], + self.fs.get_data_pool_names()[1], + )) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) + self.mount_a.umount_wait() + + # Set MDS 'rwp' perms: should now be able to set layouts + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r', + 'osd', + 'allow rw pool={0},allow rw pool={1}'.format( + self.fs.get_data_pool_names()[0], + self.fs.get_data_pool_names()[1], + )) + self.mount_a.mount_wait() + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) + self.mount_a.umount_wait() + + def tearDown(self): + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_a.client_id), + 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0])) + super(TestPoolPerm, self).tearDown() + diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py new file mode 100644 index 000000000..0386672bd --- /dev/null +++ b/qa/tasks/cephfs/test_quota.py @@ -0,0 +1,106 @@ + 
+from tasks.cephfs.cephfs_test_case import CephFSTestCase + +from teuthology.exceptions import CommandFailedError + +class TestQuota(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 1 + + def test_remote_update_getfattr(self): + """ + That quota changes made from one client are visible to another + client looking at ceph.quota xattrs + """ + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + None) + self.assertEqual( + self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + None) + + self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10") + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + "10") + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + "10", timeout=10) + + def test_remote_update_df(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the change + reflected in their statfs output. 
+ """ + + self.mount_b.umount_wait() + + self.mount_a.run_shell(["mkdir", "subdir"]) + + size_before = 1024 * 1024 * 128 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_before) + + self.mount_b.mount_wait(cephfs_mntpt="/subdir") + + self.assertDictEqual( + self.mount_b.df(), + { + "total": size_before, + "used": 0, + "available": size_before + }) + + size_after = 1024 * 1024 * 256 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_after) + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.df(), + { + "total": size_after, + "used": 0, + "available": size_after + }, + timeout=10 + ) + + def test_remote_update_write(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the effect + of the change when writing data. + """ + + self.mount_a.run_shell(["mkdir", "subdir_files"]) + self.mount_a.run_shell(["mkdir", "subdir_data"]) + + # Set some nice high quotas that mount_b's initial operations + # will be well within + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600") + + # Do some writes within my quota + self.mount_b.create_n_files("subdir_files/file", 20) + self.mount_b.write_n_mb("subdir_data/file", 20) + + # Set quotas lower than what mount_b already wrote, it should + # refuse to write more once it's seen them + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576") + + # Do some writes that would have been okay within the old quota, + # but are forbidden under the new quota + with self.assertRaises(CommandFailedError): + self.mount_b.create_n_files("subdir_files/file", 40) + with self.assertRaises(CommandFailedError): + 
self.mount_b.write_n_mb("subdir_data/file", 40) + diff --git a/qa/tasks/cephfs/test_readahead.py b/qa/tasks/cephfs/test_readahead.py new file mode 100644 index 000000000..7e6270f03 --- /dev/null +++ b/qa/tasks/cephfs/test_readahead.py @@ -0,0 +1,26 @@ +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +class TestReadahead(CephFSTestCase): + def test_flush(self): + # Create 32MB file + self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"]) + + # Unmount and remount the client to flush cache + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + initial_op_read = self.mount_a.get_op_read_count() + self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"]) + op_read = self.mount_a.get_op_read_count() + self.assertGreaterEqual(op_read, initial_op_read) + op_read -= initial_op_read + log.info("read operations: {0}".format(op_read)) + + # with exponentially increasing readahead, we should see fewer than 10 operations + # but this test simply checks if the client is doing a remote read for each local read + if op_read >= 32: + raise RuntimeError("readahead not working") diff --git a/qa/tasks/cephfs/test_recovery_fs.py b/qa/tasks/cephfs/test_recovery_fs.py new file mode 100644 index 000000000..bbcdf9769 --- /dev/null +++ b/qa/tasks/cephfs/test_recovery_fs.py @@ -0,0 +1,38 @@ +import logging +from os.path import join as os_path_join + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestFSRecovery(CephFSTestCase): + """ + Tests for recovering FS after loss of FSMap + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 3 + + def test_recover_fs_after_fsmap_removal(self): + data_pool = self.fs.get_data_pool_name() + metadata_pool = self.fs.get_metadata_pool_name() + # write data in mount, and fsync + self.mount_a.create_n_files('file_on_fs', 1, sync=True) + # fail MDSs to allow removing the file system in the 
next step + self.fs.fail() + # Remove file system to lose FSMap and keep the pools intact. + # This mimics the scenario where the monitor store is rebuilt + # using OSDs to recover a cluster with corrupt monitor store. + # The FSMap is permanently lost, but the FS pools are + # recovered/intact + self.fs.rm() + # Recreate file system with pool and previous fscid + self.fs.mon_manager.raw_cluster_cmd( + 'fs', 'new', self.fs.name, metadata_pool, data_pool, + '--recover', '--force', '--fscid', f'{self.fs.id}') + self.fs.set_joinable() + # Check status of file system + self.fs.wait_for_daemons() + # check data in file system is intact + filepath = os_path_join(self.mount_a.hostfs_mntpt, 'file_on_fs_0') + self.assertEqual(self.mount_a.read_file(filepath), "0") diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py new file mode 100644 index 000000000..9926b3670 --- /dev/null +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -0,0 +1,203 @@ +""" +Test our tools for recovering metadata from the data pool into an alternate pool +""" + +import logging +import traceback +from collections import namedtuple + +from teuthology.orchestra.run import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class OverlayWorkload(object): + def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount): + self._orig_fs = orig_fs + self._recovery_fs = recovery_fs + self._orig_mount = orig_mount + self._recovery_mount = recovery_mount + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. 
Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def assert_equal(self, a, b): + try: + if a != b: + raise AssertionError("{0} != {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self): + """ + Damage the filesystem pools in ways that will be interesting to recover from. By + default just wipe everything in the metadata pool + """ + + pool = self._orig_fs.get_metadata_pool_name() + self._orig_fs.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self): + """ + Called after client unmount, after write: flush whatever you want + """ + self._orig_fs.mds_asok(["flush", "journal"]) + self._recovery_fs.mds_asok(["flush", "journal"]) + + +class SimpleOverlayWorkload(OverlayWorkload): + """ + Single file, single directory, check that it gets recovered and so does its size + """ + def write(self): + self._orig_mount.run_shell(["mkdir", "subdir"]) + self._orig_mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._orig_mount.stat("subdir/sixmegs") + + def validate(self): + self._recovery_mount.run_shell(["ls", "subdir"]) + st = self._recovery_mount.stat("subdir/sixmegs") + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + +class TestRecoveryPool(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + REQUIRE_RECOVERY_FILESYSTEM = True + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + def 
_rebuild_metadata(self, workload, other_pool=None, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. + """ + + # First, inject some files + + workload.write() + + # Unmount the client and flush the journal: the tool should also cope with + # situations where there is dirty metadata, but we'll test that separately + self.mount_a.umount_wait() + self.mount_b.umount_wait() + workload.flush() + + # Create the alternate pool if requested + recovery_fs = self.recovery_fs.name + recovery_pool = self.recovery_fs.get_metadata_pool_name() + self.recovery_fs.data_scan(['init', '--force-init', + '--filesystem', recovery_fs, + '--alternate-pool', recovery_pool]) + self.recovery_fs.mon_manager.raw_cluster_cmd('-s') + self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"]) + self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"]) + self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"]) + + # Stop the MDS + self.fs.mds_stop() # otherwise MDS will join once the fs is reset + self.fs.fail() + + # After recovery, we need the MDS to not be strict about stats (in production these options + # are off by default, but in QA we need to explicitly disable them) + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + + # Apply any data damage the workload wants + workload.damage() + + # Reset the MDS map in case multiple ranks were in play: recovery procedure + # only understands how to rebuild metadata under rank 0 + self.fs.reset() + + self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) + + # Run the recovery procedure + if False: + with self.assertRaises(CommandFailedError): + # Normal reset should fail when no objects 
are present, we'll use --force instead + self.fs.journal_tool(["journal", "reset"], 0) + + self.fs.data_scan(['scan_extents', '--alternate-pool', + recovery_pool, '--filesystem', self.fs.name, + self.fs.get_data_pool_name()]) + self.fs.data_scan(['scan_inodes', '--alternate-pool', + recovery_pool, '--filesystem', self.fs.name, + '--force-corrupt', '--force-init', + self.fs.get_data_pool_name()]) + self.fs.journal_tool(['event', 'recover_dentries', 'list', + '--alternate-pool', recovery_pool], 0) + + self.fs.data_scan(['init', '--force-init', '--filesystem', + self.fs.name]) + self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name, + '--force-corrupt', '--force-init', + self.fs.get_data_pool_name()]) + self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0) + + self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0) + self.fs.journal_tool(['journal', 'reset', '--force'], 0) + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', + recovery_fs + ":0") + + # Mark the MDS repaired + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Start the MDS + self.fs.mds_restart() + self.fs.set_joinable() + self.recovery_fs.mds_restart() + self.fs.wait_for_daemons() + self.recovery_fs.wait_for_daemons() + status = self.recovery_fs.status() + for rank in self.recovery_fs.get_ranks(status=status): + self.fs.mon_manager.raw_cluster_cmd('tell', "mds." 
+ rank['name'], + 'injectargs', '--debug-mds=20') + self.fs.rank_tell(['scrub', 'start', '/', 'recursive,repair'], rank=rank['rank'], status=status) + log.info(str(self.mds_cluster.status())) + + # Mount a client + self.mount_a.mount_wait() + self.mount_b.mount_wait(cephfs_name=recovery_fs) + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def test_rebuild_simple(self): + self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs, + self.mount_a, self.mount_b)) diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py new file mode 100644 index 000000000..dd7c11af5 --- /dev/null +++ b/qa/tasks/cephfs/test_scrub.py @@ -0,0 +1,178 @@ +""" +Test CephFS scrub (distinct from OSD scrub) functionality +""" + +from io import BytesIO +import logging +from collections import namedtuple + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class Workload(CephFSTestCase): + def __init__(self, test, filesystem, mount): + super().__init__() + self._test = test + self._mount = mount + self._filesystem = filesystem + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. 
have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self): + """ + Damage the filesystem pools in ways that will be interesting to recover from. By + default just wipe everything in the metadata pool + """ + # Delete every object in the metadata pool + pool = self._filesystem.get_metadata_pool_name() + self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self): + """ + Called after client unmount, after write: flush whatever you want + """ + self._filesystem.mds_asok(["flush", "journal"]) + + +class BacktraceWorkload(Workload): + """ + Single file, single directory, wipe the backtrace and check it. + """ + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + + def validate(self): + st = self._mount.stat("subdir/sixmegs") + self._filesystem.mds_asok(["flush", "journal"]) + bt = self._filesystem.read_backtrace(st['st_ino']) + parent = bt['ancestors'][0]['dname'] + self.assertEqual(parent, 'sixmegs') + return self._errors + + def damage(self): + st = self._mount.stat("subdir/sixmegs") + self._filesystem.mds_asok(["flush", "journal"]) + self._filesystem._write_data_xattr(st['st_ino'], "parent", "") + + def create_files(self, nfiles=1000): + self._mount.create_n_files("scrub-new-files/file", nfiles) + + +class DupInodeWorkload(Workload): + """ + Duplicate an inode and try scrubbing it twice. 
+ """ + + def write(self): + self._mount.run_shell(["mkdir", "parent"]) + self._mount.run_shell(["mkdir", "parent/child"]) + self._mount.write_n_mb("parent/parentfile", 6) + self._mount.write_n_mb("parent/child/childfile", 6) + + def damage(self): + self._mount.umount_wait() + self._filesystem.mds_asok(["flush", "journal"]) + self._filesystem.fail() + d = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"]) + self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(d)) + self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True) + self._filesystem.set_joinable() + self._filesystem.wait_for_daemons() + + def validate(self): + out_json = self._filesystem.run_scrub(["start", "/", "recursive,repair"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + self.assertTrue(self._filesystem.are_daemons_healthy()) + return self._errors + + +class TestScrub(CephFSTestCase): + MDSS_REQUIRED = 1 + + def setUp(self): + super().setUp() + + def _scrub(self, workload, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. 
+ """ + + # First, inject some files + + workload.write() + + # The MDS scatter checks below are off by default in production, but in QA we need to explicitly disable them + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + + # Apply any data damage the workload wants + workload.damage() + + out_json = self.fs.run_scrub(["start", "/", "recursive,repair"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def _get_damage_count(self, damage_type='backtrace'): + out_json = self.fs.rank_tell(["damage", "ls"]) + self.assertNotEqual(out_json, None) + + damage_count = 0 + for it in out_json: + if it['damage_type'] == damage_type: + damage_count += 1 + return damage_count + + def _scrub_new_files(self, workload): + """ + That scrubbing new files does not lead to errors + """ + workload.create_files(1000) + self.fs.wait_until_scrub_complete() + self.assertEqual(self._get_damage_count(), 0) + + def test_scrub_backtrace_for_new_files(self): + self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a)) + + def test_scrub_backtrace(self): + self._scrub(BacktraceWorkload(self, self.fs, self.mount_a)) + + def test_scrub_dup_inode(self): + self._scrub(DupInodeWorkload(self, self.fs, self.mount_a)) diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py new file mode 100644 index 000000000..bcfc2fc9a --- /dev/null +++ b/qa/tasks/cephfs/test_scrub_checks.py @@ -0,0 +1,421 @@ +""" +MDS admin socket scrubbing-related tests. 
+""" +import json +import logging +import errno +import time +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while +import os +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestScrubControls(CephFSTestCase): + """ + Test basic scrub control operations such as abort, pause and resume. + """ + + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + + def _abort_scrub(self, expected): + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], expected) + def _pause_scrub(self, expected): + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], expected) + def _resume_scrub(self, expected): + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], expected) + def _check_task_status(self, expected_status, timo=120): + """ check scrub status for current active mds in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + try: + if task_status[active[0]].startswith(expected_status): + return True + except KeyError: + pass + + def _check_task_status_na(self, timo=120): + """ check absence of scrub status in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + if not active[0] in task_status: + return True + + def create_scrub_data(self, test_dir): + for i in range(32): + dirname = "dir.{0}".format(i) + dirpath = os.path.join(test_dir, dirname) + self.mount_a.run_shell_payload(f""" +set -e +mkdir -p {dirpath} +for ((i = 0; i < 32; i++)); do + dd if=/dev/urandom of={dirpath}/filename.$i bs=1M conv=fdatasync count=1 +done 
def test_scrub_pause_and_resume(self):
    """
    Pause a running recursive scrub and resume it again.

    After the pause, both the scrub status and the task status must
    report a paused scrub; after the resume, the scrub status must no
    longer contain "PAUSED" and the task status entry must disappear.
    """
    test_dir = "scrub_control_test_path"
    abs_test_path = "/{0}".format(test_dir)

    log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
    client_path = os.path.join(self.mount_a.mountpoint, test_dir)
    log.info("client_path: {0}".format(client_path))

    self.create_scrub_data(test_dir)

    out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"])
    self.assertIsNotNone(out_json)

    # pause and verify (assertIn reports both operands on failure)
    self._pause_scrub(0)
    out_json = self.fs.get_scrub_status()
    self.assertIn("PAUSED", out_json['status'])

    checked = self._check_task_status("paused")
    self.assertTrue(checked)

    # resume and verify
    self._resume_scrub(0)
    out_json = self.fs.get_scrub_status()
    self.assertNotIn("PAUSED", out_json['status'])

    checked = self._check_task_status_na()
    self.assertTrue(checked)
def test_scrub_task_status_on_mds_failover(self):
    """
    Check that the scrub task status goes away after an MDS failover.

    Starts a recursive scrub, pauses it, stops the active MDS, waits
    for one of the original standbys to become active, and then waits
    for the scrub status entry to be absent from the task status.
    """
    # The tuple unpacking requires exactly one active MDS at this point.
    (original_active, ) = self.fs.get_active_names()
    original_standbys = self.mds_cluster.get_standby_daemons()

    test_dir = "scrub_control_test_path"
    abs_test_path = "/{0}".format(test_dir)

    self.create_scrub_data(test_dir)

    out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"])
    self.assertNotEqual(out_json, None)

    # pause and verify
    self._pause_scrub(0)
    out_json = self.fs.get_scrub_status()
    self.assertTrue("PAUSED" in out_json['status'])

    checked = self._check_task_status("paused")
    self.assertTrue(checked)

    # Kill the rank 0
    self.fs.mds_stop(original_active)

    # Promotion is given twice the monitor's mds_beacon_grace to happen.
    grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))

    def promoted():
        # Truthy once there is an active MDS and it is one of the
        # daemons that were standbys before the stop.
        active = self.fs.get_active_names()
        return active and active[0] in original_standbys

    log.info("Waiting for promotion of one of the original standbys {0}".format(
        original_standbys))
    self.wait_until_true(promoted, timeout=grace*2)

    # NOTE(review): unlike the sibling tests, the return value is not
    # asserted here; the call is relied on to fail by itself on timeout.
    self._check_task_status_na()
def test_scrub_checks(self):
    """Run the flush/scrub check sequence twice, with run_seq 0 and then 1."""
    self._checks(0)
    self._checks(1)
{filepath}".format(filepath=filepath)) + command = "flush_path {filepath}".format(filepath=filepath) + self.asok_command(mds_rank, command, success_validator) + command = "scrub start {filepath}".format(filepath=filepath) + self.tell_command(mds_rank, command, success_validator) + + if run_seq == 0: + log.info("First run: flushing base dir /") + command = "flush_path /" + self.asok_command(mds_rank, command, success_validator) + command = "scrub start /" + self.tell_command(mds_rank, command, success_validator) + + new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) + test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, + i=run_seq) + self.mount_a.run_shell(["mkdir", new_dir]) + command = "flush_path {dir}".format(dir=test_new_dir) + self.asok_command(mds_rank, command, success_validator) + + new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path, + i=run_seq) + test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path, + i=run_seq) + self.mount_a.write_n_mb(new_file, 1) + + command = "flush_path {file}".format(file=test_new_file) + self.asok_command(mds_rank, command, success_validator) + + # check that scrub fails on errors + ino = self.mount_a.path_to_ino(new_file) + rados_obj_name = "{ino:x}.00000000".format(ino=ino) + command = "scrub start {file}".format(file=test_new_file) + + def _check_and_clear_damage(ino, dtype): + all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) + damage = [d for d in all_damage if d['ino'] == ino and d['damage_type'] == dtype] + for d in damage: + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[mds_rank]), + "damage", "rm", str(d['id'])) + return len(damage) > 0 + + # Missing parent xattr + self.assertFalse(_check_and_clear_damage(ino, "backtrace")); + self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name()) + self.tell_command(mds_rank, command, success_validator) + 
@staticmethod
def json_validator(json_out, rc, element, expected_value):
    """
    Validate a command's exit status and one field of its JSON output.

    :param json_out: decoded JSON output of the command (a mapping), or
        None when the command printed nothing.
    :param rc: exit status of the command; any non-zero value fails.
    :param element: key to look up in *json_out*.
    :param expected_value: value that *element* is required to have.
    :returns: ``(success, message)`` where *success* is a bool and
        *message* explains the outcome.
    """
    if rc != 0:
        return False, "asok command returned error {rc}".format(rc=rc)
    if json_out is None:
        # asok_command() passes None when stdout was empty; report a
        # validation failure instead of crashing on None.get().
        return False, "got no json output, expected {el}={ev}!".format(
            el=element, ev=expected_value)
    element_value = json_out.get(element)
    if element_value != expected_value:
        return False, "unexpectedly got {jv} instead of {ev}!".format(
            jv=element_value, ev=expected_value)
    return True, "Succeeded"
class AsokCommandFailedError(Exception):
    """
    Raised when an MDS command returns a response that its validator
    does not accept.

    Carries the command string, its return code, the decoded JSON output
    and the validator's explanation.
    """

    def __init__(self, command, rc, json_out, errstring):
        self.command, self.rc = command, rc
        self.json, self.errstring = json_out, errstring

    def __str__(self):
        return (
            f"Admin socket: {self.command} failed with rc={self.rc} "
            f"json output={self.json}, because '{self.errstring}'"
        )
b/qa/tasks/cephfs/test_sessionmap.py @@ -0,0 +1,232 @@ +import time +import json +import logging + +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +class TestSessionMap(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 2 + + def test_tell_session_drop(self): + """ + That when a `tell` command is sent using the python CLI, + its MDS session is gone after it terminates + """ + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + self.fs.rank_tell(["session", "ls"], status=status) + + ls_data = self.fs.rank_asok(['session', 'ls'], status=status) + self.assertEqual(len(ls_data), 0) + + def _get_connection_count(self, status=None): + perf = self.fs.rank_asok(["perf", "dump"], status=status) + conn = 0 + for module, dump in perf.items(): + if "AsyncMessenger::Worker" in module: + conn += dump['msgr_active_connections'] + return conn + + def test_tell_conn_close(self): + """ + That when a `tell` command is sent using the python CLI, + the conn count goes back to where it started (i.e. 
def test_mount_conn_close(self):
    """
    That when a client unmounts, the connection count on the MDS goes
    back to what it was before the client mounted
    """
    self.config_set('mds', 'ms_async_reap_threshold', '1')

    # Start with no client mounted so the baseline excludes them.
    for mount in (self.mount_a, self.mount_b):
        mount.umount_wait()

    status = self.fs.status()
    baseline = self._get_connection_count(status=status)

    # Mounting has to add at least one connection...
    self.mount_a.mount_wait()
    self.assertGreater(self._get_connection_count(status=status), baseline)

    # ...and unmounting has to bring the count back to the baseline.
    self.mount_a.umount_wait()

    def back_to_baseline():
        return self._get_connection_count(status=status) == baseline

    self.wait_until_true(back_to_baseline, timeout=30)
exporting a dir + self.mount_a.run_shell(["mkdir", "bravo"]) + self.mount_a.run_shell(["touch", "bravo/file_a"]) + self.mount_b.run_shell(["touch", "bravo/file_b"]) + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + def get_omap_wrs(): + return self.fs.rank_asok(['perf', 'dump', 'objecter'], rank=1, status=status)['objecter']['omap_wr'] + + # Flush so that there are no dirty sessions on rank 1 + self.fs.rank_asok(["flush", "journal"], rank=1, status=status) + + # Export so that we get a force_open to rank 1 for the two sessions from rank 0 + initial_omap_wrs = get_omap_wrs() + self.fs.rank_asok(['export', 'dir', '/bravo', '1'], rank=0, status=status) + + # This is the critical (if rather subtle) check: that in the process of doing an export dir, + # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There + # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see + # a single session get written out (the first of the two, triggered by the second getting marked + # dirty) + # The number of writes is two per session, because the header (sessionmap version) update and + # KV write both count. Also, multiply by 2 for each openfile table update. 
+ self.wait_until_true( + lambda: get_omap_wrs() - initial_omap_wrs == 2*2, + timeout=30 # Long enough for an export to get acked + ) + + # Now end our sessions and check the backing sessionmap is updated correctly + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # In-memory sessionmap check + self.assert_session_count(0, mds_id=self.fs.get_rank(status=status)['name']) + + # On-disk sessionmap check + self.fs.rank_asok(["flush", "journal"], rank=0, status=status) + table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) + log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) + self.assertEqual(table_json['0']['result'], 0) + self.assertEqual(len(table_json['0']['data']['sessions']), 0) + + def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None): + """ + Set up auth credentials for a client mount, and write out the keyring + for the client to use. + """ + + if osd_caps is None: + osd_caps = "allow rw" + + if mon_caps is None: + mon_caps = "allow r" + + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.{name}".format(name=id_name), + "mds", mds_caps, + "osd", osd_caps, + "mon", mon_caps + ) + mount.client_id = id_name + mount.client_remote.write_file(mount.get_keyring_path(), out, sudo=True) + self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) + + def test_session_reject(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to inject client metadata") + + self.mount_a.run_shell(["mkdir", "foo"]) + self.mount_a.run_shell(["mkdir", "foo/bar"]) + self.mount_a.umount_wait() + + # Mount B will be my rejected client + self.mount_b.umount_wait() + + # Configure a client that is limited to /foo/bar + self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar") + # Check he can mount that dir and do IO + self.mount_b.mount_wait(cephfs_mntpt="/foo/bar") + self.mount_b.create_destroy() + 
def test_session_evict_blocklisted(self):
    """
    Check that mds evicts blocklisted client

    With two active ranks and both clients holding a session on each,
    mount_a's session is evicted on rank 0 only.  Once its address shows
    up as blocklisted, both ranks are expected to be down to one session.
    """
    if not isinstance(self.mount_a, FuseMount):
        self.skipTest("Requires FUSE client to use "
                      "mds_cluster.is_addr_blocklisted()")

    self.fs.set_max_mds(2)
    status = self.fs.wait_for_daemons()

    # Create d0 and d1 (each with a file) and pin them to rank 0 and
    # rank 1 respectively.
    self.mount_a.run_shell_payload("mkdir {d0,d1} && touch {d0,d1}/file")
    self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
    self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
    self._wait_subtrees([('/d0', 0), ('/d1', 1)], status=status)

    # Both clients touch a file under each directory before the session
    # counts are checked on both ranks.
    self.mount_a.run_shell(["touch", "d0/f0"])
    self.mount_a.run_shell(["touch", "d1/f0"])
    self.mount_b.run_shell(["touch", "d0/f1"])
    self.mount_b.run_shell(["touch", "d1/f1"])

    self.assert_session_count(2, mds_id=self.fs.get_rank(rank=0, status=status)['name'])
    self.assert_session_count(2, mds_id=self.fs.get_rank(rank=1, status=status)['name'])

    # Evict mount_a on rank 0 only, then wait for its address to be
    # reported as blocklisted.
    mount_a_client_id = self.mount_a.get_global_id()
    self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id],
                     mds_id=self.fs.get_rank(rank=0, status=status)['name'])
    self.wait_until_true(lambda: self.mds_cluster.is_addr_blocklisted(
        self.mount_a.get_global_addr()), timeout=30)

    # 10 seconds should be enough for evicting client
    time.sleep(10)
    # Rank 1 was never told to evict, yet it too must have dropped the
    # blocklisted client's session.
    self.assert_session_count(1, mds_id=self.fs.get_rank(rank=0, status=status)['name'])
    self.assert_session_count(1, mds_id=self.fs.get_rank(rank=1, status=status)['name'])

    # Clean up the evicted mount and bring it back.
    self.mount_a.kill_cleanup()
    self.mount_a.mount_wait()
def extract_schedule_and_retention_spec(spec=None):
    """
    Split (schedule, retention) pairs into two sets.

    :param spec: iterable of indexable items whose element 0 is a
        schedule and element 1 a retention; None means no entries.
    :returns: ``(schedules, retentions)`` as a tuple of two sets.
    """
    # Materialise once: avoids a shared mutable default and lets one-shot
    # iterables be consumed for both sets.
    pairs = [] if spec is None else list(spec)
    schedule = {pair[0] for pair in pairs}
    retention = {pair[1] for pair in pairs}
    return (schedule, retention)
"enable", "snap_schedule") + + def _disable_snap_schedule(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule") + + def _allow_minute_granularity_snapshots(self): + self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True) + + def setUp(self): + super(TestSnapSchedules, self).setUp() + self.volname = None + self.vol_created = False + self._create_or_reuse_test_volume() + self.create_cbks = [] + self.remove_cbks = [] + # used to figure out which snapshots are created/deleted + self.snapshots = set() + self._enable_snap_schedule() + self._allow_minute_granularity_snapshots() + + def tearDown(self): + if self.vol_created: + self._delete_test_volume() + self._disable_snap_schedule() + super(TestSnapSchedules, self).tearDown() + + def _schedule_to_timeout(self, schedule): + mult = schedule[-1] + period = int(schedule[0:-1]) + if mult == 'M': + return period * 60 + elif mult == 'h': + return period * 60 * 60 + elif mult == 'd': + return period * 60 * 60 * 24 + elif mult == 'w': + return period * 60 * 60 * 24 * 7 + else: + raise RuntimeError('schedule multiplier not recognized') + + def add_snap_create_cbk(self, cbk): + self.create_cbks.append(cbk) + def remove_snap_create_cbk(self, cbk): + self.create_cbks.remove(cbk) + + def add_snap_remove_cbk(self, cbk): + self.remove_cbks.append(cbk) + def remove_snap_remove_cbk(self, cbk): + self.remove_cbks.remove(cbk) + + def assert_if_not_verified(self): + self.assertTrue(len(self.create_cbks) == 0 and len(self.remove_cbks) == 0) + + def verify(self, dir_path, max_trials): + trials = 0 + snap_path = "{0}/.snap".format(dir_path) + while (len(self.create_cbks) or len(self.remove_cbks)) and trials < max_trials: + snapshots = set(self.mount_a.ls(path=snap_path)) + added = snapshots - self.snapshots + removed = self.snapshots - snapshots + if added: + for cbk in list(self.create_cbks): + res = cbk(list(added)) + if res: + self.remove_snap_create_cbk(cbk) + break + if 
def verify_schedule(self, dir_path, schedules, retentions=None):
    """
    Assert that the snap schedule listed for a path has the expected entries.

    Runs ``snap-schedule list`` in JSON format for *dir_path* and checks
    that every item of *schedules* is contained in the result's
    ``schedule`` field and every item of *retentions* in its
    ``retention`` field.

    :param dir_path: path whose schedule is listed.
    :param schedules: expected schedule entries.
    :param retentions: expected retention entries; None means none.
    """
    # None instead of a shared mutable list as the default.
    retentions = [] if retentions is None else retentions
    log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}')

    result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json')
    json_res = json.loads(result)
    log.debug(f'json_res: {json_res}')

    # assertIn reports the missing member and the container on failure.
    for schedule in schedules:
        self.assertIn(schedule, json_res['schedule'])
    for retention in retentions:
        self.assertIn(retention, json_res['retention'])
def test_snap_schedule_list_post_schedule_remove(self):
    """Test listing snap schedules post removal of a schedule

    After a schedule has been added and removed again, listing the
    schedules for the directory must fail with ENOENT.
    """
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h')

    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    try:
        self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
    except CommandFailedError as ce:
        if ce.exitstatus != errno.ENOENT:
            raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
    else:
        # Reached only when the command succeeded, which is the
        # unexpected outcome here.
        raise RuntimeError('expected "fs snap-schedule list" to fail')

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def test_multi_snap_schedule(self):
    """Test existence of multiple scheduled snapshots"""
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    # set schedules on the dir
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
    self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M')
    # Reference time for all expected-time calculations below.
    exec_time = time.time()

    # Expected wait (seconds) and timestamp-based name suffix for the
    # first snapshot of each schedule.
    timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...')
    timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M')
    log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...')
    to_wait = timo_2 + 2 # use max timeout

    # verify snapshot schedule
    self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M'])

    # Callbacks return True once "their" snapshot has shown up.  The
    # name is matched after the 10-character 'scheduled-' prefix, and
    # only the first 16 characters of the timestamp are compared, which
    # with SNAPSHOT_TS_FORMAT leaves the seconds out.
    def verify_added_1(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if snapname.startswith('scheduled-'):
            if snapname[10:26] == snap_sfx_1[:16]:
                self.check_scheduled_snapshot(exec_time, timo_1)
                return True
        return False
    def verify_added_2(snaps_added):
        log.debug(f'snapshots added={snaps_added}')
        self.assertEqual(len(snaps_added), 1)
        snapname = snaps_added[0]
        if snapname.startswith('scheduled-'):
            if snapname[10:26] == snap_sfx_2[:16]:
                self.check_scheduled_snapshot(exec_time, timo_2)
                return True
        return False
    self.add_snap_create_cbk(verify_added_1)
    self.add_snap_create_cbk(verify_added_2)
    # Poll for up to to_wait trials, then require that every registered
    # callback has been satisfied.
    self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
    self.assert_if_not_verified()

    # remove snapshot schedule
    self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)

    # remove all scheduled snapshots
    self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)

    self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
def get_snap_stats(self, dir_path):
    """
    Collect snapshot counts for a directory from two sources.

    Returns a dict with ``fs_count``, the number of entries listed under
    the directory's ``.snap``, and ``db_count``, the ``created_count`` of
    the first entry in the ``snap-schedule status`` JSON output.
    """
    # The first character of the path is dropped for the listing on the
    # mount (callers pass paths with a leading "/").
    listing_path = f"{dir_path}/.snap"[1:]
    on_disk = self.mount_a.ls(path=listing_path)
    fs_count = len(on_disk)
    log.debug(f'snapshots: {on_disk}')

    status_out = self.fs_snap_schedule_cmd('status', path=dir_path,
                                           format='json')
    first_entry = json.loads(status_out)[0]
    db_count = int(first_entry['created_count'])
    log.debug(f'json_res: {first_entry}')

    return {'fs_count': fs_count, 'db_count': db_count}
def test_concurrent_snap_creates(self):
    """Test concurrent snap creates in same file-system without db issues

    Test snap creates at same cadence on same fs to verify correct stats.
    A single SQLite DB connection handle cannot be used to run concurrent
    transactions; doing so results in transaction aborts. This test makes
    sure that proper care has been taken in the code to avoid such a
    situation by verifying the number of dirs created on the file system
    against the created_count in the schedule_meta table for the specific
    path.
    """
    self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])

    testdirs = [
        os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d))
        for d in range(10)
    ]

    for d in testdirs:
        # d[1:] drops the leading "/" (run_shell presumably resolves paths
        # relative to the mount point -- confirm)
        self.mount_a.run_shell(['mkdir', '-p', d[1:]])
        self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M')

    exec_time = time.time()
    # only the wait time is used here; the computed snap name is not needed
    timo_1, _ = self.calc_wait_time_and_snap_name(exec_time, '1M')

    for d in testdirs:
        self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M')

    # we wait for 10 snaps to be taken
    wait_time = timo_1 + 10 * 60 + 15
    time.sleep(wait_time)

    for d in testdirs:
        self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M')

    # compare on-disk snapshot count with the scheduler's created_count
    for d in testdirs:
        self.verify_snap_stats(d)

    for d in testdirs:
        self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M')
        self.remove_snapshots(d[1:])
        self.mount_a.run_shell(['rmdir', d[1:]])
def _check_subtree(self, rank, path, status=None):
    """
    Return True when the "get subtrees" output of `rank` contains an entry
    whose dir path equals `path` and whose auth_first equals `rank`;
    otherwise return False.
    """
    subtrees = self.fs.rank_asok(["get", "subtrees"], rank=rank, status=status)
    return any(
        entry['dir']['path'] == path and entry['auth_first'] == rank
        for entry in subtrees
    )
"ceph.dir.pin", "1") + self._wait_subtrees([("/d1", 1)], rank=1, path="/d1") + + last_created = self._get_last_created_snap(rank=0,status=status) + + # mds_kill_mdstable_at: + # 1: MDSTableServer::handle_prepare + # 2: MDSTableServer::_prepare_logged + # 5: MDSTableServer::handle_commit + # 6: MDSTableServer::_commit_logged + for i in [1,2,5,6]: + log.info("testing snapserver mds_kill_mdstable_at={0}".format(i)) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + self.fs.rank_freeze(True, rank=0) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s1{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=grace*2); + self.delete_mds_coredump(rank0['name']); + + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + status = self.fs.wait_for_daemons() + + proc.wait() + last_created += 1 + self.wait_until_true(lambda: self._get_last_created_snap(rank=0) == last_created, timeout=30) + + self.set_conf("mds", "mds_reconnect_timeout", "5") + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # set mds_kill_mdstable_at, also kill snapclient + for i in [2,5,6]: + log.info("testing snapserver mds_kill_mdstable_at={0}, also kill snapclient".format(i)) + status = self.fs.status() + last_created = self._get_last_created_snap(rank=0, status=status) + + rank0 = self.fs.get_rank(rank=0, status=status) + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=0) # prevent failover... + self.fs.rank_freeze(True, rank=1) # prevent failover... 
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s2{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=grace*3); + self.delete_mds_coredump(rank0['name']); + + self.fs.rank_signal(signal.SIGKILL, rank=1) + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + + self.fs.wait_for_state('up:resolve', rank=0, timeout=MDS_RESTART_GRACE) + if i in [2,5]: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + elif i == 6: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + + if i in [2,5]: + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + if i == 2: + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + else: + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # mds_kill_mdstable_at: + # 3: MDSTableClient::handle_request (got agree) + # 4: MDSTableClient::commit + # 7: MDSTableClient::handle_request (got ack) + for i in [3,4,7]: + log.info("testing snapclient mds_kill_mdstable_at={0}".format(i)) + last_created = self._get_last_created_snap(rank=0) + + status = self.fs.status() + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=1) # prevent failover... 
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=1, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s3{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=grace*2); + self.delete_mds_coredump(rank1['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + if i in [3,4]: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + elif i == 7: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + if i in [3,4]: + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + if i == 3: + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + else: + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # mds_kill_mdstable_at: + # 3: MDSTableClient::handle_request (got agree) + # 8: MDSTableServer::handle_rollback + log.info("testing snapclient mds_kill_mdstable_at=3, snapserver mds_kill_mdstable_at=8") + last_created = self._get_last_created_snap(rank=0) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=0) + self.fs.rank_freeze(True, rank=1) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "8"], rank=0, status=status) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "3"], rank=1, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s4"], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=grace*2); + 
self.delete_mds_coredump(rank1['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + + # rollback triggers assertion + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=grace*2); + self.delete_mds_coredump(rank0['name']); + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + + # mds.1 should re-send rollback message + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + def test_snapclient_cache(self): + """ + check if snapclient cache gets synced properly + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(3) + status = self.fs.wait_for_daemons() + + grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) + + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/dir"]) + self.mount_a.run_shell(["mkdir", "-p", "d0/d2/dir"]) + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1") + self.mount_a.setfattr("d0/d2", "ceph.dir.pin", "2") + self._wait_subtrees([("/d0", 0), ("/d0/d1", 1), ("/d0/d2", 2)], rank="all", status=status, path="/d0") + + def _check_snapclient_cache(snaps_dump, cache_dump=None, rank=0): + if cache_dump is None: + cache_dump = self._get_snapclient_dump(rank=rank) + for key, value in cache_dump.items(): + if value != snaps_dump[key]: + return False + return True; + + # sync after mksnap + last_created = self._get_last_created_snap(rank=0) + self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s1", "d0/d1/dir/.snap/s2"]) + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 
0, timeout=30) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # sync after rmsnap + last_destroyed = self._get_last_destroyed_snap(rank=0) + self.mount_a.run_shell(["rmdir", "d0/d1/dir/.snap/s1"]) + self.wait_until_true(lambda: len(self._get_pending_snap_destroy(rank=0)) == 0, timeout=30) + self.assertGreater(self._get_last_destroyed_snap(rank=0), last_destroyed) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # sync during mds recovers + self.fs.rank_fail(rank=2) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + self.fs.rank_fail(rank=0) + self.fs.rank_fail(rank=1) + status = self.fs.wait_for_daemons() + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # kill at MDSTableClient::handle_notify_prep + status = self.fs.status() + rank2 = self.fs.get_rank(rank=2, status=status) + self.fs.rank_freeze(True, rank=2) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "9"], rank=2, status=status) + proc = self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s3"], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=grace*2); + self.delete_mds_coredump(rank2['name']); + + # mksnap should wait for notify ack from mds.2 + self.assertFalse(proc.finished); + + # mksnap should 
proceed after mds.2 fails + self.fs.rank_fail(rank=2) + self.wait_until_true(lambda: proc.finished, timeout=30); + + self.fs.mds_restart(rank2['name']) + self.wait_for_daemon_start([rank2['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + self.mount_a.run_shell(["rmdir", Raw("d0/d1/dir/.snap/*")]) + + # kill at MDSTableClient::commit + # the recovering mds should sync all mds' cache when it enters resolve stage + self.set_conf("mds", "mds_reconnect_timeout", "5") + for i in range(1, 4): + status = self.fs.status() + rank2 = self.fs.get_rank(rank=2, status=status) + self.fs.rank_freeze(True, rank=2) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "4"], rank=2, status=status) + last_created = self._get_last_created_snap(rank=0) + proc = self.mount_a.run_shell(["mkdir", "d0/d2/dir/.snap/s{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=grace*2); + self.delete_mds_coredump(rank2['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + + if i in [2,4]: + self.fs.rank_fail(rank=0) + if i in [3,4]: + self.fs.rank_fail(rank=1) + + self.fs.rank_fail(rank=2) + self.fs.mds_restart(rank2['name']) + self.wait_for_daemon_start([rank2['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + rank0_cache = self._get_snapclient_dump(rank=0) + rank1_cache = self._get_snapclient_dump(rank=1) + rank2_cache = self._get_snapclient_dump(rank=2) + + self.assertGreater(int(rank0_cache["last_created"]), last_created) + self.assertEqual(rank0_cache, rank1_cache); + self.assertEqual(rank0_cache, rank2_cache); + + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertEqual(snaps_dump["last_created"], rank0_cache["last_created"]) + self.assertTrue(_check_snapclient_cache(snaps_dump, 
def test_multimds_past_parents(self):
    """
    Verify that past parents are recorded properly when a directory is
    renamed across authority subtrees.
    """
    def list_snaps(directory):
        # raw stdout of `ls <directory>/.snap`
        return self.mount_a.run_shell(["ls", directory + "/.snap"]).stdout.getvalue()

    self.fs.set_allow_new_snaps(True)
    self.fs.set_max_mds(2)
    status = self.fs.wait_for_daemons()

    self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty")
    # pin d0 to rank 0 and d1 to rank 1
    for pin_rank, top_dir in enumerate(("d0", "d1")):
        self.mount_a.setfattr(top_dir, "ceph.dir.pin", str(pin_rank))
    self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status)

    self.mount_a.run_shell(["mkdir", "d0/d3"])
    self.mount_a.run_shell(["mkdir", "d0/.snap/s1"])
    before_rename = list_snaps("d0/d3")

    # after moving d3 under d1 its snapshot listing must be unchanged
    self.mount_a.run_shell(["mv", "d0/d3", "d1/d3"])
    after_rename = list_snaps("d1/d3")
    self.assertEqual(after_rename, before_rename)

    # once the snapshot on d0 is removed, d3 must list no snapshots
    self.mount_a.run_shell(["rmdir", "d0/.snap/s1"])
    after_rmsnap = list_snaps("d1/d3")
    self.assertEqual(after_rmsnap, "")

    self.mount_a.run_shell(["rm", "-rf", "d0", "d1"])
def get_snap_name(self, dir_name, sno):
    """
    Build the path of snapshot number `sno` under `dir_name`, i.e.
    "<dir_name>/.snap/s_<sno>".
    """
    template = "{dir_name}/.snap/s_{sno}"
    return template.format(sno=sno, dir_name=dir_name)
def test_mds_max_snaps_per_dir_with_increased_limit(self):
    """
    Test raising the mds_max_snaps_per_dir option.

    With the limit set to the loaded value of mds_max_snaps_per_dir, try to
    create limit + 1 snapshots and require that the last creation fails.
    Then increase the limit by one and check that the additional snapshot
    can be created.
    """
    # first test the default limit
    new_limit = int(self.mds_max_snaps_per_dir)
    self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
    try:
        self.create_dir_and_snaps("accounts", new_limit + 1)
    except TestSnapshots.SnapLimitViolationException as e:
        # the reported failure must be the snapshot just past the limit
        self.assertEqual(e.failed_snapshot_number, new_limit + 1)
    else:
        # without this branch the test would not notice an unenforced limit
        self.fail("expected snap creation beyond mds_max_snaps_per_dir to fail")
    # then increase the limit by one and test
    new_limit = new_limit + 1
    self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
    sname = self.get_snap_name("accounts", new_limit)
    self.create_snap_dir(sname)
    self.delete_dir_and_snaps("accounts", new_limit)
Then try + creating another directory and ensure that additional directory + creation fails. + """ + # first test the new limit + new_limit = int(self.mds_max_snaps_per_dir) - 1 + self.create_dir_and_snaps("accounts", new_limit) + sname = self.get_snap_name("accounts", new_limit + 1) + # then reduce the limit by one and test + new_limit = new_limit - 1 + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + try: + self.create_snap_dir(sname) + except CommandFailedError: + # after reducing limit we expect the new snapshot creation to fail + pass + self.delete_dir_and_snaps("accounts", new_limit + 1) diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py new file mode 100644 index 000000000..9dd71c7bf --- /dev/null +++ b/qa/tasks/cephfs/test_strays.py @@ -0,0 +1,1026 @@ +import json +import time +import logging +from textwrap import dedent +import datetime +import gevent + +from teuthology.orchestra.run import CommandFailedError, Raw +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +log = logging.getLogger(__name__) + + +class TestStrays(CephFSTestCase): + MDSS_REQUIRED = 2 + + OPS_THROTTLE = 1 + FILES_THROTTLE = 2 + + # Range of different file sizes used in throttle test's workload + throttle_workload_size_range = 16 + + @for_teuthology + def test_ops_throttle(self): + self._test_throttling(self.OPS_THROTTLE) + + @for_teuthology + def test_files_throttle(self): + self._test_throttling(self.FILES_THROTTLE) + + def test_dir_deletion(self): + """ + That when deleting a bunch of dentries and the containing + directory, everything gets purged. + Catches cases where the client might e.g. fail to trim + the unlinked dir from its cache. 
def _test_throttling(self, throttle_type):
    """
    Run _do_test_throttling for throttle_type with a fresh self.data_log.

    If the run raises, every row collected in self.data_log is written to
    the log as one comma-separated line, then the exception is re-raised.
    """
    self.data_log = []
    try:
        outcome = self._do_test_throttling(throttle_type)
    except:
        # deliberately broad: dump the samples for any failure, then re-raise
        for row in self.data_log:
            log.info(",".join(field.__str__() for field in row))
        raise
    return outcome
+ """ + self.set_conf('mds', 'mds_max_purge_files', "%d" % files) + self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops) + + pgs = self.fs.mon_manager.get_pool_int_property( + self.fs.get_data_pool_name(), + "pg_num" + ) + ops_per_pg = float(ops) / pgs + self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg) + + # Test conditions depend on what we're going to be exercising. + # * Lift the threshold on whatever throttle we are *not* testing, so + # that the throttle of interest is the one that will be the bottleneck + # * Create either many small files (test file count throttling) or fewer + # large files (test op throttling) + if throttle_type == self.OPS_THROTTLE: + set_throttles(files=100000000, ops=16) + size_unit = 1024 * 1024 # big files, generate lots of ops + file_multiplier = 100 + elif throttle_type == self.FILES_THROTTLE: + # The default value of file limit is pretty permissive, so to avoid + # the test running too fast, create lots of files and set the limit + # pretty low. 
+ set_throttles(ops=100000000, files=6) + size_unit = 1024 # small, numerous files + file_multiplier = 200 + else: + raise NotImplementedError(throttle_type) + + # Pick up config changes + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + create_script = dedent(""" + import os + + mountpoint = "{mountpoint}" + subdir = "delete_me" + size_unit = {size_unit} + file_multiplier = {file_multiplier} + os.mkdir(os.path.join(mountpoint, subdir)) + for i in range(0, file_multiplier): + for size in range(0, {size_range}*size_unit, size_unit): + filename = "{{0}}_{{1}}.bin".format(i, size // size_unit) + with open(os.path.join(mountpoint, subdir, filename), 'w') as f: + f.write(size * 'x') + """.format( + mountpoint=self.mount_a.mountpoint, + size_unit=size_unit, + file_multiplier=file_multiplier, + size_range=self.throttle_workload_size_range + )) + + self.mount_a.run_python(create_script) + + # We will run the deletion in the background, to reduce the risk of it completing before + # we have started monitoring the stray statistics. + def background(): + self.mount_a.run_shell(["rm", "-rf", "delete_me"]) + self.fs.mds_asok(["flush", "journal"]) + + background_thread = gevent.spawn(background) + + total_inodes = file_multiplier * self.throttle_workload_size_range + 1 + mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds')) + mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds')) + + # During this phase we look for the concurrent ops to exceed half + # the limit (a heuristic) and not exceed the limit (a correctness + # condition). 
+ purge_timeout = 600 + elapsed = 0 + files_high_water = 0 + ops_high_water = 0 + + while True: + stats = self.fs.mds_asok(['perf', 'dump']) + mdc_stats = stats['mds_cache'] + pq_stats = stats['purge_queue'] + if elapsed >= purge_timeout: + raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats)) + + num_strays = mdc_stats['num_strays'] + num_strays_purging = pq_stats['pq_executing'] + num_purge_ops = pq_stats['pq_executing_ops'] + files_high_water = pq_stats['pq_executing_high_water'] + ops_high_water = pq_stats['pq_executing_ops_high_water'] + + self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water]) + + total_strays_created = mdc_stats['strays_created'] + total_strays_purged = pq_stats['pq_executed'] + + if total_strays_purged == total_inodes: + log.info("Complete purge in {0} seconds".format(elapsed)) + break + elif total_strays_purged > total_inodes: + raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats)) + else: + if throttle_type == self.OPS_THROTTLE: + # 11 is filer_max_purge_ops plus one for the backtrace: + # limit is allowed to be overshot by this much. + if num_purge_ops > mds_max_purge_ops + 11: + raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format( + num_purge_ops, mds_max_purge_ops + )) + elif throttle_type == self.FILES_THROTTLE: + if num_strays_purging > mds_max_purge_files: + raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format( + num_strays_purging, mds_max_purge_files + )) + else: + raise NotImplementedError(throttle_type) + + log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format( + num_strays_purging, num_strays, + total_strays_purged, total_strays_created + )) + time.sleep(1) + elapsed += 1 + + background_thread.join() + + # Check that we got up to a respectable rate during the purge. 
def get_stat(self, subsys, name, mds_id=None):
    """
    Return counter `name` of subsystem `subsys` from the result of
    self.fs.mds_asok(['perf', 'dump', subsys, name]) for the given mds_id.
    """
    command = ['perf', 'dump', subsys, name]
    perf_dump = self.fs.mds_asok(command, mds_id=mds_id)
    return perf_dump[subsys][name]
expect_val=expect_val, timeout=timeout, + reject_fn=lambda x: x > expect_val + ) + + def test_open_inode(self): + """ + That the case of a dentry unlinked while a client holds an + inode open is handled correctly. + + The inode should be moved into a stray dentry, while the original + dentry and directory should be purged. + + The inode's data should be purged when the client eventually closes + it. + """ + mount_a_client_id = self.mount_a.get_global_id() + + # Write some bytes to a file + size_mb = 8 + + # Hold the file open + p = self.mount_a.open_background("open_file") + self.mount_a.write_n_mb("open_file", size_mb) + open_file_ino = self.mount_a.path_to_ino("open_file") + + self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) + + # Unlink the dentry + self.mount_a.run_shell(["rm", "-f", "open_file"]) + + # Wait to see the stray count increment + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=1, timeout=60, reject_fn=lambda x: x > 1) + + # See that while the stray count has incremented, none have passed + # on to the purge queue + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) + + # See that the client still holds 2 caps + self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) + + # See that the data objects remain in the data pool + self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024)) + + # Now close the file + self.mount_a.kill_background(p) + + # Wait to see the client cap count decrement + self.wait_until_equal( + lambda: self.get_session(mount_a_client_id)['num_caps'], + expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1 + ) + # Wait to see the purge counter increment, stray count go to zero + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, timeout=6, reject_fn=lambda x: x > 1 + ) + 
self._wait_for_counter("purge_queue", "pq_executed", 1) + + # See that the data objects no longer exist + self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024)) + + self.await_data_pool_empty() + + def test_reintegration_limit(self): + """ + That the reintegration is not blocked by full directories. + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + time.sleep(10) # for config to reach MDS; async create is fast!! + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.mount_a.run_shell_payload(""" + mkdir a b + for i in `seq 1 50`; do + touch a/"$i" + ln a/"$i" b/"$i" + done + sync -f a b + rm a/* + """) + + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + + + def test_hardlink_reintegration(self): + """ + That removal of primary dentry of hardlinked inode results + in reintegration of inode into the previously-remote dentry, + rather than lingering as a stray indefinitely. + """ + # Write some bytes to file_a + size_mb = 8 + self.mount_a.run_shell(["mkdir", "dir_1"]) + self.mount_a.write_n_mb("dir_1/file_a", size_mb) + ino = self.mount_a.path_to_ino("dir_1/file_a") + + # Create a hardlink named file_b + self.mount_a.run_shell(["mkdir", "dir_2"]) + self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"]) + self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino) + + # Flush journal + self.fs.mds_asok(['flush', 'journal']) + + # See that backtrace for the file points to the file_a path + pre_unlink_bt = self.fs.read_backtrace(ino) + self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a") + + # empty mds cache. 
otherwise mds reintegrates stray when unlink finishes + self.mount_a.umount_wait() + self.fs.mds_asok(['flush', 'journal']) + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + # Unlink file_a + self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"]) + + # See that a stray was created + self.assertEqual(self.get_mdc_stat("num_strays"), 1) + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + + # Wait, see that data objects are still present (i.e. that the + # stray did not advance to purging given time) + time.sleep(30) + self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024)) + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) + + # See that before reintegration, the inode's backtrace points to a stray dir + self.fs.mds_asok(['flush', 'journal']) + self.assertTrue(self.get_backtrace_path(ino).startswith("stray")) + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + + # Do a metadata operation on the remaining link (mv is heavy handed, but + # others like touch may be satisfied from caps without poking MDS) + self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"]) + + # Stray reintegration should happen as a result of the eval_remote call + # on responding to a client request. 
+ self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + # See the reintegration counter increment + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + last_reintegrated = curr_reintegrated + + # Flush the journal + self.fs.mds_asok(['flush', 'journal']) + + # See that the backtrace for the file points to the remaining link's path + post_reint_bt = self.fs.read_backtrace(ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c") + + # mds should reintegrate the stray when unlink finishes + self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"]) + self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"]) + + # Stray reintegration should happen as a result of the notify_stray call + # on completion of unlink + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + # See the reintegration counter increment + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + last_reintegrated = curr_reintegrated + + # Flush the journal + self.fs.mds_asok(['flush', 'journal']) + + # See that the backtrace for the file points to the newest link's path + post_reint_bt = self.fs.read_backtrace(ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d") + + # Now really delete it + self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"]) + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + self.assert_purge_idle() + self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024)) + + # We caused the inode to go stray 3 times + self.assertEqual(self.get_mdc_stat("strays_created"), 3) + # We purged it at the last + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1) + + def test_reintegration_via_scrub(self): + """ + That reintegration is triggered via
recursive scrub. + """ + + self.mount_a.run_shell_payload(""" + mkdir -p a b + for i in `seq 1 50`; do + touch a/"$i" + ln a/"$i" b/"$i" + done + sync -f . + """) + + self.mount_a.remount() # drop caps/cache + self.fs.rank_tell(["flush", "journal"]) + self.fs.rank_fail() + self.fs.wait_for_daemons() + + # only / in cache, reintegration cannot happen + self.wait_until_equal( + lambda: len(self.fs.rank_tell(["dump", "tree", "/"])), + expect_val=3, + timeout=60 + ) + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.mount_a.run_shell_payload(""" + rm a/* + sync -f . + """) + self.wait_until_equal( + lambda: len(self.fs.rank_tell(["dump", "tree", "/"])), + expect_val=3, + timeout=60 + ) + self.assertEqual(self.get_mdc_stat("num_strays"), 50) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertEqual(last_reintegrated, curr_reintegrated) + + self.fs.rank_tell(["scrub", "start", "/", "recursive,force"]) + + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + # N.B.: reintegrate (rename RPC) may be tried multiple times from different code paths + self.assertGreaterEqual(curr_reintegrated, last_reintegrated+50) + + def test_mv_hardlink_cleanup(self): + """ + That when doing a rename from A to B, and B has hardlinks, + then we make a stray for B which is then reintegrated + into one of his hardlinks. 
+ """ + # Create file_a, file_b, and a hardlink to file_b + size_mb = 8 + self.mount_a.write_n_mb("file_a", size_mb) + file_a_ino = self.mount_a.path_to_ino("file_a") + + self.mount_a.write_n_mb("file_b", size_mb) + file_b_ino = self.mount_a.path_to_ino("file_b") + + self.mount_a.run_shell(["ln", "file_b", "linkto_b"]) + self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino) + + # mv file_a file_b + self.mount_a.run_shell(["mv", "file_a", "file_b"]) + + # Stray reintegration should happen as a result of the notify_stray call on + # completion of rename + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1) + + # No data objects should have been deleted, as both files still have linkage. + self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) + self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024)) + + self.fs.mds_asok(['flush', 'journal']) + + post_reint_bt = self.fs.read_backtrace(file_b_ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b") + + def _setup_two_ranks(self): + # Set up two MDSs + self.fs.set_max_mds(2) + + # See that we have two active MDSs + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + active_mds_names = self.fs.get_active_names() + rank_0_id = active_mds_names[0] + rank_1_id = active_mds_names[1] + log.info("Ranks 0 and 1 are {0} and {1}".format( + rank_0_id, rank_1_id)) + + # Get rid of other MDS daemons so that it's easier to know which + # daemons to expect in which ranks after restarts + for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}: + self.mds_cluster.mds_stop(unneeded_mds) + self.mds_cluster.mds_fail(unneeded_mds) + + return rank_0_id, rank_1_id + + def _force_migrate(self, path, rank=1): 
+ """ + :param rank: MDS rank to move it to + :param path: Filesystem path (string) to move + :return: None + """ + self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", str(rank), path]) + rpath = "/"+path + self._wait_subtrees([(rpath, rank)], rank=rank, path=rpath) + + def _is_stopped(self, rank): + mds_map = self.fs.get_mds_map() + return rank not in [i['rank'] for i in mds_map['info'].values()] + + def test_purge_on_shutdown(self): + """ + That when an MDS rank is shut down, its purge queue is + drained in the process. + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") + self.mds_cluster.mds_fail_restart(rank_1_id) + self.fs.wait_for_daemons() + + file_count = 5 + + self.mount_a.create_n_files("delete_me/file", file_count) + + self._force_migrate("delete_me") + + self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) + self.mount_a.umount_wait() + + # See all the strays go into purge queue + self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id) + self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id) + self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0) + + # See nothing get purged from the purge queue (yet) + time.sleep(10) + self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) + + # Shut down rank 1 + self.fs.set_max_mds(1) + + # It shouldn't proceed past stopping because it's still not allowed + # to purge + time.sleep(10) + self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) + self.assertFalse(self._is_stopped(1)) + + # Permit the daemon to start purging again + self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id), + 'injectargs', + "--mds_max_purge_files 100") + + # It should now proceed through shutdown + self.fs.wait_for_daemons(timeout=120) + + # ...and in the process purge all that data +
self.await_data_pool_empty() + + def test_migration_on_shutdown(self): + """ + That when an MDS rank is shut down, any non-purgeable strays + get migrated to another rank. + """ + + rank_0_id, rank_1_id = self._setup_two_ranks() + + # Create a non-purgeable stray in a ~mds1 stray directory + # by doing a hard link and deleting the original file + self.mount_a.run_shell_payload(""" +mkdir dir_1 dir_2 +touch dir_1/original +ln dir_1/original dir_2/linkto +""") + + self._force_migrate("dir_1") + self._force_migrate("dir_2", rank=0) + + # empty mds cache. otherwise mds reintegrates stray when unlink finishes + self.mount_a.umount_wait() + self.fs.mds_asok(['flush', 'journal'], rank_1_id) + self.fs.mds_asok(['cache', 'drop'], rank_1_id) + + self.mount_a.mount_wait() + self.mount_a.run_shell(["rm", "-f", "dir_1/original"]) + self.mount_a.umount_wait() + + self._wait_for_counter("mds_cache", "strays_created", 1, + mds_id=rank_1_id) + + # Shut down rank 1 + self.fs.set_max_mds(1) + self.fs.wait_for_daemons(timeout=120) + + # See that the stray counter on rank 0 has incremented + self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1) + + def test_migrate_unlinked_dir(self): + """ + Reproduce https://tracker.ceph.com/issues/53597 + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.mount_a.run_shell_payload(""" +mkdir pin +touch pin/placeholder +""") + + self._force_migrate("pin") + + # Hold the dir open so it cannot be purged + p = self.mount_a.open_dir_background("pin/to-be-unlinked") + + # Unlink the dentry + self.mount_a.run_shell(["rmdir", "pin/to-be-unlinked"]) + + # Wait to see the stray count increment + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays", mds_id=rank_1_id), + expect_val=1, timeout=60, reject_fn=lambda x: x > 1) + # but not purged + self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_1_id), 1) + self.assertEqual(self.get_mdc_stat("strays_enqueued", mds_id=rank_1_id), 0) + + # Test loading unlinked dir 
into cache + self.fs.mds_asok(['flush', 'journal'], rank_1_id) + self.fs.mds_asok(['cache', 'drop'], rank_1_id) + + # Shut down rank 1 + self.fs.set_max_mds(1) + self.fs.wait_for_daemons(timeout=120) + # Now the stray should be migrated to rank 0 + # self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_0_id), 1) + # https://github.com/ceph/ceph/pull/44335#issuecomment-1125940158 + + self.mount_a.kill_background(p) + + def assert_backtrace(self, ino, expected_path): + """ + Assert that the backtrace in the data pool for an inode matches + an expected /foo/bar path. + """ + expected_elements = expected_path.strip("/").split("/") + bt = self.fs.read_backtrace(ino) + actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']])) + self.assertListEqual(expected_elements, actual_elements) + + def get_backtrace_path(self, ino): + bt = self.fs.read_backtrace(ino) + elements = reversed([dn['dname'] for dn in bt['ancestors']]) + return "/".join(elements) + + def assert_purge_idle(self): + """ + Assert that the MDS perf counters indicate no strays exist and + no ongoing purge activity. Sanity check for when PurgeQueue should + be idle. + """ + mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache'] + pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue'] + self.assertEqual(mdc_stats["num_strays"], 0) + self.assertEqual(mdc_stats["num_strays_delayed"], 0) + self.assertEqual(pq_stats["pq_executing"], 0) + self.assertEqual(pq_stats["pq_executing_ops"], 0) + + def test_mv_cleanup(self): + """ + That when doing a rename from A to B, and B has no hardlinks, + then we make a stray for B and purge him. 
+ """ + # Create file_a and file_b, write some to both + size_mb = 8 + self.mount_a.write_n_mb("file_a", size_mb) + file_a_ino = self.mount_a.path_to_ino("file_a") + self.mount_a.write_n_mb("file_b", size_mb) + file_b_ino = self.mount_a.path_to_ino("file_b") + + self.fs.mds_asok(['flush', 'journal']) + self.assert_backtrace(file_a_ino, "file_a") + self.assert_backtrace(file_b_ino, "file_b") + + # mv file_a file_b + self.mount_a.run_shell(['mv', 'file_a', 'file_b']) + + # See that stray counter increments + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + # Wait for purge counter to increment + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + self.assert_purge_idle() + + # file_b should have been purged + self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024)) + + # Backtrace should have updated from file_a to file_b + self.fs.mds_asok(['flush', 'journal']) + self.assert_backtrace(file_a_ino, "file_b") + + # file_a's data should still exist + self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) + + def _pool_df(self, pool_name): + """ + Return a dict like + { + "kb_used": 0, + "bytes_used": 0, + "max_avail": 19630292406, + "objects": 0 + } + + :param pool_name: Which pool (must exist) + """ + out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty") + for p in json.loads(out)['pools']: + if p['name'] == pool_name: + return p['stats'] + + raise RuntimeError("Pool '{0}' not found".format(pool_name)) + + def await_data_pool_empty(self): + self.wait_until_true( + lambda: self._pool_df( + self.fs.get_data_pool_name() + )['objects'] == 0, + timeout=60) + + def test_snapshot_remove(self): + """ + That removal of a snapshot that references a now-unlinked file results + in purging on the stray for the file. 
+ """ + # Enable snapshots + self.fs.set_allow_new_snaps(True) + + # Create a dir with a file in it + size_mb = 8 + self.mount_a.run_shell(["mkdir", "snapdir"]) + self.mount_a.run_shell(["mkdir", "snapdir/subdir"]) + self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024) + file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a") + + # Snapshot the dir + self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"]) + + # Cause the head revision to deviate from the snapshot + self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb) + + # Flush the journal so that backtraces, dirfrag objects will actually be written + self.fs.mds_asok(["flush", "journal"]) + + # Unlink the file + self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"]) + self.mount_a.run_shell(["rmdir", "snapdir/subdir"]) + + # Unmount the client because when I come back to check the data is still + # in the file I don't want to just see what's in the page cache. + self.mount_a.umount_wait() + + self.assertEqual(self.get_mdc_stat("strays_created"), 2) + + # FIXME: at this stage we see a purge and the stray count drops to + # zero, but there's actually still a stray, so at the very + # least the StrayManager stats code is slightly off + + self.mount_a.mount_wait() + + # See that the data from the snapshotted revision of the file is still present + # and correct + self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024) + + # Remove the snapshot + self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"]) + + # Purging file_a doesn't happen until after we've flushed the journal, because + # it is referenced by the snapshotted subdir, and the snapshot isn't really + # gone until the journal references to it are gone + self.fs.mds_asok(["flush", "journal"]) + + # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs. 
+ # See also: http://tracker.ceph.com/issues/20072 + self.wait_until_true( + lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024), + timeout=60 + ) + + # See that a purge happens now + self._wait_for_counter("mds_cache", "strays_enqueued", 2) + self._wait_for_counter("purge_queue", "pq_executed", 2) + + self.await_data_pool_empty() + + def test_fancy_layout(self): + """ + purge stray file with fancy layout + """ + + file_name = "fancy_layout_file" + self.mount_a.run_shell(["touch", file_name]) + + file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608" + self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout) + + # 35MB requires 7 objects + size_mb = 35 + self.mount_a.write_n_mb(file_name, size_mb) + + self.mount_a.run_shell(["rm", "-f", file_name]) + self.fs.mds_asok(["flush", "journal"]) + + # can't use self.fs.data_objects_absent here, it does not support fancy layout + self.await_data_pool_empty() + + def test_dirfrag_limit(self): + """ + That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations). + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + time.sleep(10) # for config to reach MDS; async create is fast!! + + try: + self.mount_a.create_n_files("subdir/file", LOW_LIMIT+1, finaldirsync=True) + except CommandFailedError: + pass # ENOSPC + else: + self.fail("fragment size exceeded") + + + def test_dirfrag_limit_fragmented(self): + """ + That fragmentation (forced) will allow more entries to be created. + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + self.config_set('mds', 'mds_bal_merge_size', 1) # disable merging + time.sleep(10) # for config to reach MDS; async create is fast!! 
+ + # Test that we can go beyond the limit if we fragment the directory + self.mount_a.create_n_files("subdir/file", LOW_LIMIT, finaldirsync=True) + self.mount_a.umount_wait() # release client caps + + # Ensure that subdir is fragmented + self.fs.rank_asok(["dirfrag", "split", "/subdir", "0/0", "1"]) + self.fs.rank_asok(["flush", "journal"]) + + # Create 50% more files than the current fragment limit + self.mount_a.mount_wait() + self.mount_a.create_n_files("subdir/file", (LOW_LIMIT*3)//2, finaldirsync=True) + + def test_dirfrag_limit_strays(self): + """ + That unlinking fails when the stray directory fragment becomes too + large and that unlinking may continue once those strays are purged. + """ + + LOW_LIMIT = 10 + # N.B. this test is inherently racy because stray removal may be faster + # than slow(er) file creation. + self.config_set('mds', 'mds_bal_fragment_size_max', LOW_LIMIT) + time.sleep(10) # for config to reach MDS; async create is fast!! + + # Now test the stray directory size is limited and recovers + strays_before = self.get_mdc_stat("strays_created") + try: + # 10 stray directories: expect collisions + self.mount_a.create_n_files("subdir/file", LOW_LIMIT*10, finaldirsync=True, unlink=True) + except CommandFailedError: + pass # ENOSPC + else: + self.fail("fragment size exceeded") + strays_after = self.get_mdc_stat("strays_created") + self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT) + + self._wait_for_counter("mds_cache", "strays_enqueued", strays_after) + self._wait_for_counter("purge_queue", "pq_executed", strays_after) + + # verify new files can be created and unlinked + self.mount_a.create_n_files("subdir/file", LOW_LIMIT, dirsync=True, unlink=True) + + def test_purge_queue_upgrade(self): + """ + That when starting on a system with no purge queue in the metadata + pool, we silently create one. 
+ :return: + """ + + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.fs.radosm(["rm", "500.00000000"]) + self.mds_cluster.mds_restart() + self.fs.wait_for_daemons() + + def test_replicated_delete_speed(self): + """ + That deletions of replicated metadata are not pathologically slow + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") + self.mds_cluster.mds_fail_restart(rank_1_id) + self.fs.wait_for_daemons() + + file_count = 10 + + self.mount_a.create_n_files("delete_me/file", file_count) + + self._force_migrate("delete_me") + + begin = datetime.datetime.now() + self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) + end = datetime.datetime.now() + + # What we're really checking here is that we are completing client + # operations immediately rather than delaying until the next tick. + tick_period = float(self.fs.get_config("mds_tick_interval", + service_type="mds")) + + duration = (end - begin).total_seconds() + self.assertLess(duration, (file_count * tick_period) * 0.25) diff --git a/qa/tasks/cephfs/test_volume_client.py b/qa/tasks/cephfs/test_volume_client.py new file mode 100644 index 000000000..d1b2e760c --- /dev/null +++ b/qa/tasks/cephfs/test_volume_client.py @@ -0,0 +1,1735 @@ +from io import StringIO +import json +import logging +import os +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + + +class TestVolumeClient(CephFSTestCase): + # One for looking at the global filesystem, one for being + # the VolumeClient, two for mounting the created shares + CLIENTS_REQUIRED = 4 + + def setUp(self): + CephFSTestCase.setUp(self) + + def _volume_client_python(self, client, script, vol_prefix=None, ns_prefix=None): + # Can't dedent this *and* the script we pass in, because they might have different 
+ # levels of indentation to begin with, so leave this string zero-indented + if vol_prefix: + vol_prefix = "\"" + vol_prefix + "\"" + if ns_prefix: + ns_prefix = "\"" + ns_prefix + "\"" + return client.run_python(""" +from __future__ import print_function +from ceph_volume_client import CephFSVolumeClient, VolumePath +from sys import version_info as sys_version_info +from rados import OSError as rados_OSError +import logging +log = logging.getLogger("ceph_volume_client") +log.addHandler(logging.StreamHandler()) +log.setLevel(logging.DEBUG) +vc = CephFSVolumeClient("manila", "{conf_path}", "ceph", {vol_prefix}, {ns_prefix}) +vc.connect() +{payload} +vc.disconnect() + """.format(payload=script, conf_path=client.config_path, + vol_prefix=vol_prefix, ns_prefix=ns_prefix)) + + def _configure_vc_auth(self, mount, id_name): + """ + Set up auth credentials for the VolumeClient user + """ + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.{name}".format(name=id_name), + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + mount.client_id = id_name + mount.client_remote.write_file(mount.get_keyring_path(), + out, sudo=True) + self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) + + def _configure_guest_auth(self, volumeclient_mount, guest_mount, + guest_entity, cephfs_mntpt, + namespace_prefix=None, readonly=False, + tenant_id=None, allow_existing_id=False): + """ + Set up auth credentials for the guest client to mount a volume. + + :param volumeclient_mount: mount used as the handle for driving + volumeclient. + :param guest_mount: mount used by the guest client. + :param guest_entity: auth ID used by the guest client. + :param cephfs_mntpt: path of the volume. + :param namespace_prefix: name prefix of the RADOS namespace, which + is used for the volume's layout. + :param readonly: defaults to False. If set to 'True' only read-only + mount access is granted to the guest. 
+ :param tenant_id: (OpenStack) tenant ID of the guest client. + """ + + head, volume_id = os.path.split(cephfs_mntpt) + head, group_id = os.path.split(head) + head, volume_prefix = os.path.split(head) + volume_prefix = "/" + volume_prefix + + # Authorize the guest client's auth ID to mount the volume. + key = self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + auth_result = vc.authorize(vp, "{guest_entity}", readonly={readonly}, + tenant_id="{tenant_id}", + allow_existing_id="{allow_existing_id}") + print(auth_result['auth_key']) + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guest_entity, + readonly=readonly, + tenant_id=tenant_id, + allow_existing_id=allow_existing_id)), volume_prefix, namespace_prefix + ) + + # CephFSVolumeClient's authorize() does not return the secret + # key to a caller who isn't multi-tenant aware. Explicitly + # query the key for such a client. + if not tenant_id: + key = self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-key", "client.{name}".format(name=guest_entity), + ) + + # The guest auth ID should exist. + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(guest_entity), existing_ids) + + # Create keyring file for the guest client. + keyring_txt = dedent(""" + [client.{guest_entity}] + key = {key} + + """.format( + guest_entity=guest_entity, + key=key + )) + guest_mount.client_id = guest_entity + guest_mount.client_remote.write_file(guest_mount.get_keyring_path(), + keyring_txt, sudo=True) + + # Add a guest client section to the ceph config file. 
+ self.set_conf("client.{0}".format(guest_entity), "client quota", "True") + self.set_conf("client.{0}".format(guest_entity), "debug client", "20") + self.set_conf("client.{0}".format(guest_entity), "debug objecter", "20") + self.set_conf("client.{0}".format(guest_entity), + "keyring", guest_mount.get_keyring_path()) + + def test_default_prefix(self): + group_id = "grpid" + volume_id = "volid" + DEFAULT_VOL_PREFIX = "volumes" + DEFAULT_NS_PREFIX = "fsvolumens_" + + self.mount_b.umount_wait() + self._configure_vc_auth(self.mount_b, "manila") + + #create a volume with default prefix + self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 10, data_isolated=True) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # The dir should be created + self.mount_a.stat(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id)) + + #namespace should be set + ns_in_attr = self.mount_a.getfattr(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id), "ceph.dir.layout.pool_namespace") + namespace = "{0}{1}".format(DEFAULT_NS_PREFIX, volume_id) + self.assertEqual(namespace, ns_in_attr) + + + def test_lifecycle(self): + """ + General smoke test for create, extend, destroy + """ + + # I'm going to use mount_c later as a guest for mounting the created + # shares + self.mounts[2].umount_wait() + + # I'm going to leave mount_b unmounted and just use it as a handle for + # driving volumeclient. It's a little hacky but we don't have a more + # general concept for librados/libcephfs clients as opposed to full + # blown mounting clients. 
+ self.mount_b.umount_wait() + self._configure_vc_auth(self.mount_b, "manila") + + guest_entity = "guest" + group_id = "grpid" + volume_id = "volid" + + volume_prefix = "/myprefix" + namespace_prefix = "mynsprefix_" + + # Create a 100MB volume + volume_size = 100 + cephfs_mntpt = self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 1024*1024*{volume_size}) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id, + volume_size=volume_size + )), volume_prefix, namespace_prefix) + + # The dir should be created + self.mount_a.stat(os.path.join("myprefix", group_id, volume_id)) + + # Authorize and configure credentials for the guest to mount the + # the volume. + self._configure_guest_auth(self.mount_b, self.mounts[2], guest_entity, + cephfs_mntpt, namespace_prefix) + self.mounts[2].mount_wait(cephfs_mntpt=cephfs_mntpt) + + # The kernel client doesn't have the quota-based df behaviour, + # or quotas at all, so only exercise the client behaviour when + # running fuse. + if isinstance(self.mounts[2], FuseMount): + # df should see volume size, same as the quota set on volume's dir + self.assertEqual(self.mounts[2].df()['total'], + volume_size * 1024 * 1024) + self.assertEqual( + self.mount_a.getfattr( + os.path.join(volume_prefix.strip("/"), group_id, volume_id), + "ceph.quota.max_bytes"), + "%s" % (volume_size * 1024 * 1024)) + + # df granularity is 4MB block so have to write at least that much + data_bin_mb = 4 + self.mounts[2].write_n_mb("data.bin", data_bin_mb) + + # Write something outside volume to check this space usage is + # not reported in the volume's DF. + other_bin_mb = 8 + self.mount_a.write_n_mb("other.bin", other_bin_mb) + + # global: df should see all the writes (data + other). 
This is a >= + # rather than a == because the global space used includes all pools + def check_df(): + used = self.mount_a.df()['used'] + return used >= (other_bin_mb * 1024 * 1024) + + self.wait_until_true(check_df, timeout=30) + + # Hack: do a metadata IO to kick rstats + self.mounts[2].run_shell(["touch", "foo"]) + + # volume: df should see the data_bin_mb consumed from quota, same + # as the rbytes for the volume's dir + self.wait_until_equal( + lambda: self.mounts[2].df()['used'], + data_bin_mb * 1024 * 1024, timeout=60) + self.wait_until_equal( + lambda: self.mount_a.getfattr( + os.path.join(volume_prefix.strip("/"), group_id, volume_id), + "ceph.dir.rbytes"), + "%s" % (data_bin_mb * 1024 * 1024), timeout=60) + + # sync so that file data is persisted to rados + self.mounts[2].run_shell(["sync"]) + + # Our data should stay in a particular rados namespace + pool_name = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool") + namespace = "{0}{1}".format(namespace_prefix, volume_id) + ns_in_attr = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool_namespace") + self.assertEqual(namespace, ns_in_attr) + + objects_in_ns = set(self.fs.rados(["ls"], pool=pool_name, namespace=namespace, stdout=StringIO()).stdout.getvalue().split("\n")) + self.assertNotEqual(objects_in_ns, set()) + + # De-authorize the guest + self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + vc.evict("{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guest_entity + )), volume_prefix, namespace_prefix) + + # Once deauthorized, the client should be unable to do any more metadata ops + # The way that the client currently behaves here is to block (it acts like + # it has lost network, because there is nothing to tell it that its messages + # are being dropped because its identity is gone) + background = 
self.mounts[2].write_n_mb("rogue.bin", 1, wait=False) + try: + background.wait() + except CommandFailedError: + # command failed with EBLOCKLISTED? + if "transport endpoint shutdown" in background.stderr.getvalue(): + pass + else: + raise + + # After deauthorisation, the client ID should be gone (this was the only + # volume it was authorised for) + self.assertNotIn("client.{0}".format(guest_entity), [e['entity'] for e in self.auth_list()]) + + # Clean up the dead mount (ceph-fuse's behaviour here is a bit undefined) + self.mounts[2].umount_wait() + + self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + vc.purge_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + )), volume_prefix, namespace_prefix) + + def test_idempotency(self): + """ + That the volumeclient interface works when calling everything twice + """ + self.mount_b.umount_wait() + self._configure_vc_auth(self.mount_b, "manila") + + guest_entity = "guest" + group_id = "grpid" + volume_id = "volid" + self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 10) + vc.create_volume(vp, 10) + vc.authorize(vp, "{guest_entity}") + vc.authorize(vp, "{guest_entity}") + vc.deauthorize(vp, "{guest_entity}") + vc.deauthorize(vp, "{guest_entity}") + vc.delete_volume(vp) + vc.delete_volume(vp) + vc.purge_volume(vp) + vc.purge_volume(vp) + + vc.create_volume(vp, 10, data_isolated=True) + vc.create_volume(vp, 10, data_isolated=True) + vc.authorize(vp, "{guest_entity}") + vc.authorize(vp, "{guest_entity}") + vc.deauthorize(vp, "{guest_entity}") + vc.deauthorize(vp, "{guest_entity}") + vc.evict("{guest_entity}") + vc.evict("{guest_entity}") + vc.delete_volume(vp, data_isolated=True) + vc.delete_volume(vp, data_isolated=True) + vc.purge_volume(vp, data_isolated=True) + vc.purge_volume(vp, data_isolated=True) + + vc.create_volume(vp, 10, namespace_isolated=False) + 
vc.create_volume(vp, 10, namespace_isolated=False) + vc.authorize(vp, "{guest_entity}") + vc.authorize(vp, "{guest_entity}") + vc.deauthorize(vp, "{guest_entity}") + vc.deauthorize(vp, "{guest_entity}") + vc.evict("{guest_entity}") + vc.evict("{guest_entity}") + vc.delete_volume(vp) + vc.delete_volume(vp) + vc.purge_volume(vp) + vc.purge_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guest_entity + ))) + + def test_data_isolated(self): + """ + That data isolated shares get their own pool + :return: + """ + + self.mount_b.umount_wait() + self._configure_vc_auth(self.mount_b, "manila") + + pools_a = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] + + group_id = "grpid" + volume_id = "volid" + self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, data_isolated=True) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + pools_b = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] + + # Should have created one new pool + new_pools = set(p['pool_name'] for p in pools_b) - set([p['pool_name'] for p in pools_a]) + self.assertEqual(len(new_pools), 1) + + def test_15303(self): + """ + Reproducer for #15303 "Client holds incorrect complete flag on dir + after losing caps" (http://tracker.ceph.com/issues/15303) + """ + for m in self.mounts: + m.umount_wait() + + # Create a dir on mount A + self.mount_a.mount_wait() + self.mount_a.run_shell(["mkdir", "parent1"]) + self.mount_a.run_shell(["mkdir", "parent2"]) + self.mount_a.run_shell(["mkdir", "parent1/mydir"]) + + # Put some files in it from mount B + self.mount_b.mount_wait() + self.mount_b.run_shell(["touch", "parent1/mydir/afile"]) + self.mount_b.umount_wait() + + # List the dir's contents on mount A + self.assertListEqual(self.mount_a.ls("parent1/mydir"), + ["afile"]) + + def test_evict_client(self): + """ + That a 
volume client can be evicted based on its auth ID and the volume + path it has mounted. + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to inject client metadata") + + # mounts[1] would be used as handle for driving VolumeClient. mounts[2] + # and mounts[3] would be used as guests to mount the volumes/shares. + + for i in range(1, 4): + self.mounts[i].umount_wait() + + volumeclient_mount = self.mounts[1] + self._configure_vc_auth(volumeclient_mount, "manila") + guest_mounts = (self.mounts[2], self.mounts[3]) + + guest_entity = "guest" + group_id = "grpid" + cephfs_mntpts = [] + volume_ids = [] + + # Create two volumes. Authorize 'guest' auth ID to mount the two + # volumes. Mount the two volumes. Write data to the volumes. + for i in range(2): + # Create volume. + volume_ids.append("volid_{0}".format(str(i))) + cephfs_mntpts.append( + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 10 * 1024 * 1024) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_ids[i] + )))) + + # Authorize 'guest' auth ID to mount the volume. + self._configure_guest_auth(volumeclient_mount, guest_mounts[i], + guest_entity, cephfs_mntpts[i]) + + # Mount the volume. + guest_mounts[i].mountpoint_dir_name = 'mnt.{id}.{suffix}'.format( + id=guest_entity, suffix=str(i)) + guest_mounts[i].mount_wait(cephfs_mntpt=cephfs_mntpts[i]) + guest_mounts[i].write_n_mb("data.bin", 1) + + + # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted + # one volume. 
+ self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + vc.evict("{guest_entity}", volume_path=vp) + """.format( + group_id=group_id, + volume_id=volume_ids[0], + guest_entity=guest_entity + ))) + + # Evicted guest client, guest_mounts[0], should not be able to do + # anymore metadata ops. It should start failing all operations + # when it sees that its own address is in the blocklist. + try: + guest_mounts[0].write_n_mb("rogue.bin", 1) + except CommandFailedError: + pass + else: + raise RuntimeError("post-eviction write should have failed!") + + # The blocklisted guest client should now be unmountable + guest_mounts[0].umount_wait() + + # Guest client, guest_mounts[1], using the same auth ID 'guest', but + # has mounted the other volume, should be able to use its volume + # unaffected. + guest_mounts[1].write_n_mb("data.bin.1", 1) + + # Cleanup. + for i in range(2): + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + vc.delete_volume(vp) + vc.purge_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_ids[i], + guest_entity=guest_entity + ))) + + + def test_purge(self): + """ + Reproducer for #15266, exception trying to purge volumes that + contain non-ascii filenames. + + Additionally test any other purge corner cases here. + """ + # I'm going to leave mount_b unmounted and just use it as a handle for + # driving volumeclient. It's a little hacky but we don't have a more + # general concept for librados/libcephfs clients as opposed to full + # blown mounting clients. 
+ self.mount_b.umount_wait() + self._configure_vc_auth(self.mount_b, "manila") + + group_id = "grpid" + # Use a unicode volume ID (like Manila), to reproduce #15266 + volume_id = u"volid" + + # Create + cephfs_mntpt = self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", u"{volume_id}") + create_result = vc.create_volume(vp, 10) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id + ))) + + # Strip leading "/" + cephfs_mntpt = cephfs_mntpt[1:] + + # A file with non-ascii characters + self.mount_a.run_shell(["touch", os.path.join(cephfs_mntpt, u"b\u00F6b")]) + + # A file with no permissions to do anything + self.mount_a.run_shell(["touch", os.path.join(cephfs_mntpt, "noperms")]) + self.mount_a.run_shell(["chmod", "0000", os.path.join(cephfs_mntpt, "noperms")]) + + self._volume_client_python(self.mount_b, dedent(""" + vp = VolumePath("{group_id}", u"{volume_id}") + vc.delete_volume(vp) + vc.purge_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id + ))) + + # Check it's really gone + self.assertEqual(self.mount_a.ls("volumes/_deleting"), []) + self.assertEqual(self.mount_a.ls("volumes/"), ["_deleting", group_id]) + + def test_readonly_authorization(self): + """ + That guest clients can be restricted to read-only mounts of volumes. + """ + + volumeclient_mount = self.mounts[1] + guest_mount = self.mounts[2] + volumeclient_mount.umount_wait() + guest_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + guest_entity = "guest" + group_id = "grpid" + volume_id = "volid" + + # Create a volume. 
+ cephfs_mntpt = self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 1024*1024*10) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Authorize and configure credentials for the guest to mount the + # the volume with read-write access. + self._configure_guest_auth(volumeclient_mount, guest_mount, + guest_entity, cephfs_mntpt, readonly=False) + + # Mount the volume, and write to it. + guest_mount.mount_wait(cephfs_mntpt=cephfs_mntpt) + guest_mount.write_n_mb("data.bin", 1) + + # Change the guest auth ID's authorization to read-only mount access. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guest_entity + ))) + self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity, + cephfs_mntpt, readonly=True) + + # The effect of the change in access level to read-only is not + # immediate. The guest sees the change only after a remount of + # the volume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=cephfs_mntpt) + + # Read existing content of the volume. + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # Cannot write into read-only volume. + try: + guest_mount.write_n_mb("rogue.bin", 1) + except CommandFailedError: + pass + + def test_get_authorized_ids(self): + """ + That for a volume, the authorized IDs and their access levels + can be obtained using CephFSVolumeClient's get_authorized_ids(). + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. 
+ self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "grpid" + volume_id = "volid" + guest_entity_1 = "guest1" + guest_entity_2 = "guest2" + + log.info("print(group ID: {0})".format(group_id)) + + # Create a volume. + auths = self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + auths = vc.get_authorized_ids(vp) + print(auths) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + # Check the list of authorized IDs for the volume. + self.assertEqual('None', auths) + + # Allow two auth IDs access to the volume. + auths = self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{guest_entity_1}", readonly=False) + vc.authorize(vp, "{guest_entity_2}", readonly=True) + auths = vc.get_authorized_ids(vp) + print(auths) + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity_1=guest_entity_1, + guest_entity_2=guest_entity_2, + ))) + # Check the list of authorized IDs and their access levels. + expected_result = [('guest1', 'rw'), ('guest2', 'r')] + self.assertCountEqual(str(expected_result), auths) + + # Disallow both the auth IDs' access to the volume. + auths = self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity_1}") + vc.deauthorize(vp, "{guest_entity_2}") + auths = vc.get_authorized_ids(vp) + print(auths) + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity_1=guest_entity_1, + guest_entity_2=guest_entity_2, + ))) + # Check the list of authorized IDs for the volume. + self.assertEqual('None', auths) + + def test_multitenant_volumes(self): + """ + That volume access can be restricted to a tenant. + + That metadata used to enforce tenant isolation of + volumes is stored as a two-way mapping between auth + IDs and volumes that they're authorized to access. 
+ """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id = "volumeid" + + # Guest clients belonging to different tenants, but using the same + # auth ID. + auth_id = "guest" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + guestclient_2 = { + "auth_id": auth_id, + "tenant_id": "tenant2", + } + + # Create a volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Check that volume metadata file is created on volume creation. + vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id) + self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + + # Authorize 'guestclient_1', using auth ID 'guest' and belonging to + # 'tenant1', with 'rw' access to the volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Check that auth metadata file for auth ID 'guest', is + # created on authorizing 'guest' access to the volume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different volumes, versioning details, etc. 
+ expected_auth_metadata = { + "version": 2, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "groupid/volumeid": { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._volume_client_python(volumeclient_mount, dedent(""" + import json + vp = VolumePath("{group_id}", "{volume_id}") + auth_metadata = vc._auth_metadata_get("{auth_id}") + print(json.dumps(auth_metadata)) + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient_1["auth_id"], + ))) + auth_metadata = json.loads(auth_metadata) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Verify that the volume metadata file stores info about auth IDs + # and their access levels to the volume, versioning details, etc. + expected_vol_metadata = { + "version": 2, + "compat_version": 1, + "auths": { + "guest": { + "dirty": False, + "access_level": "rw" + } + } + } + + vol_metadata = self._volume_client_python(volumeclient_mount, dedent(""" + import json + vp = VolumePath("{group_id}", "{volume_id}") + volume_metadata = vc._volume_metadata_get(vp) + print(json.dumps(volume_metadata)) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + vol_metadata = json.loads(vol_metadata) + + self.assertGreaterEqual(vol_metadata["version"], expected_vol_metadata["version"]) + del expected_vol_metadata["version"] + del vol_metadata["version"] + self.assertEqual(expected_vol_metadata, vol_metadata) + + # Cannot authorize 'guestclient_2' to access the volume. + # It uses auth ID 'guest', which has already been used by a + # 'guestclient_1' belonging to an another tenant for accessing + # the volume. 
+ with self.assertRaises(CommandFailedError): + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient_2["auth_id"], + tenant_id=guestclient_2["tenant_id"] + ))) + + # Check that auth metadata file is cleaned up on removing + # auth ID's only access to a volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guestclient_1["auth_id"] + ))) + + self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Check that volume metadata file is cleaned up on volume deletion. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + + def test_authorize_auth_id_not_created_by_ceph_volume_client(self): + """ + If the auth_id already exists and is not created by + ceph_volume_client, it's not allowed to authorize + the auth-id by default. + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id = "volumeid" + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create a volume. 
+ self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Cannot authorize 'guestclient_1' to access the volume. + # It uses auth ID 'guest1', which already exists and not + # created by ceph_volume_client + with self.assertRaises(CommandFailedError): + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Delete volume + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + def test_authorize_allow_existing_id_option(self): + """ + If the auth_id already exists and is not created by + ceph_volume_client, it's not allowed to authorize + the auth-id by default but is allowed with option + allow_existing_id. + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id = "volumeid" + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create a volume. 
+ self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Cannot authorize 'guestclient_1' to access the volume + # by default, which already exists and not created by + # ceph_volume_client but is allowed with option 'allow_existing_id'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}", + allow_existing_id="{allow_existing_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"], + allow_existing_id=True + ))) + + # Delete volume + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + def test_deauthorize_auth_id_after_out_of_band_update(self): + """ + If the auth_id authorized by ceph_volume_client is updated + out of band, the auth_id should not be deleted after a + deauthorize. It should only remove caps associated it. + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id = "volumeid" + + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create a volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Authorize 'guestclient_1' to access the volume. 
+ self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Update caps for guestclient_1 out of band + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "caps", "client.guest1", + "mds", "allow rw path=/volumes/groupid, allow rw path=/volumes/groupid/volumeid", + "osd", "allow rw pool=cephfs_data namespace=fsvolumens_volumeid", + "mon", "allow r", + "mgr", "allow *" + ) + + # Deauthorize guestclient_1 + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guestclient_1["auth_id"] + ))) + + # Validate the caps of guestclient_1 after deauthorize. It should not have deleted + # guestclient_1. The mgr and mds caps should be present which was updated out of band. + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty")) + + self.assertEqual("client.guest1", out[0]["entity"]) + self.assertEqual("allow rw path=/volumes/groupid", out[0]["caps"]["mds"]) + self.assertEqual("allow *", out[0]["caps"]["mgr"]) + self.assertNotIn("osd", out[0]["caps"]) + + # Delete volume + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + def test_recover_metadata(self): + """ + That volume client can recover from partial auth updates using + metadata files, which store auth info and its update status info. + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. 
+ self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id = "volumeid" + + guestclient = { + "auth_id": "guest", + "tenant_id": "tenant", + } + + # Create a volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Authorize 'guestclient' access to the volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient["auth_id"], + tenant_id=guestclient["tenant_id"] + ))) + + # Check that auth metadata file for auth ID 'guest' is created. + auth_metadata_filename = "${0}.meta".format(guestclient["auth_id"]) + self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run recovery procedure. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + auth_metadata = vc._auth_metadata_get("{auth_id}") + auth_metadata['dirty'] = True + vc._auth_metadata_set("{auth_id}", auth_metadata) + vc.recover() + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient["auth_id"], + ))) + + def test_update_old_style_auth_metadata_to_new_during_recover(self): + """ + From nautilus onwards 'volumes' created by ceph_volume_client were + renamed and used as CephFS subvolumes accessed via the ceph-mgr + interface. Hence it makes sense to store the subvolume data in + auth-metadata file with 'subvolumes' key instead of 'volumes' key. + This test validates the transparent update of 'volumes' key to + 'subvolumes' key in auth metadata file during recover. 
+ """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id = "volumeid" + + guestclient = { + "auth_id": "guest", + "tenant_id": "tenant", + } + + # Create a volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.create_volume(vp, 1024*1024*10) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + + # Check that volume metadata file is created on volume creation. + vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id) + self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + + # Authorize 'guestclient' access to the volume. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient["auth_id"], + tenant_id=guestclient["tenant_id"] + ))) + + # Check that auth metadata file for auth ID 'guest' is created. + auth_metadata_filename = "${0}.meta".format(guestclient["auth_id"]) + self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + self.mounts[0].run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)]) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different volumes, versioning details, etc. 
+ expected_auth_metadata = { + "version": 2, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant", + "subvolumes": { + "groupid/volumeid": { + "dirty": False, + "access_level": "rw" + } + } + } + + # Induce partial auth update state by modifying the auth metadata file, + # and then run recovery procedure. This should also update 'volumes' key + # to 'subvolumes'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + auth_metadata = vc._auth_metadata_get("{auth_id}") + auth_metadata['dirty'] = True + vc._auth_metadata_set("{auth_id}", auth_metadata) + vc.recover() + """.format( + group_id=group_id, + volume_id=volume_id, + auth_id=guestclient["auth_id"], + ))) + + auth_metadata = self._volume_client_python(volumeclient_mount, dedent(""" + import json + auth_metadata = vc._auth_metadata_get("{auth_id}") + print(json.dumps(auth_metadata)) + """.format( + auth_id=guestclient["auth_id"], + ))) + auth_metadata = json.loads(auth_metadata) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Check that auth metadata file is cleaned up on removing + # auth ID's access to volumes 'volumeid'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id, + guest_entity=guestclient["auth_id"] + ))) + self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Check that volume metadata file is cleaned up on volume deletion. 
+ self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + ))) + self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + + def test_update_old_style_auth_metadata_to_new_during_authorize(self): + """ + From nautilus onwards 'volumes' created by ceph_volume_client were + renamed and used as CephFS subvolumes accessed via the ceph-mgr + interface. Hence it makes sense to store the subvolume data in + auth-metadata file with 'subvolumes' key instead of 'volumes' key. + This test validates the transparent update of 'volumes' key to + 'subvolumes' key in auth metadata file during authorize. + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id1 = "volumeid1" + volume_id2 = "volumeid2" + + auth_id = "guest" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create a volume volumeid1. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 10*1024*1024) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id1, + ))) + + # Create a volume volumeid2. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 10*1024*1024) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id2, + ))) + + # Check that volume metadata file is created on volume creation. 
+ vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id1) + self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + vol_metadata_filename2 = "_{0}:{1}.meta".format(group_id, volume_id2) + self.assertIn(vol_metadata_filename2, self.mounts[0].ls("volumes")) + + # Authorize 'guestclient_1', using auth ID 'guest' and belonging to + # 'tenant1', with 'rw' access to the volume 'volumeid1'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id1, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Check that auth metadata file for auth ID 'guest', is + # created on authorizing 'guest' access to the volume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + self.mounts[0].run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)]) + + # Authorize 'guestclient_1', using auth ID 'guest' and belonging to + # 'tenant1', with 'rw' access to the volume 'volumeid2'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id2, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different volumes, versioning details, etc. 
+ expected_auth_metadata = { + "version": 2, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "groupid/volumeid1": { + "dirty": False, + "access_level": "rw" + }, + "groupid/volumeid2": { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._volume_client_python(volumeclient_mount, dedent(""" + import json + auth_metadata = vc._auth_metadata_get("{auth_id}") + print(json.dumps(auth_metadata)) + """.format( + auth_id=guestclient_1["auth_id"], + ))) + auth_metadata = json.loads(auth_metadata) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Check that auth metadata file is cleaned up on removing + # auth ID's access to volumes 'volumeid1' and 'volumeid2'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id1, + guest_entity=guestclient_1["auth_id"] + ))) + + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id2, + guest_entity=guestclient_1["auth_id"] + ))) + self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Check that volume metadata file is cleaned up on volume deletion. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id1, + ))) + self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + + # Check that volume metadata file is cleaned up on volume deletion. 
+ self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id2, + ))) + self.assertNotIn(vol_metadata_filename2, self.mounts[0].ls("volumes")) + + def test_update_old_style_auth_metadata_to_new_during_deauthorize(self): + """ + From nautilus onwards 'volumes' created by ceph_volume_client were + renamed and used as CephFS subvolumes accessed via the ceph-mgr + interface. Hence it makes sense to store the subvolume data in + auth-metadata file with 'subvolumes' key instead of 'volumes' key. + This test validates the transparent update of 'volumes' key to + 'subvolumes' key in auth metadata file during de-authorize. + """ + volumeclient_mount = self.mounts[1] + volumeclient_mount.umount_wait() + + # Configure volumeclient_mount as the handle for driving volumeclient. + self._configure_vc_auth(volumeclient_mount, "manila") + + group_id = "groupid" + volume_id1 = "volumeid1" + volume_id2 = "volumeid2" + + auth_id = "guest" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create a volume volumeid1. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 10*1024*1024) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id1, + ))) + + # Create a volume volumeid2. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 10*1024*1024) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id2, + ))) + + # Check that volume metadata file is created on volume creation. 
+ vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id1) + self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + vol_metadata_filename2 = "_{0}:{1}.meta".format(group_id, volume_id2) + self.assertIn(vol_metadata_filename2, self.mounts[0].ls("volumes")) + + # Authorize 'guestclient_1', using auth ID 'guest' and belonging to + # 'tenant1', with 'rw' access to the volume 'volumeid1'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id1, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Authorize 'guestclient_1', using auth ID 'guest' and belonging to + # 'tenant1', with 'rw' access to the volume 'volumeid2'. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}") + """.format( + group_id=group_id, + volume_id=volume_id2, + auth_id=guestclient_1["auth_id"], + tenant_id=guestclient_1["tenant_id"] + ))) + + # Check that auth metadata file for auth ID 'guest', is + # created on authorizing 'guest' access to the volume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + self.mounts[0].run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)]) + + # Deauthorize 'guestclient_1' to access 'volumeid2'. 
This should update + # 'volumes' key to 'subvolumes' + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id2, + guest_entity=guestclient_1["auth_id"], + ))) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different volumes, versioning details, etc. + expected_auth_metadata = { + "version": 2, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "groupid/volumeid1": { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._volume_client_python(volumeclient_mount, dedent(""" + import json + auth_metadata = vc._auth_metadata_get("{auth_id}") + print(json.dumps(auth_metadata)) + """.format( + auth_id=guestclient_1["auth_id"], + ))) + auth_metadata = json.loads(auth_metadata) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Check that auth metadata file is cleaned up on removing + # auth ID's access to volumes 'volumeid1' and 'volumeid2' + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.deauthorize(vp, "{guest_entity}") + """.format( + group_id=group_id, + volume_id=volume_id1, + guest_entity=guestclient_1["auth_id"] + ))) + self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes")) + + # Check that volume metadata file is cleaned up on 'volumeid1' deletion. 
+ self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id1, + ))) + self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes")) + + # Check that volume metadata file is cleaned up on 'volumeid2' deletion. + self._volume_client_python(volumeclient_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id2, + ))) + self.assertNotIn(vol_metadata_filename2, self.mounts[0].ls("volumes")) + + def test_put_object(self): + vc_mount = self.mounts[1] + vc_mount.umount_wait() + self._configure_vc_auth(vc_mount, "manila") + + obj_data = 'test data' + obj_name = 'test_vc_obj_1' + pool_name = self.fs.get_data_pool_names()[0] + + self._volume_client_python(vc_mount, dedent(""" + vc.put_object("{pool_name}", "{obj_name}", b"{obj_data}") + """.format( + pool_name = pool_name, + obj_name = obj_name, + obj_data = obj_data + ))) + + read_data = self.fs.rados(['get', obj_name, '-'], pool=pool_name, stdout=StringIO()).stdout.getvalue() + self.assertEqual(obj_data, read_data) + + def test_get_object(self): + vc_mount = self.mounts[1] + vc_mount.umount_wait() + self._configure_vc_auth(vc_mount, "manila") + + obj_data = 'test_data' + obj_name = 'test_vc_ob_2' + pool_name = self.fs.get_data_pool_names()[0] + + self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin=StringIO(obj_data)) + + self._volume_client_python(vc_mount, dedent(""" + data_read = vc.get_object("{pool_name}", "{obj_name}") + assert data_read == b"{obj_data}" + """.format( + pool_name = pool_name, + obj_name = obj_name, + obj_data = obj_data + ))) + + def test_put_object_versioned(self): + vc_mount = self.mounts[1] + vc_mount.umount_wait() + self._configure_vc_auth(vc_mount, "manila") + + obj_data = 'test_data' + obj_name = 'test_vc_obj' + pool_name = self.fs.get_data_pool_names()[0] + 
self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin=StringIO(obj_data)) + + self._volume_client_python(vc_mount, dedent(""" + data, version_before = vc.get_object_and_version("{pool_name}", "{obj_name}") + + if sys_version_info.major < 3: + data = data + 'modification1' + elif sys_version_info.major > 3: + data = str.encode(data.decode() + 'modification1') + + vc.put_object_versioned("{pool_name}", "{obj_name}", data, version_before) + data, version_after = vc.get_object_and_version("{pool_name}", "{obj_name}") + assert version_after == version_before + 1 + """).format(pool_name=pool_name, obj_name=obj_name)) + + def test_version_check_for_put_object_versioned(self): + vc_mount = self.mounts[1] + vc_mount.umount_wait() + self._configure_vc_auth(vc_mount, "manila") + + obj_data = 'test_data' + obj_name = 'test_vc_ob_2' + pool_name = self.fs.get_data_pool_names()[0] + self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin=StringIO(obj_data)) + + # Test if put_object_versioned() crosschecks the version of the + # given object. Being a negative test, an exception is expected. 
+ expected_exception = 'rados_OSError' + output = self._volume_client_python(vc_mount, dedent(""" + data, version = vc.get_object_and_version("{pool_name}", "{obj_name}") + + if sys_version_info.major < 3: + data = data + 'm1' + elif sys_version_info.major > 3: + data = str.encode(data.decode('utf-8') + 'm1') + + vc.put_object("{pool_name}", "{obj_name}", data) + + if sys_version_info.major < 3: + data = data + 'm2' + elif sys_version_info.major > 3: + data = str.encode(data.decode('utf-8') + 'm2') + + try: + vc.put_object_versioned("{pool_name}", "{obj_name}", data, version) + except {expected_exception}: + print('{expected_exception} raised') + """).format(pool_name=pool_name, obj_name=obj_name, + expected_exception=expected_exception)) + self.assertEqual(expected_exception + ' raised', output) + + + def test_delete_object(self): + vc_mount = self.mounts[1] + vc_mount.umount_wait() + self._configure_vc_auth(vc_mount, "manila") + + obj_data = 'test data' + obj_name = 'test_vc_obj_3' + pool_name = self.fs.get_data_pool_names()[0] + + self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin=StringIO(obj_data)) + + self._volume_client_python(vc_mount, dedent(""" + data_read = vc.delete_object("{pool_name}", "{obj_name}") + """.format( + pool_name = pool_name, + obj_name = obj_name, + ))) + + with self.assertRaises(CommandFailedError): + self.fs.rados(['stat', obj_name], pool=pool_name) + + # Check idempotency -- no error raised trying to delete non-existent + # object + self._volume_client_python(vc_mount, dedent(""" + data_read = vc.delete_object("{pool_name}", "{obj_name}") + """.format( + pool_name = pool_name, + obj_name = obj_name, + ))) + + def test_21501(self): + """ + Reproducer for #21501 "ceph_volume_client: sets invalid caps for + existing IDs with no caps" (http://tracker.ceph.com/issues/21501) + """ + + vc_mount = self.mounts[1] + vc_mount.umount_wait() + + # Configure vc_mount as the handle for driving volumeclient + self._configure_vc_auth(vc_mount, 
"manila") + + # Create a volume + group_id = "grpid" + volume_id = "volid" + cephfs_mntpt = self._volume_client_python(vc_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 1024*1024*10) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id + ))) + + # Create an auth ID with no caps + guest_id = '21501' + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'get-or-create', 'client.{0}'.format(guest_id)) + + guest_mount = self.mounts[2] + guest_mount.umount_wait() +# Set auth caps for the auth ID using the volumeclient + self._configure_guest_auth(vc_mount, guest_mount, guest_id, cephfs_mntpt, + allow_existing_id=True) + + # Mount the volume in the guest using the auth ID to assert that the + # auth caps are valid + guest_mount.mount_wait(cephfs_mntpt=cephfs_mntpt) + + def test_volume_without_namespace_isolation(self): + """ + That volume client can create volumes that do not have separate RADOS + namespace layouts. 
+ """ + vc_mount = self.mounts[1] + vc_mount.umount_wait() + + # Configure vc_mount as the handle for driving volumeclient + self._configure_vc_auth(vc_mount, "manila") + + # Create a volume + volume_prefix = "/myprefix" + group_id = "grpid" + volume_id = "volid" + self._volume_client_python(vc_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + create_result = vc.create_volume(vp, 1024*1024*10, namespace_isolated=False) + print(create_result['mount_path']) + """.format( + group_id=group_id, + volume_id=volume_id + )), volume_prefix) + + # The CephFS volume should be created + self.mounts[0].stat(os.path.join("myprefix", group_id, volume_id)) + vol_namespace = self.mounts[0].getfattr( + os.path.join("myprefix", group_id, volume_id), + "ceph.dir.layout.pool_namespace") + assert not vol_namespace + + self._volume_client_python(vc_mount, dedent(""" + vp = VolumePath("{group_id}", "{volume_id}") + vc.delete_volume(vp) + vc.purge_volume(vp) + """.format( + group_id=group_id, + volume_id=volume_id, + )), volume_prefix) diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py new file mode 100644 index 000000000..7f17fe9e4 --- /dev/null +++ b/qa/tasks/cephfs/test_volumes.py @@ -0,0 +1,7807 @@ +import os +import json +import time +import errno +import random +import logging +import collections +import uuid +import unittest +from hashlib import md5 +from textwrap import dedent +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestVolumesHelper(CephFSTestCase): + """Helper class for testing FS volume, subvolume group and subvolume operations.""" + TEST_VOLUME_PREFIX = "volume" + TEST_SUBVOLUME_PREFIX="subvolume" + TEST_GROUP_PREFIX="group" + TEST_SNAPSHOT_PREFIX="snapshot" + TEST_CLONE_PREFIX="clone" + TEST_FILE_NAME_PREFIX="subvolume_file" + + # for filling 
subvolume with data + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 2 + + # io defaults + DEFAULT_FILE_SIZE = 1 # MB + DEFAULT_NUMBER_OF_FILES = 1024 + + def _fs_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + + def _raw_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def __check_clone_state(self, state, clone, clone_group=None, timo=120): + check = 0 + args = ["clone", "status", self.volname, clone] + if clone_group: + args.append(clone_group) + args = tuple(args) + while check < timo: + result = json.loads(self._fs_cmd(*args)) + if result["status"]["state"] == state: + break + check += 1 + time.sleep(1) + self.assertTrue(check < timo) + + def _get_clone_status(self, clone, clone_group=None): + args = ["clone", "status", self.volname, clone] + if clone_group: + args.append(clone_group) + args = tuple(args) + result = json.loads(self._fs_cmd(*args)) + return result + + def _wait_for_clone_to_complete(self, clone, clone_group=None, timo=120): + self.__check_clone_state("complete", clone, clone_group, timo) + + def _wait_for_clone_to_fail(self, clone, clone_group=None, timo=120): + self.__check_clone_state("failed", clone, clone_group, timo) + + def _wait_for_clone_to_be_in_progress(self, clone, clone_group=None, timo=120): + self.__check_clone_state("in-progress", clone, clone_group, timo) + + def _check_clone_canceled(self, clone, clone_group=None): + self.__check_clone_state("canceled", clone, clone_group, timo=1) + + def _get_subvolume_snapshot_path(self, subvolume, snapshot, source_group, subvol_path, source_version): + if source_version == 2: + # v2 + if subvol_path is not None: + (base_path, uuid_str) = os.path.split(subvol_path) + else: + (base_path, uuid_str) = os.path.split(self._get_subvolume_path(self.volname, subvolume, group_name=source_group)) + return os.path.join(base_path, ".snap", snapshot, uuid_str) + + # v1 + base_path = self._get_subvolume_path(self.volname, subvolume, 
group_name=source_group) + return os.path.join(base_path, ".snap", snapshot) + + def _verify_clone_attrs(self, source_path, clone_path): + path1 = source_path + path2 = clone_path + + p = self.mount_a.run_shell(["find", path1]) + paths = p.stdout.getvalue().strip().split() + + # for each entry in source and clone (sink) verify certain inode attributes: + # inode type, mode, ownership, [am]time. + for source_path in paths: + sink_entry = source_path[len(path1)+1:] + sink_path = os.path.join(path2, sink_entry) + + # mode+type + sval = int(self.mount_a.run_shell(['stat', '-c' '%f', source_path]).stdout.getvalue().strip(), 16) + cval = int(self.mount_a.run_shell(['stat', '-c' '%f', sink_path]).stdout.getvalue().strip(), 16) + self.assertEqual(sval, cval) + + # ownership + sval = int(self.mount_a.run_shell(['stat', '-c' '%u', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%u', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + sval = int(self.mount_a.run_shell(['stat', '-c' '%g', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%g', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + # inode timestamps + # do not check access as kclient will generally not update this like ceph-fuse will. 
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%Y', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%Y', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + def _verify_clone_root(self, source_path, clone_path, clone, clone_group, clone_pool): + # verifies following clone root attrs quota, data_pool and pool_namespace + # remaining attributes of clone root are validated in _verify_clone_attrs + + clone_info = json.loads(self._get_subvolume_info(self.volname, clone, clone_group)) + + # verify quota is inherited from source snapshot + src_quota = self.mount_a.getfattr(source_path, "ceph.quota.max_bytes") + # FIXME: kclient fails to get this quota value: https://tracker.ceph.com/issues/48075 + if isinstance(self.mount_a, FuseMount): + self.assertEqual(clone_info["bytes_quota"], "infinite" if src_quota is None else int(src_quota)) + + if clone_pool: + # verify pool is set as per request + self.assertEqual(clone_info["data_pool"], clone_pool) + else: + # verify pool and pool namespace are inherited from snapshot + self.assertEqual(clone_info["data_pool"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool")) + self.assertEqual(clone_info["pool_namespace"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool_namespace")) + + def _verify_clone(self, subvolume, snapshot, clone, + source_group=None, clone_group=None, clone_pool=None, + subvol_path=None, source_version=2, timo=120): + # pass in subvol_path (subvolume path when snapshot was taken) when subvolume is removed + # but snapshots are retained for clone verification + path1 = self._get_subvolume_snapshot_path(subvolume, snapshot, source_group, subvol_path, source_version) + path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group) + + check = 0 + # TODO: currently snapshot rentries are not stable if snapshot source entries + # are removed, https://tracker.ceph.com/issues/46747 + while check < timo and subvol_path 
is None: + val1 = int(self.mount_a.getfattr(path1, "ceph.dir.rentries")) + val2 = int(self.mount_a.getfattr(path2, "ceph.dir.rentries")) + if val1 == val2: + break + check += 1 + time.sleep(1) + self.assertTrue(check < timo) + + self._verify_clone_root(path1, path2, clone, clone_group, clone_pool) + self._verify_clone_attrs(path1, path2) + + def _generate_random_volume_name(self, count=1): + n = self.volume_start + volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.volume_start += count + return volumes[0] if count == 1 else volumes + + def _generate_random_subvolume_name(self, count=1): + n = self.subvolume_start + subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.subvolume_start += count + return subvolumes[0] if count == 1 else subvolumes + + def _generate_random_group_name(self, count=1): + n = self.group_start + groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)] + self.group_start += count + return groups[0] if count == 1 else groups + + def _generate_random_snapshot_name(self, count=1): + n = self.snapshot_start + snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)] + self.snapshot_start += count + return snaps[0] if count == 1 else snaps + + def _generate_random_clone_name(self, count=1): + n = self.clone_start + clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)] + self.clone_start += count + return clones[0] if count == 1 else clones + + def _enable_multi_fs(self): + self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it") + + def _create_or_reuse_test_volume(self): + result = json.loads(self._fs_cmd("volume", "ls")) + if len(result) == 0: + self.vol_created = True + self.volname = self._generate_random_volume_name() + self._fs_cmd("volume", "create", self.volname) + else: + self.volname = result[0]['name'] + + def _get_volume_info(self, vol_name, 
human_readable=False): + if human_readable: + args = ["volume", "info", vol_name, human_readable] + else: + args = ["volume", "info", vol_name] + args = tuple(args) + vol_md = self._fs_cmd(*args) + return vol_md + + def _get_subvolume_group_path(self, vol_name, group_name): + args = ("subvolumegroup", "getpath", vol_name, group_name) + path = self._fs_cmd(*args) + # remove the leading '/', and trailing whitespaces + return path[1:].rstrip() + + def _get_subvolume_group_info(self, vol_name, group_name): + args = ["subvolumegroup", "info", vol_name, group_name] + args = tuple(args) + group_md = self._fs_cmd(*args) + return group_md + + def _get_subvolume_path(self, vol_name, subvol_name, group_name=None): + args = ["subvolume", "getpath", vol_name, subvol_name] + if group_name: + args.append(group_name) + args = tuple(args) + path = self._fs_cmd(*args) + # remove the leading '/', and trailing whitespaces + return path[1:].rstrip() + + def _get_subvolume_info(self, vol_name, subvol_name, group_name=None): + args = ["subvolume", "info", vol_name, subvol_name] + if group_name: + args.append(group_name) + args = tuple(args) + subvol_md = self._fs_cmd(*args) + return subvol_md + + def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None): + args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname] + if group_name: + args.append(group_name) + args = tuple(args) + snap_md = self._fs_cmd(*args) + return snap_md + + def _delete_test_volume(self): + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + def _do_subvolume_pool_and_namespace_update(self, subvolume, pool=None, pool_namespace=None, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + if pool is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool', pool, sudo=True) + + if pool_namespace is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool_namespace', 
pool_namespace, sudo=True) + + def _do_subvolume_attr_update(self, subvolume, uid, gid, mode, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + # mode + self.mount_a.run_shell(['chmod', mode, subvolpath], sudo=True) + + # ownership + self.mount_a.run_shell(['chown', uid, subvolpath], sudo=True) + self.mount_a.run_shell(['chgrp', gid, subvolpath], sudo=True) + + def _do_subvolume_io(self, subvolume, subvolume_group=None, create_dir=None, + number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE): + # get subvolume path for IO + args = ["subvolume", "getpath", self.volname, subvolume] + if subvolume_group: + args.append(subvolume_group) + args = tuple(args) + subvolpath = self._fs_cmd(*args) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline + + io_path = subvolpath + if create_dir: + io_path = os.path.join(subvolpath, create_dir) + self.mount_a.run_shell_payload(f"mkdir -p {io_path}") + + log.debug("filling subvolume {0} with {1} files each {2}MB size under directory {3}".format(subvolume, number_of_files, file_size, io_path)) + for i in range(number_of_files): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + self.mount_a.write_n_mb(os.path.join(io_path, filename), file_size) + + def _do_subvolume_io_mixed(self, subvolume, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + reg_file = "regfile.0" + dir_path = os.path.join(subvolpath, "dir.0") + sym_path1 = os.path.join(subvolpath, "sym.0") + # this symlink's ownership would be changed + sym_path2 = os.path.join(dir_path, "sym.0") + + self.mount_a.run_shell(["mkdir", dir_path]) + self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path1]) + self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path2]) + # flip ownership to nobody. 
assumption: nobody's id is 65534 + self.mount_a.run_shell(["chown", "-h", "65534:65534", sym_path2], sudo=True, omit_sudo=False) + + def _wait_for_trash_empty(self, timeout=60): + # XXX: construct the trash dir path (note that there is no mgr + # [sub]volume interface for this). + trashdir = os.path.join("./", "volumes", "_deleting") + self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + + def _wait_for_subvol_trash_empty(self, subvol, group="_nogroup", timeout=30): + trashdir = os.path.join("./", "volumes", group, subvol, ".trash") + try: + self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + pass + else: + raise + + def _assert_meta_location_and_version(self, vol_name, subvol_name, subvol_group=None, version=2, legacy=False): + if legacy: + subvol_path = self._get_subvolume_path(vol_name, subvol_name, group_name=subvol_group) + m = md5() + m.update(("/"+subvol_path).encode('utf-8')) + meta_filename = "{0}.meta".format(m.digest().hex()) + metapath = os.path.join(".", "volumes", "_legacy", meta_filename) + else: + group = subvol_group if subvol_group is not None else '_nogroup' + metapath = os.path.join(".", "volumes", group, subvol_name, ".meta") + + out = self.mount_a.run_shell(['cat', metapath], sudo=True) + lines = out.stdout.getvalue().strip().split('\n') + sv_version = -1 + for line in lines: + if line == "version = " + str(version): + sv_version = version + break + self.assertEqual(sv_version, version, "version expected was '{0}' but got '{1}' from meta file at '{2}'".format( + version, sv_version, metapath)) + + def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=True, subvol_type='subvolume', state='complete'): + group = subvol_group if subvol_group is not None else '_nogroup' + basepath = os.path.join("volumes", group, subvol_name) + uuid_str = str(uuid.uuid4()) + createpath = os.path.join(basepath, uuid_str) + self.mount_a.run_shell(['mkdir', 
'-p', createpath], sudo=True) + + # create a v1 snapshot, to prevent auto upgrades + if has_snapshot: + snappath = os.path.join(createpath, ".snap", "fake") + self.mount_a.run_shell(['mkdir', '-p', snappath], sudo=True) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create a v1 .meta file + meta_contents = "[GLOBAL]\nversion = 1\ntype = {0}\npath = {1}\nstate = {2}\n".format(subvol_type, "/" + createpath, state) + if state == 'pending': + # add a fake clone source + meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n' + meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath1, meta_contents, sudo=True) + return createpath + + def _update_fake_trash(self, subvol_name, subvol_group=None, trash_name='fake', create=True): + group = subvol_group if subvol_group is not None else '_nogroup' + trashpath = os.path.join("volumes", group, subvol_name, '.trash', trash_name) + if create: + self.mount_a.run_shell(['mkdir', '-p', trashpath], sudo=True) + else: + self.mount_a.run_shell(['rmdir', trashpath], sudo=True) + + def _configure_guest_auth(self, guest_mount, authid, key): + """ + Set up auth credentials for a guest client. + """ + # Create keyring file for the guest client. + keyring_txt = dedent(""" + [client.{authid}] + key = {key} + + """.format(authid=authid,key=key)) + + guest_mount.client_id = authid + guest_mount.client_remote.write_file(guest_mount.get_keyring_path(), + keyring_txt, sudo=True) + # Add a guest client section to the ceph config file. 
+ self.config_set("client.{0}".format(authid), "debug client", 20) + self.config_set("client.{0}".format(authid), "debug objecter", 20) + self.set_conf("client.{0}".format(authid), + "keyring", guest_mount.get_keyring_path()) + + def _auth_metadata_get(self, filedata): + """ + Return a deserialized JSON object, or None + """ + try: + data = json.loads(filedata) + except json.decoder.JSONDecodeError: + data = None + return data + + def setUp(self): + super(TestVolumesHelper, self).setUp() + self.volname = None + self.vol_created = False + self._enable_multi_fs() + self._create_or_reuse_test_volume() + self.config_set('mon', 'mon_allow_pool_delete', True) + self.volume_start = random.randint(1, (1<<20)) + self.subvolume_start = random.randint(1, (1<<20)) + self.group_start = random.randint(1, (1<<20)) + self.snapshot_start = random.randint(1, (1<<20)) + self.clone_start = random.randint(1, (1<<20)) + + def tearDown(self): + if self.vol_created: + self._delete_test_volume() + super(TestVolumesHelper, self).tearDown() + + +class TestVolumes(TestVolumesHelper): + """Tests for FS volume operations.""" + def test_volume_create(self): + """ + That the volume can be created and then cleans up + """ + volname = self._generate_random_volume_name() + self._fs_cmd("volume", "create", volname) + volumels = json.loads(self._fs_cmd("volume", "ls")) + + if not (volname in ([volume['name'] for volume in volumels])): + raise RuntimeError("Error creating volume '{0}'".format(volname)) + else: + # clean up + self._fs_cmd("volume", "rm", volname, "--yes-i-really-mean-it") + + def test_volume_ls(self): + """ + That the existing and the newly created volumes can be listed and + finally cleans up. 
+ """ + vls = json.loads(self._fs_cmd("volume", "ls")) + volumes = [volume['name'] for volume in vls] + + #create new volumes and add it to the existing list of volumes + volumenames = self._generate_random_volume_name(2) + for volumename in volumenames: + self._fs_cmd("volume", "create", volumename) + volumes.extend(volumenames) + + # list volumes + try: + volumels = json.loads(self._fs_cmd('volume', 'ls')) + if len(volumels) == 0: + raise RuntimeError("Expected the 'fs volume ls' command to list the created volumes.") + else: + volnames = [volume['name'] for volume in volumels] + if collections.Counter(volnames) != collections.Counter(volumes): + raise RuntimeError("Error creating or listing volumes") + finally: + # clean up + for volume in volumenames: + self._fs_cmd("volume", "rm", volume, "--yes-i-really-mean-it") + + def test_volume_rm(self): + """ + That the volume can only be removed when --yes-i-really-mean-it is used + and verify that the deleted volume is not listed anymore. + """ + for m in self.mounts: + m.umount_wait() + try: + self._fs_cmd("volume", "rm", self.volname) + except CommandFailedError as ce: + if ce.exitstatus != errno.EPERM: + raise RuntimeError("expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + else: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if it's gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + if (self.volname in [volume['name'] for volume in volumes]): + raise RuntimeError("Expected the 'fs volume rm' command to succeed. " + "The volume {0} not removed.".format(self.volname)) + else: + raise RuntimeError("expected the 'fs volume rm' command to fail.") + + def test_volume_rm_arbitrary_pool_removal(self): + """ + That the arbitrary pool added to the volume out of band is removed + successfully on volume removal. 
+ """ + for m in self.mounts: + m.umount_wait() + new_pool = "new_pool" + # add arbitrary data pool + self.fs.add_data_pool(new_pool) + vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty")) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames) + + #check if osd pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty")) + for pool in vol_status["pools"]: + self.assertNotIn(pool["name"], pools) + + def test_volume_rm_when_mon_delete_pool_false(self): + """ + That the volume can only be removed when mon_allowd_pool_delete is set + to true and verify that the pools are removed after volume deletion. + """ + for m in self.mounts: + m.umount_wait() + self.config_set('mon', 'mon_allow_pool_delete', False) + try: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty")) + self.config_set('mon', 'mon_allow_pool_delete', True) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames, + "volume {0} exists after removal".format(self.volname)) + #check if pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty")) + for pool in vol_status["pools"]: + self.assertNotIn(pool["name"], pools, + "pool {0} exists after volume removal".format(pool["name"])) + + def 
test_volume_info(self): + """ + Tests the 'fs volume info' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertEqual(vol_info["used_size"], 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_without_subvolumegroup(self): + """ + Tests the 'fs volume info' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + def test_volume_info_with_human_readable_flag(self): + """ + Tests the 'fs volume info --human_readable' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["used_size"][-1] in units, "unit suffix in used_size is absent" + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert 
vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertEqual(int(vol_info["used_size"]), 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_with_human_readable_flag_without_subvolumegroup(self): + """ + Tests the 'fs volume info --human_readable' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + +class TestSubvolumeGroups(TestVolumesHelper): + """Tests for FS subvolume group operations.""" + def test_default_uid_gid_subvolume_group(self): + group = self._generate_random_group_name() + expected_uid = 0 + expected_gid = 0 + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + group_path = self._get_subvolume_group_path(self.volname, group) + + # check group's uid and gid + stat = self.mount_a.stat(group_path) + 
self.assertEqual(stat['st_uid'], expected_uid) + self.assertEqual(stat['st_gid'], expected_gid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_nonexistent_subvolume_group_create(self): + subvolume = self._generate_random_subvolume_name() + group = "non_existent_group" + + # try, creating subvolume in a nonexistent group + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume create' command to fail") + + def test_nonexistent_subvolume_group_rm(self): + group = "non_existent_group" + + # try, remove subvolume group + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail") + + def test_subvolume_group_create_with_auto_cleanup_on_fail(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + + # check whether group path is cleaned up + try: + self._fs_cmd("subvolumegroup", "getpath", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail") + + def test_subvolume_group_create_with_desired_data_pool_layout(self): + group1, group2 = self._generate_random_group_name(2) + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group1) + group1_path = self._get_subvolume_group_path(self.volname, group1) + + default_pool = self.mount_a.getfattr(group1_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, 
new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # create group specifying the new data pool as its pool layout + self._fs_cmd("subvolumegroup", "create", self.volname, group2, + "--pool_layout", new_pool) + group2_path = self._get_subvolume_group_path(self.volname, group2) + + desired_pool = self.mount_a.getfattr(group2_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + self._fs_cmd("subvolumegroup", "rm", self.volname, group1) + self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + + def test_subvolume_group_create_with_desired_mode(self): + group1, group2 = self._generate_random_group_name(2) + # default mode + expected_mode1 = "755" + # desired mode + expected_mode2 = "777" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group2, f"--mode={expected_mode2}") + self._fs_cmd("subvolumegroup", "create", self.volname, group1) + + group1_path = self._get_subvolume_group_path(self.volname, group1) + group2_path = self._get_subvolume_group_path(self.volname, group2) + volumes_path = os.path.dirname(group1_path) + + # check group's mode + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group1_path]).stdout.getvalue().strip() + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', group2_path]).stdout.getvalue().strip() + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, expected_mode1) + self.assertEqual(actual_mode2, expected_mode2) + self.assertEqual(actual_mode3, expected_mode1) + + self._fs_cmd("subvolumegroup", "rm", self.volname, group1) + self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + + def test_subvolume_group_create_with_desired_uid_gid(self): + """ + That the subvolume group can be created with the desired uid and gid and its uid and gid matches the + expected values. 
+ """ + uid = 1000 + gid = 1000 + + # create subvolume group + subvolgroupname = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid)) + + # make sure it exists + subvolgrouppath = self._get_subvolume_group_path(self.volname, subvolgroupname) + self.assertNotEqual(subvolgrouppath, None) + + # verify the uid and gid + suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolgrouppath]).stdout.getvalue().strip()) + sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolgrouppath]).stdout.getvalue().strip()) + self.assertEqual(uid, suid) + self.assertEqual(gid, sgid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname) + + def test_subvolume_group_create_with_invalid_data_pool_layout(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + try: + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup create' command to fail") + + def test_subvolume_group_create_with_size(self): + # create group with size -- should set quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + self.assertEqual(group_info["bytes_quota"], 1000000000) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_info(self): + # tests the 'fs subvolumegroup info' command + + group_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "uid"] + + # create group + group = self._generate_random_group_name() + 
self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + for md in group_md: + self.assertIn(md, group_info, "'{0}' key not present in metadata of group".format(md)) + + self.assertEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(group_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(group_info["uid"], 0) + self.assertEqual(group_info["gid"], 0) + + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + + # get group metadata after quota set + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + for md in group_md: + self.assertIn(md, group_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertNotEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is set") + self.assertEqual(group_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name -- should be idempotent + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_mode(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name with mode -- should set mode + self._fs_cmd("subvolumegroup", "create", self.volname, group, 
"--mode=766") + + group_path = self._get_subvolume_group_path(self.volname, group) + + # check subvolumegroup's mode + mode = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip() + self.assertEqual(mode, "766") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_uid_gid(self): + desired_uid = 1000 + desired_gid = 1000 + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name with uid/gid -- should set uid/gid + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--uid", str(desired_uid), "--gid", str(desired_gid)) + + group_path = self._get_subvolume_group_path(self.volname, group) + + # verify the uid and gid + actual_uid = int(self.mount_a.run_shell(['stat', '-c' '%u', group_path]).stdout.getvalue().strip()) + actual_gid = int(self.mount_a.run_shell(['stat', '-c' '%g', group_path]).stdout.getvalue().strip()) + self.assertEqual(desired_uid, actual_uid) + self.assertEqual(desired_gid, actual_gid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_data_pool(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + group_path = self._get_subvolume_group_path(self.volname, group) + + default_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # try creating w/ same subvolume group name with new data pool -- should set pool + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", new_pool) + desired_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + 
except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_resize(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume name with size -- should set quota + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + self.assertEqual(group_info["bytes_quota"], 1000000000) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_quota_mds_path_restriction_to_group_path(self): + """ + Tests subvolumegroup quota enforcement with mds path restriction set to group. + For quota to be enforced, read permission needs to be provided to the parent + of the directory on which quota is set. 
Please see the tracker comment [1] + [1] https://tracker.ceph.com/issues/55090#note-8 + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # Create auth_id + authid = "client.guest1" + user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", authid, + "mds", "allow rw path=/volumes", + "mgr", "allow rw", + "osd", "allow rw tag cephfs *=*", + "mon", "allow r", + "--format=json-pretty" + )) + + # Prepare guest_mount with new authid + guest_mount = self.mount_b + guest_mount.umount_wait() + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) + + # mount the subvolume + mount_path = os.path.join("/", subvolpath) + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # create 99 files of 1MB + guest_mount.run_shell_payload("mkdir -p dir1") + for i in range(99): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE) + try: + # write two files of 1MB file to exceed the quota + guest_mount.run_shell_payload("mkdir -p dir2") + for i in range(2): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # For quota to be enforced + time.sleep(60) + # create 400 
files of 1MB to exceed quota + for i in range(400): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # Sometimes quota enforcement takes time. + if i == 200: + time.sleep(60) + except CommandFailedError: + pass + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # clean up + guest_mount.umount_wait() + + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_mds_path_restriction_to_subvolume_path(self): + """ + Tests subvolumegroup quota enforcement with mds path restriction set to subvolume path + The quota should not be enforced because of the fourth limitation mentioned at + https://docs.ceph.com/en/latest/cephfs/quota/#limitations + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + mount_path = os.path.join("/", subvolpath) + + # Create auth_id + authid = "client.guest1" + user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", authid, + "mds", f"allow rw path={mount_path}", + "mgr", "allow rw", + "osd", "allow 
rw tag cephfs *=*", + "mon", "allow r", + "--format=json-pretty" + )) + + # Prepare guest_mount with new authid + guest_mount = self.mount_b + guest_mount.umount_wait() + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) + + # mount the subvolume + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # create 99 files of 1MB to exceed quota + guest_mount.run_shell_payload("mkdir -p dir1") + for i in range(99): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE) + try: + # write two files of 1MB file to exceed the quota + guest_mount.run_shell_payload("mkdir -p dir2") + for i in range(2): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # For quota to be enforced + time.sleep(60) + # create 400 files of 1MB to exceed quota + for i in range(400): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # Sometimes quota enforcement takes time. 
+ if i == 200: + time.sleep(60) + except CommandFailedError: + self.fail(f"Quota should not be enforced, expected filling subvolume {subvolname} with 400 files of size 1MB to succeed") + + # clean up + guest_mount.umount_wait() + + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_exceeded_subvolume_removal(self): + """ + Tests subvolume removal if it's group quota is exceeded + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 400 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=400) + except CommandFailedError: + # Delete subvolume when group quota is exceeded + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + else: + self.fail(f"expected filling 
subvolume {subvolname} with 400 files of size 1MB to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_exceeded_subvolume_removal_retained_snaps(self): + """ + Tests retained snapshot subvolume removal if it's group quota is exceeded + """ + group = self._generate_random_group_name() + subvolname = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot1, "--group_name", group) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot2, "--group_name", group) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 400 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=400) + except CommandFailedError: + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group, 
"--retain-snapshots") + # remove snapshot1 + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot1, "--group_name", group) + # remove snapshot2 (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot2, "--group_name", group) + # verify subvolume trash is clean + self._wait_for_subvol_trash_empty(subvolname, group=group) + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_subvolume_removal(self): + """ + Tests subvolume removal if it's group quota is set. + """ + # create group with size -- should set quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_legacy_subvolume_removal(self): + """ + Tests legacy subvolume removal if it's group quota is set. 
+ """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- in a custom group + createpath1 = os.path.join(".", "volumes", group, subvolume) + self.mount_a.run_shell(['mkdir', '-p', createpath1], sudo=True) + + # this would auto-upgrade on access without anyone noticing + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, "--group-name", group) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() # remove "/" prefix and any trailing newline + + # and... the subvolume path returned should be what we created behind the scene + self.assertEqual(createpath1[1:], subvolpath1) + + # Set subvolumegroup quota on idempotent subvolumegroup creation + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_v1_subvolume_removal(self): + """ + Tests v1 subvolume removal if it's group quota is set. 
+ """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in a custom group + self._create_v1_subvolume(subvolume, subvol_group=group, has_snapshot=False) + + # Set subvolumegroup quota on idempotent subvolumegroup creation + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_fail_invalid_size(self): + """ + That a subvolume group cannot be resized to an invalid size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group with 1MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # try to resize the subvolume with an invalid size -10 + nsize = -10 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on resize of subvolume group with invalid size") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_fail_zero_size(self): + """ + That a subvolume group cannot be resized to a zero size and 
the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group with 1MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # try to resize the subvolume group with size 0 + nsize = 0 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on resize of subvolume group with invalid size") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_quota_lt_used_size(self): + """ + That a subvolume group can be resized to a size smaller than the current used size + and the resulting quota matches the expected size. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create group with 20MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + + # shrink the subvolume group + nsize = usedsize // 2 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError: + self.fail("expected the 'fs subvolumegroup resize' command to succeed") + + # verify the quota + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_fail_quota_lt_used_size_no_shrink(self): + """ + That a subvolume group cannot be resized to a size smaller than the current used size + when --no_shrink is given and the quota did not change. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create group with 20MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(grouppath, "ceph.dir.rbytes")) + + # shrink the subvolume group + nsize = usedsize // 2 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize), "--no_shrink") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolumegroup with quota less than used") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_expand_on_full_subvolume(self): + 
""" + That the subvolume group can be expanded after it is full and future write succeed + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 500 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + # Not able to write. 
So expand the subvolumegroup more and try writing the files again + nsize = osize*7 + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + try: + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to succeed".format(subvolname)) + else: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to fail".format(subvolname)) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_infinite_size(self): + """ + That a subvolume group can be resized to an infinite size by unsetting its quota. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # resize inf + self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_infinite_size_future_writes(self): + """ + That a subvolume group can be resized to an infinite size and the future writes succeed. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*5 + # create group with 5MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 4 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=4) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 500 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + # Not able to write. 
So resize subvolumegroup to 'inf' and try writing the files again + # resize inf + self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf") + try: + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to succeed".format(subvolname)) + else: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to fail".format(subvolname)) + + + # verify that the quota is None + size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_ls(self): + # tests the 'fs subvolumegroup ls' command + + subvolumegroups = [] + + #create subvolumegroups + subvolumegroups = self._generate_random_group_name(3) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to list the created subvolume groups") + else: + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if collections.Counter(subvolgroupnames) != collections.Counter(subvolumegroups): + raise RuntimeError("Error creating or listing subvolume groups") + + def test_subvolume_group_ls_filter(self): + # tests the 'fs subvolumegroup ls' command filters '_deleting' directory + + subvolumegroups = [] + + #create subvolumegroup + subvolumegroups = self._generate_random_group_name(3) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + # create 
subvolume and remove. This creates '_deleting' directory. + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if "_deleting" in subvolgroupnames: + self.fail("Listing subvolume groups listed '_deleting' directory") + + def test_subvolume_group_ls_filter_internal_directories(self): + # tests the 'fs subvolumegroup ls' command filters internal directories + # eg: '_deleting', '_nogroup', '_index', "_legacy" + + subvolumegroups = self._generate_random_group_name(3) + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + #create subvolumegroups + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + # create subvolume which will create '_nogroup' directory + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create snapshot + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone snapshot which will create '_index' directory + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # wait for clone to complete + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume which will create '_deleting' directory + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # list subvolumegroups + ret = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + self.assertEqual(len(ret), len(subvolumegroups)) + + ret_list = [subvolumegroup['name'] for subvolumegroup in ret] + self.assertEqual(len(ret_list), len(subvolumegroups)) + + 
self.assertEqual(all(elem in subvolumegroups for elem in ret_list), True) + + # cleanup + self._fs_cmd("subvolume", "rm", self.volname, clone) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "rm", self.volname, groupname) + + def test_subvolume_group_ls_for_nonexistent_volume(self): + # tests the 'fs subvolumegroup ls' command when /volume doesn't exist + # prerequisite: we expect that the test volume is created and a subvolumegroup is NOT created + + # list subvolume groups + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) > 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to output an empty list") + + def test_subvolumegroup_pin_distributed(self): + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + + group = "pinme" + self._fs_cmd("subvolumegroup", "create", self.volname, group) + self._fs_cmd("subvolumegroup", "pin", self.volname, group, "distributed", "True") + subvolumes = self._generate_random_subvolume_name(50) + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + self._wait_distributed_subtrees(2 * 2, status=status, rank="all") + + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_rm_force(self): + # test removing non-existing subvolume group with --force + group = self._generate_random_group_name() + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolumegroup rm --force' command to succeed") + + def test_subvolume_group_exists_with_subvolumegroup_and_no_subvolume(self): + """Test the presence of any subvolumegroup when only subvolumegroup is present""" + + 
group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_with_no_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolumegroup when no subvolumegroup is present""" + + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_with_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolume when subvolumegroup + and subvolume both are present""" + + group = self._generate_random_group_name() + subvolume = self._generate_random_subvolume_name(2) + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume[0], "--group_name", group) + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume[1]) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolume in group + self._fs_cmd("subvolume", "rm", self.volname, subvolume[0], "--group_name", group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume[1]) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + ret = 
self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_without_subvolumegroup_and_with_subvolume(self): + """Test the presence of any subvolume when subvolume is present + but no subvolumegroup is present""" + + subvolume = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + +class TestSubvolumes(TestVolumesHelper): + """Tests for FS subvolume operations, except snapshot and snapshot clone.""" + def test_async_subvolume_rm(self): + subvolumes = self._generate_random_subvolume_name(100) + + # create subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + self._do_subvolume_io(subvolume, number_of_files=10) + + self.mount_a.umount_wait() + + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + self.mount_a.mount_wait() + + # verify trash dir is clean + self._wait_for_trash_empty(timeout=300) + + def test_default_uid_gid_subvolume(self): + subvolume = self._generate_random_subvolume_name() + expected_uid = 0 + expected_gid = 0 + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # check subvolume's uid and gid + stat = self.mount_a.stat(subvol_path) + self.assertEqual(stat['st_uid'], expected_uid) + self.assertEqual(stat['st_gid'], expected_gid) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash 
dir is clean + self._wait_for_trash_empty() + + def test_nonexistent_subvolume_rm(self): + # remove non-existing subvolume + subvolume = "non_existent_subvolume" + + # try, remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume rm' command to fail") + + def test_subvolume_create_and_rm(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # make sure it exists + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + self.assertNotEqual(subvolpath, None) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + # make sure its gone + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume getpath' command to fail. 
Subvolume not removed.") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_and_rm_in_group(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_create_idempotence(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # try creating w/ same subvolume name -- should be idempotent + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_resize(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # try creating w/ same subvolume name with size -- should set quota + self._fs_cmd("subvolume", "create", self.volname, subvolume, "1000000000") + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + self.assertEqual(subvol_info["bytes_quota"], 1000000000) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_mode(self): + # default mode + default_mode = "755" + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", 
self.volname, subvolume) + + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_1, default_mode) + + # try creating w/ same subvolume name with --mode 777 + new_mode = "777" + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", new_mode) + + actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_2, new_mode) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_without_passing_mode(self): + # create subvolume + desired_mode = "777" + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", desired_mode) + + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_1, desired_mode) + + # default mode + default_mode = "755" + + # try creating w/ same subvolume name without passing --mode argument + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_2, default_mode) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_isolated_namespace(self): + """ + Create subvolume in separate rados namespace + """ + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated") + + # get subvolume metadata + subvol_info = 
json.loads(self._get_subvolume_info(self.volname, subvolume)) + self.assertNotEqual(len(subvol_info), 0) + self.assertEqual(subvol_info["pool_namespace"], "fsvolumens_" + subvolume) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_auto_cleanup_on_fail(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout fails + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + + # check whether subvol path is cleaned up + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of non-existent subvolume") + else: + self.fail("expected the 'fs subvolume getpath' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_data_pool_layout_in_group(self): + subvol1, subvol2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # create group. this also helps set default pool layout for subvolumes + # created within the group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group) + + default_pool = self.mount_a.getfattr(subvol1_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # create subvolume specifying the new data pool as its pool layout + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, + "--pool_layout", new_pool) + subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group) + + desired_pool = self.mount_a.getfattr(subvol2_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_mode(self): + subvol1 = self._generate_random_subvolume_name() + + # default mode + default_mode = "755" + # desired mode + desired_mode = "777" + + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--mode", "777") + + subvol1_path = self._get_subvolume_path(self.volname, subvol1) + + # check subvolumegroup's mode + subvol_par_path = os.path.dirname(subvol1_path) + group_path = os.path.dirname(subvol_par_path) + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, default_mode) + # check /volumes mode + volumes_path = os.path.dirname(group_path) + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode2, default_mode) + # check subvolume's mode + actual_mode3 = 
self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode3, desired_mode) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_mode_in_group(self): + subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + + group = self._generate_random_group_name() + # default mode + expected_mode1 = "755" + # desired mode + expected_mode2 = "777" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, "--mode", "777") + # check whether mode 0777 also works + self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group, "--mode", "0777") + + subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group) + subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group) + subvol3_path = self._get_subvolume_path(self.volname, subvol3, group_name=group) + + # check subvolume's mode + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip() + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol2_path]).stdout.getvalue().strip() + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol3_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, expected_mode1) + self.assertEqual(actual_mode2, expected_mode2) + self.assertEqual(actual_mode3, expected_mode2) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol3, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + 
self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_uid_gid(self): + """ + That the subvolume can be created with the desired uid and gid and its uid and gid matches the + expected values. + """ + uid = 1000 + gid = 1000 + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # verify the uid and gid + suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolpath]).stdout.getvalue().strip()) + sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolpath]).stdout.getvalue().strip()) + self.assertEqual(uid, suid) + self.assertEqual(gid, sgid) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_invalid_data_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid pool layout") + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_invalid_size(self): + # create subvolume with an invalid size -1 + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume 
create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_and_ls_providing_group_as_nogroup(self): + """ + That a 'subvolume create' and 'subvolume ls' should throw + permission denied error if option --group=_nogroup is provided. + """ + + subvolname = self._generate_random_subvolume_name() + + # try to create subvolume providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname) + + # try to list subvolumes providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume ls' command to fail") + + # list subvolumes + self._fs_cmd("subvolume", "ls", self.volname) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_expand(self): + """ + That a subvolume can be expanded in size and its quota matches the expected size. 
+ """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # expand the subvolume + nsize = osize*2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_info(self): + # tests the 'fs subvolume info' command + + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") + + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in 
['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + + # get subvolume metadata after quota set + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") + + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_ls(self): + # tests the 'fs subvolume ls' command + + subvolumes = [] + + # create subvolumes + subvolumes = self._generate_random_subvolume_name(3) + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) == 0: + self.fail("Expected the 'fs subvolume ls' command to list the created subvolumes.") + else: + subvolnames = [subvolume['name'] for subvolume in subvolumels] + 
if collections.Counter(subvolnames) != collections.Counter(subvolumes): + self.fail("Error creating or listing subvolumes") + + # remove subvolume + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_ls_with_groupname_as_internal_directory(self): + # tests the 'fs subvolume ls' command when the default groupname as internal directories + # Eg: '_nogroup', '_legacy', '_deleting', '_index'. + # Expecting 'fs subvolume ls' will be fail with errno EINVAL for '_legacy', '_deleting', '_index' + # Expecting 'fs subvolume ls' will be fail with errno EPERM for '_nogroup' + + # try to list subvolumes providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EPERM' for _nogroup") + + # try to list subvolumes providing --group_name=_legacy option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_legacy") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _legacy") + + # try to list subvolumes providing --group_name=_deleting option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_deleting") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _deleting") + + # try to list subvolumes providing --group_name=_index option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_index") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail 
with error 'EINVAL' for _index") + + def test_subvolume_ls_for_notexistent_default_group(self): + # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist + # prerequisite: we expect that the volume is created and the default group _nogroup is + # NOT created (i.e. a subvolume without group is not created) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) > 0: + raise RuntimeError("Expected the 'fs subvolume ls' command to output an empty list.") + + def test_subvolume_marked(self): + """ + ensure a subvolume is marked with the ceph.dir.subvolume xattr + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # subdirectory of a subvolume cannot be moved outside the subvolume once marked with + # the xattr ceph.dir.subvolume, hence test by attempting to rename subvol path (incarnation) + # outside the subvolume + dstpath = os.path.join(self.mount_a.mountpoint, 'volumes', '_nogroup', 'new_subvol_location') + srcpath = os.path.join(self.mount_a.mountpoint, subvolpath) + rename_script = dedent(""" + import os + import errno + try: + os.rename("{src}", "{dst}") + except OSError as e: + if e.errno != errno.EXDEV: + raise RuntimeError("invalid error code on renaming subvolume incarnation out of subvolume directory") + else: + raise RuntimeError("expected renaming subvolume incarnation out of subvolume directory to fail") + """) + self.mount_a.run_python(rename_script.format(src=srcpath, dst=dstpath), sudo=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_pin_export(self): + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + subvolume = self._generate_random_subvolume_name() 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "pin", self.volname, subvolume, "export", "1") + path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + path = os.path.dirname(path) # get subvolume path + + self._get_subtrees(status=status, rank=1) + self._wait_subtrees([(path, 1)], status=status) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + ### authorize operations + + def test_authorize_deauthorize_legacy_subvolume(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid = "alice" + + guest_mount = self.mount_b + guest_mount.umount_wait() + + # emulate a old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolume) + self.mount_a.run_shell(['mkdir', '-p', createpath], sudo=True) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + mount_path = os.path.join("/", "volumes", group, subvolume) + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id") + + # guest authID should exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(authid), existing_ids) + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, authid, key) + + # mount the subvolume, and write to it + guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.write_n_mb("data.bin", 1) + + # authorize guest authID read access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r") + + # 
guest client sees the change in access level to read only after a + # remount of the subvolume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # read existing content of the subvolume + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # cannot write into read-only subvolume + with self.assertRaises(CommandFailedError): + guest_mount.write_n_mb("rogue.bin", 1) + + # cleanup + guest_mount.umount_wait() + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid, + "--group_name", group) + # guest authID should no longer exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertNotIn("client.{0}".format(authid), existing_ids) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_deauthorize_subvolume(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid = "alice" + + guest_mount = self.mount_b + guest_mount.umount_wait() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=777") + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, + "--group_name", group).rstrip() + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id") + + # guest authID should exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(authid), existing_ids) + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, authid, key) + + # mount the subvolume, and write to it + guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.write_n_mb("data.bin", 1) + 
+ # authorize guest authID read access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r") + + # guest client sees the change in access level to read only after a + # remount of the subvolume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # read existing content of the subvolume + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # cannot write into read-only subvolume + with self.assertRaises(CommandFailedError): + guest_mount.write_n_mb("rogue.bin", 1) + + # cleanup + guest_mount.umount_wait() + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid, + "--group_name", group) + # guest authID should no longer exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertNotIn("client.{0}".format(authid), existing_ids) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_multitenant_subvolumes(self): + """ + That subvolume access can be restricted to a tenant. + + That metadata used to enforce tenant isolation of + subvolumes is stored as a two-way mapping between auth + IDs and subvolumes that they're authorized to access. + """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + guest_mount = self.mount_b + + # Guest clients belonging to different tenants, but using the same + # auth ID. 
+ auth_id = "alice" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + guestclient_2 = { + "auth_id": auth_id, + "tenant_id": "tenant2", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Check that subvolume metadata file is created on subvolume creation. + subvol_metadata_filename = "_{0}:{1}.meta".format(group, subvolume) + self.assertIn(subvol_metadata_filename, guest_mount.ls("volumes")) + + # Authorize 'guestclient_1', using auth ID 'alice' and belonging to + # 'tenant1', with 'rw' access to the volume. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'alice', is + # created on authorizing 'alice' access to the subvolume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different subvolumes, versioning details, etc. 
+ expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Verify that the subvolume metadata file stores info about auth IDs + # and their access levels to the subvolume, versioning details, etc. + expected_subvol_metadata = { + "version": 1, + "compat_version": 1, + "auths": { + "alice": { + "dirty": False, + "access_level": "rw" + } + } + } + subvol_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(subvol_metadata_filename))) + + self.assertGreaterEqual(subvol_metadata["version"], expected_subvol_metadata["version"]) + del expected_subvol_metadata["version"] + del subvol_metadata["version"] + self.assertEqual(expected_subvol_metadata, subvol_metadata) + + # Cannot authorize 'guestclient_2' to access the volume. + # It uses auth ID 'alice', which has already been used by a + # 'guestclient_1' belonging to an another tenant for accessing + # the volume. + + try: + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_2["auth_id"], + "--group_name", group, "--tenant_id", guestclient_2["tenant_id"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "Invalid error code returned on authorize of subvolume with same auth_id but different tenant_id") + else: + self.fail("expected the 'fs subvolume authorize' command to fail") + + # Check that auth metadata file is cleaned up on removing + # auth ID's only access to a volume. 
+ + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, + "--group_name", group) + self.assertNotIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Check that subvolume metadata file is cleaned up on subvolume deletion. + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self.assertNotIn(subvol_metadata_filename, guest_mount.ls("volumes")) + + # clean up + guest_mount.umount_wait() + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_authorized_list(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid1 = "alice" + authid2 = "guest1" + authid3 = "guest2" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # authorize alice authID read-write access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid1, + "--group_name", group) + # authorize guest1 authID read-write access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid2, + "--group_name", group) + # authorize guest2 authID read access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid3, + "--group_name", group, "--access_level", "r") + + # list authorized-ids of the subvolume + expected_auth_list = [{'alice': 'rw'}, {'guest1': 'rw'}, {'guest2': 'r'}] + auth_list = json.loads(self._fs_cmd('subvolume', 'authorized_list', self.volname, subvolume, "--group_name", group)) + self.assertCountEqual(expected_auth_list, auth_list) + + # cleanup + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid1, + "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid2, + "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid3, 
+ "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_auth_id_not_created_by_mgr_volumes(self): + """ + If the auth_id already exists and is not created by mgr plugin, + it's not allowed to authorize the auth-id by default. + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + try: + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "Invalid error code returned on authorize of subvolume for auth_id created out of band") + else: + self.fail("expected the 'fs subvolume authorize' command to fail") + + # clean up + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_allow_existing_id_option(self): + """ + If the auth_id already exists and is not created by mgr volumes, + it's not allowed to authorize the auth-id by default but is + allowed with option allow_existing_id. 
+ """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Cannot authorize 'guestclient_1' to access the volume by default, + # which already exists and not created by mgr volumes but is allowed + # with option 'allow_existing_id'. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"], "--allow-existing-id") + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, + "--group_name", group) + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_deauthorize_auth_id_after_out_of_band_update(self): + """ + If the auth_id authorized by mgr/volumes plugin is updated + out of band, the auth_id should not be deleted after a + deauthorize. It should only remove caps associated with it. + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume. 
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + subvol_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, + "--group_name", group).rstrip() + + # Update caps for guestclient_1 out of band + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "caps", "client.guest1", + "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path), + "osd", "allow rw pool=cephfs_data", + "mon", "allow r", + "mgr", "allow *" + ) + + # Deauthorize guestclient_1 + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) + + # Validate the caps of guestclient_1 after deauthorize. It should not have deleted + # guestclient_1. The mgr and mds caps should be present which was updated out of band. + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty")) + + self.assertEqual("client.guest1", out[0]["entity"]) + self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"]) + self.assertEqual("allow *", out[0]["caps"]["mgr"]) + self.assertNotIn("osd", out[0]["caps"]) + + # clean up + out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_recover_auth_metadata_during_authorize(self): + """ + That auth metadata manager can recover from partial auth updates using + metadata files, which store auth info and its update status info. This + test validates the recovery during authorize. 
+ """ + + guest_mount = self.mount_b + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run authorize again. + guest_mount.run_shell(['sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], sudo=True) + + # Authorize 'guestclient_1' to access the subvolume. 
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertEqual(auth_metadata_content, expected_auth_metadata_content) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_recover_auth_metadata_during_deauthorize(self): + """ + That auth metadata manager can recover from partial auth updates using + metadata files, which store auth info and its update status info. This + test validates the recovery during deauthorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + guestclient_1 = { + "auth_id": "guest1", + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume1. 
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + + # Authorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run de-authorize. + guest_mount.run_shell(['sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], sudo=True) + + # Deauthorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group) + + auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertEqual(auth_metadata_content, expected_auth_metadata_content) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_update_old_style_auth_metadata_to_new_during_authorize(self): + """ + CephVolumeClient stores the subvolume data in auth metadata file with + 'volumes' key as there was no subvolume namespace. It doesn't makes sense + with mgr/volumes. This test validates the transparent update of 'volumes' + key to 'subvolumes' key in auth metadata file during authorize. 
+ """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume1. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + guest_mount.run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], sudo=True) + + # Authorize 'guestclient_1' to access the subvolume2. 
This should transparently update 'volumes' to 'subvolumes' + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume1): { + "dirty": False, + "access_level": "rw" + }, + "{0}/{1}".format(group,subvolume2): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_update_old_style_auth_metadata_to_new_during_deauthorize(self): + """ + CephVolumeClient stores the subvolume data in auth metadata file with + 'volumes' key as there was no subvolume namespace. It doesn't makes sense + with mgr/volumes. This test validates the transparent update of 'volumes' + key to 'subvolumes' key in auth metadata file during deauthorize. 
+ """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Authorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is created. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + guest_mount.run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], sudo=True) + + # Deauthorize 'guestclient_1' to access the subvolume2. 
This should update 'volumes' to subvolumes' + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) + + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume1): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_evict_client(self): + """ + That a subvolume client can be evicted based on the auth ID + """ + + subvolumes = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # mounts[0] and mounts[1] would be used as guests to mount the volumes/shares. + for i in range(0, 2): + self.mounts[i].umount_wait() + guest_mounts = (self.mounts[0], self.mounts[1]) + auth_id = "guest" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create two subvolumes. Authorize 'guest' auth ID to mount the two + # subvolumes. Mount the two subvolumes. Write data to the volumes. + for i in range(2): + # Create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolumes[i], "--group_name", group, "--mode=777") + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolumes[i], guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i], + "--group_name", group).rstrip() + # configure credentials for guest client + self._configure_guest_auth(guest_mounts[i], auth_id, key) + + # mount the subvolume, and write to it + guest_mounts[i].mount_wait(cephfs_mntpt=mount_path) + guest_mounts[i].write_n_mb("data.bin", 1) + + # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted + # one volume. + self._fs_cmd("subvolume", "evict", self.volname, subvolumes[0], auth_id, "--group_name", group) + + # Evicted guest client, guest_mounts[0], should not be able to do + # anymore metadata ops. It should start failing all operations + # when it sees that its own address is in the blocklist. + try: + guest_mounts[0].write_n_mb("rogue.bin", 1) + except CommandFailedError: + pass + else: + raise RuntimeError("post-eviction write should have failed!") + + # The blocklisted guest client should now be unmountable + guest_mounts[0].umount_wait() + + # Guest client, guest_mounts[1], using the same auth ID 'guest', but + # has mounted the other volume, should be able to use its volume + # unaffected. + guest_mounts[1].write_n_mb("data.bin.1", 1) + + # Cleanup. 
+ guest_mounts[1].umount_wait() + for i in range(2): + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolumes[i], auth_id, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolumes[i], "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_pin_random(self): + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + self.config_set('mds', 'mds_export_ephemeral_random', True) + + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01") + # no verification + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_invalid_size(self): + """ + That a subvolume cannot be resized to an invalid size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # try to resize the subvolume with an invalid size -10 + nsize = -10 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def 
test_subvolume_resize_fail_zero_size(self): + """ + That a subvolume cannot be resized to a zero size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # try to resize the subvolume with size 0 + nsize = 0 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_quota_lt_used_size(self): + """ + That a subvolume can be resized to a size smaller than the current used size + and the resulting quota matches the expected size. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip()) + if isinstance(self.mount_a, FuseMount): + # kclient dir does not have size==rbytes + self.assertEqual(usedsize, susedsize) + + # shrink the subvolume + nsize = usedsize // 2 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError: + self.fail("expected the 'fs subvolume resize' command to succeed") + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_quota_lt_used_size_no_shrink(self): + """ + That a subvolume cannot be resized to a size smaller than the current used size + when --no_shrink is given and the quota did not change. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip()) + if isinstance(self.mount_a, FuseMount): + # kclient dir does not have size==rbytes + self.assertEqual(usedsize, susedsize) + + # shrink the subvolume + nsize = usedsize // 2 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize), "--no_shrink") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_expand_on_full_subvolume(self): + """ + That the subvolume can be expanded from a full subvolume and future writes succeed. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*10 + # create subvolume of quota 10MB and make sure it exists + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of size 10MB and write + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+3) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + # create a file of size 5MB and try write more + file_size=file_size // 2 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+4) + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + # Not able to write. 
So expand the subvolume more and try writing the 5MB file again + nsize = osize*2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" + "to succeed".format(subvolname, number_of_files, file_size)) + else: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" + "to fail".format(subvolname, number_of_files, file_size)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_infinite_size(self): + """ + That a subvolume can be resized to an infinite size by unsetting its quota. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", + str(self.DEFAULT_FILE_SIZE*1024*1024)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # resize inf + self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_infinite_size_future_writes(self): + """ + That a subvolume can be resized to an infinite size and the future writes succeed. 
+ """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", + str(self.DEFAULT_FILE_SIZE*1024*1024*5), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # resize inf + self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # create one file of 10MB and try to write + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+5) + + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB " + "to succeed".format(subvolname, number_of_files, file_size)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_force(self): + # test removing non-existing subvolume with --force + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume rm --force' command to succeed") + + def test_subvolume_exists_with_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolume by specifying the name of subvolumegroup""" + + group = self._generate_random_group_name() + subvolume1 = self._generate_random_subvolume_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + 
self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "subvolume exists") + # delete subvolume in group + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_exists_with_subvolumegroup_and_no_subvolume(self): + """Test the presence of any subvolume specifying the name + of subvolumegroup and no subvolumes""" + + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_exists_without_subvolumegroup_and_with_subvolume(self): + """Test the presence of any subvolume without specifying the name + of subvolumegroup""" + + subvolume1 = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume1) + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolume exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + + def test_subvolume_exists_without_subvolumegroup_and_without_subvolume(self): + """Test the presence of any subvolume without any subvolumegroup + and without any subvolume""" + + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no 
subvolume exists") + + def test_subvolume_shrink(self): + """ + That a subvolume can be shrinked in size and its quota matches the expected size. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # shrink the subvolume + nsize = osize // 2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_rm_idempotency(self): + """ + ensure subvolume deletion of a subvolume which is already deleted with retain snapshots option passes. + After subvolume deletion with retain snapshots, the subvolume exists until the trash directory (resides inside subvolume) + is cleaned up. The subvolume deletion issued while the trash directory is not empty, should pass and should + not error out with EAGAIN. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=256) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume (check idempotency) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + self.fail(f"expected subvolume rm to pass with error: {os.strerror(ce.exitstatus)}") + + # verify trash dir is clean + self._wait_for_trash_empty() + + + def test_subvolume_user_metadata_set(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_set_idempotence(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. 
+ self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + # set same metadata again for subvolume. + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed because it is idempotent operation") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. 
+ self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # try to get value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_nonexist", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get_for_nonexisting_section(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # try to get value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_update(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # update metadata against key. + new_value = "new_value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, new_value, "--group_name", group) + + # get metadata for specified key of subvolume. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(new_value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + ret_dict = json.loads(ret) + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list_if_no_metadata_set(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + # remove '\n' from returned value. 
+ ret = ret.strip('\n') + + # compare output with expected output + # expecting empty json/dictionary + self.assertEqual(ret, "{}") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # try to remove value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_nonexist", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_for_nonexisting_section(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # try to remove value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_force(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. 
+ self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key with --force option. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_force_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key. 
+ try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + # again remove metadata against already removed key with --force option. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' (with --force) command to succeed") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_set_and_get_for_legacy_subvolume(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolname) + self.mount_a.run_shell(['mkdir', '-p', createpath], sudo=True) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. 
+ ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list_and_remove_for_legacy_subvolume(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolname) + self.mount_a.run_shell(['mkdir', '-p', createpath], sudo=True) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + ret_dict = json.loads(ret) + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + # remove metadata against specified key. 
+ try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_1", "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_1", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key_1 does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + +class TestSubvolumeGroupSnapshots(TestVolumesHelper): + """Tests for FS subvolume group snapshot operations.""" + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_nonexistent_subvolume_group_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove snapshot + try: + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup snapshot rm' command to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + 
self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_create_and_rm(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_idempotence(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # try creating snapshot w/ same snapshot name -- shoule be idempotent + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + 
self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_ls(self): + # tests the 'fs subvolumegroup snapshot ls' command + + snapshots = [] + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumegroup snapshots + snapshots = self._generate_random_snapshot_name(3) + for snapshot in snapshots: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + subvolgrpsnapshotls = json.loads(self._fs_cmd('subvolumegroup', 'snapshot', 'ls', self.volname, group)) + if len(subvolgrpsnapshotls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup snapshot ls' command to list the created subvolume group snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolgrpsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + raise RuntimeError("Error creating or listing subvolume group snapshots") + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_rm_force(self): + # test removing non-existing subvolume group snapshot with --force + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + # remove snapshot + try: + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed") + + def test_subvolume_group_snapshot_unsupported_status(self): + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # snapshot group + try: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + except CommandFailedError as ce: + 
self.assertEqual(ce.exitstatus, errno.ENOSYS, "invalid error code on subvolumegroup snapshot create") + else: + self.fail("expected subvolumegroup snapshot create command to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + +class TestSubvolumeSnapshots(TestVolumesHelper): + """Tests for FS subvolume snapshot operations.""" + def test_nonexistent_subvolume_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove snapshot again + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume snapshot rm' command to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_create_and_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_create_idempotence(self): + subvolume = self._generate_random_subvolume_name() + snapshot = 
self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # try creating w/ same subvolume snapshot name -- should be idempotent + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info(self): + + """ + tests the 'fs subvolume snapshot info' command + """ + + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot, snap_missing = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snapshot info for non-existent snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, snap_missing) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot info of non-existent snapshot") + else: + self.fail("expected snapshot info of non-existent snapshot to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", 
self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_in_group(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot subvolume in group + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_snapshot_ls(self): + # tests the 'fs subvolume snapshot ls' command + + snapshots = [] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create subvolume snapshots + snapshots = self._generate_random_snapshot_name(3) + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + if len(subvolsnapshotls) == 0: + self.fail("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + self.fail("Error creating or listing subvolume snapshots") + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # 
remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_inherited_snapshot_ls(self): + # tests the scenario where 'fs subvolume snapshot ls' command + # should not list inherited snapshots created as part of snapshot + # at ancestral level + + snapshots = [] + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snap_count = 3 + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # create subvolume snapshots + snapshots = self._generate_random_snapshot_name(snap_count) + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # Create snapshot at ancestral level + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_1") + ancestral_snappath2 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_2") + self.mount_a.run_shell(['mkdir', '-p', ancestral_snappath1, ancestral_snappath2], sudo=True) + + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume, group)) + self.assertEqual(len(subvolsnapshotls), snap_count) + + # remove ancestral snapshots + self.mount_a.run_shell(['rmdir', ancestral_snappath1, ancestral_snappath2], sudo=True) + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_inherited_snapshot_info(self): + """ + tests the scenario where 'fs subvolume snapshot info' command + 
should fail for inherited snapshots created as part of snapshot + at ancestral level + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create snapshot at ancestral level + ancestral_snap_name = "ancestral_snap_1" + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name) + self.mount_a.run_shell(['mkdir', '-p', ancestral_snappath1], sudo=True) + + # Validate existence of inherited snapshot + group_path = os.path.join(".", "volumes", group) + inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip()) + inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir) + inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap) + self.mount_a.run_shell(['ls', inherited_snappath]) + + # snapshot info on inherited snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, inherited_snap, group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on snapshot info of inherited snapshot") + else: + self.fail("expected snapshot info of inherited snapshot to fail") + + # remove ancestral snapshots + self.mount_a.run_shell(['rmdir', ancestral_snappath1], sudo=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_inherited_snapshot_rm(self): + """ + tests the scenario where 'fs subvolume snapshot rm' command + should fail for inherited snapshots created as part of snapshot + at ancestral level + """ + + 
subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create snapshot at ancestral level + ancestral_snap_name = "ancestral_snap_1" + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name) + self.mount_a.run_shell(['mkdir', '-p', ancestral_snappath1], sudo=True) + + # Validate existence of inherited snap + group_path = os.path.join(".", "volumes", group) + inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip()) + inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir) + inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap) + self.mount_a.run_shell(['ls', inherited_snappath]) + + # inherited snapshot should not be deletable + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, inherited_snap, "--group_name", group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when removing inherited snapshot") + else: + self.fail("expected removing inheirted snapshot to fail") + + # remove ancestral snapshots + self.mount_a.run_shell(['rmdir', ancestral_snappath1], sudo=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_subvolumegroup_snapshot_name_conflict(self): + """ + tests the scenario where creation of subvolume snapshot name + with same name as it's subvolumegroup snapshot name. This should + fail. 
+ """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + group_snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create subvolumegroup snapshot + group_snapshot_path = os.path.join(".", "volumes", group, ".snap", group_snapshot) + self.mount_a.run_shell(['mkdir', '-p', group_snapshot_path], sudo=True) + + # Validate existence of subvolumegroup snapshot + self.mount_a.run_shell(['ls', group_snapshot_path]) + + # Creation of subvolume snapshot with it's subvolumegroup snapshot name should fail + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, group_snapshot, "--group_name", group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when creating subvolume snapshot with same name as subvolume group snapshot") + else: + self.fail("expected subvolume snapshot creation with same name as subvolumegroup snapshot to fail") + + # remove subvolumegroup snapshot + self.mount_a.run_shell(['rmdir', group_snapshot_path], sudo=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_retain_snapshot_invalid_recreate(self): + """ + ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with 
snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume with an invalid pool + data_pool = "invalid_pool" + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on recreate of subvolume with invalid poolname") + else: + self.fail("expected recreate of subvolume with invalid poolname to fail") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate_subvolume(self): + """ + ensure a retained subvolume can be recreated and further snapshotted + """ + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", 
self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # recreate retained subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # snapshot info (older snapshot) + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot1)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-create (new snapshot) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with retain snapshots + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # list snapshots + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvolsnapshotls), 2, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + for snap in [snapshot1, snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + # remove snapshots (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def 
test_subvolume_retain_snapshot_with_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with snapshots retains the subvolume + also test allowed and dis-allowed operations on a retained subvolume + """ + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of retained subvolume with snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + ## test allowed ops in retained state + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # snapshot info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + 
self.assertEqual(snap_info["has_pending_clones"], "no") + + # rm --force (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # rm (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + ## test disallowed ops + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on resize of subvolume with retained snapshots") + else: + self.fail("expected resize of subvolume with retained snapshots to fail") + + # snap-create + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, "fail") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot create of subvolume with retained snapshots") + else: + self.fail("expected snapshot create of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify list subvolumes returns an empty list 
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_without_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with no snapshots, deletes the subbvolume + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove with snapshot retention (should remove volume, no snapshots to retain) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate(self): + """ + ensure retained subvolume recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(subvolume) + + # recreate subvolume + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of subvolume with purge pending") + else: + self.fail("expected recreate of subvolume with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(subvolume, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, 
subvolume) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_with_snapshots(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTEMPTY: + raise RuntimeError("invalid error code returned when deleting subvolume with snapshots") + else: + raise RuntimeError("expected subvolume deletion to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_protect_unprotect_sanity(self): + """ + Snapshot protect/unprotect commands are deprecated. This test exists to ensure that + invoking the command does not cause errors, till they are removed from a subsequent release. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # now, protect snapshot + self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # now, unprotect snapshot + self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_rm_force(self): + # test removing non existing subvolume snapshot with --force + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # remove snapshot + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolume snapshot rm --force' command to succeed") + + def test_subvolume_snapshot_metadata_set(self): + """ + Set custom metadata for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. 
+ self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_set_idempotence(self): + """ + Set custom metadata for subvolume snapshot (Idempotency). + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + # set same metadata again for subvolume. 
+ try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed because it is idempotent operation") + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get(self): + """ + Get custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # get value for specified key. 
+ try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get_for_nonexisting_key(self): + """ + Get custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. 
+ key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # try to get value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key_nonexist", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get_for_nonexisting_section(self): + """ + Get custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # try to get value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_update(self): + """ + Update custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # update metadata against key. + new_value = "new_value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, new_value, group) + + # get metadata for specified key of snapshot. 
+ try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(new_value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_list(self): + """ + List custom metadata for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for subvolume. 
+ input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, k, v, group) + + # list metadata + try: + ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group)) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed") + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_list_if_no_metadata_set(self): + """ + List custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # list metadata + try: + ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group)) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed") + + # compare output with expected output + empty_dict = {} + self.assertDictEqual(ret_dict, empty_dict) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove(self): + """ + Remove custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key. 
+ try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, key, snapshot, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_for_nonexisting_key(self): + """ + Remove custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. 
+ key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # try to remove value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key_nonexist", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_for_nonexisting_section(self): + """ + Remove custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # try to remove value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_force(self): + """ + Forcefully remove custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key with --force option. 
+ try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_force_for_nonexisting_key(self): + """ + Forcefully remove custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key. 
+ try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + # again remove metadata against already removed key with --force option. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' (with --force) command to succeed") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_after_snapshot_remove(self): + """ + Verify metadata removal of subvolume snapshot after snapshot removal. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # get value for specified key. 
+ ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + # remove subvolume snapshot. + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + + # try to get metadata after removing snapshot. + # Expecting error ENOENT with error message of snapshot does not exist + cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( + args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], + check_status=False, stdout=StringIO(), stderr=StringIO()) + self.assertEqual(cmd_ret.returncode, errno.ENOENT, "Expecting ENOENT error") + self.assertIn(f"snapshot '{snapshot}' does not exist", cmd_ret.stderr.getvalue(), + f"Expecting message: snapshot '{snapshot}' does not exist ") + + # confirm metadata is removed by searching section name in .meta file + meta_path = os.path.join(".", "volumes", group, subvolname, ".meta") + section_name = "SNAP_METADATA_" + snapshot + + try: + self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False) + except CommandFailedError as e: + self.assertNotEqual(e.exitstatus, 0) + else: + self.fail("Expected non-zero exist status because section should not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_clean_stale_subvolume_snapshot_metadata(self): + """ + Validate cleaning of stale subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + # save the subvolume config file. + meta_path = os.path.join(".", "volumes", group, subvolname, ".meta") + tmp_meta_path = os.path.join(".", "volumes", group, subvolname, ".meta.stale_snap_section") + self.mount_a.run_shell(['sudo', 'cp', '-p', meta_path, tmp_meta_path], omit_sudo=False) + + # Delete snapshot, this would remove user snap metadata + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + + # Copy back saved subvolume config file. This would have stale snapshot metadata + self.mount_a.run_shell(['sudo', 'cp', '-p', tmp_meta_path, meta_path], omit_sudo=False) + + # Verify that it has stale snapshot metadata + section_name = "SNAP_METADATA_" + snapshot + try: + self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False) + except CommandFailedError: + self.fail("Expected grep cmd to succeed because stale snapshot metadata exist") + + # Do any subvolume operation to clean the stale snapshot metadata + _ = json.loads(self._get_subvolume_info(self.volname, subvolname, group)) + + # Verify that the stale snapshot metadata is cleaned + try: + self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False) + except CommandFailedError as e: + self.assertNotEqual(e.exitstatus, 0) + else: + self.fail("Expected non-zero exist status because stale snapshot metadata should not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + 
+ # verify trash dir is clean. + self._wait_for_trash_empty() + # Clean tmp config file + self.mount_a.run_shell(['sudo', 'rm', '-f', tmp_meta_path], omit_sudo=False) + + +class TestSubvolumeSnapshotClones(TestVolumesHelper): + """ Tests for FS subvolume snapshot clone operations.""" + def test_clone_subvolume_info(self): + # tests the 'fs subvolume info' command for a clone + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + subvol_info = json.loads(self._get_subvolume_info(self.volname, clone)) + if len(subvol_info) == 0: + raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume") + for md in subvol_md: + if md not in subvol_info.keys(): + raise RuntimeError("%s not present in the metadata of subvolume" % md) + if subvol_info["type"] != "clone": + raise RuntimeError("type should be set to clone") + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_without_snapshot_clone(self): + """ + Verify subvolume 
snapshot info output without clonig snapshot. + If no clone is performed then path /volumes/_index/clone/{track_id} + will not exist. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + self.assertEqual(result['has_pending_clones'], "no") + self.assertFalse('orphan_clones_count' in result) + self.assertFalse('pending_clones' in result) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_no_clone_pending(self): + """ + Verify subvolume snapshot info output if no clone is in pending state. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clones status + for clone in clone_list: + self._wait_for_clone_to_complete(clone) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + self.assertEqual(result['has_pending_clones'], "no") + self.assertFalse('orphan_clones_count' in result) + self.assertFalse('pending_clones' in result) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in clone_list: + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_clone_pending_for_no_group(self): + """ + Verify subvolume snapshot info output if clones are in pending state. + Clones are not specified for particular target_group. Hence target_group + should not be in the output as we don't show _nogroup (default group) + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + expected_clone_list = [] + for clone in clone_list: + expected_clone_list.append({"name": clone}) + self.assertEqual(result['has_pending_clones'], "yes") + self.assertFalse('orphan_clones_count' in result) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 3) + + # check clones status + for clone in clone_list: + self._wait_for_clone_to_complete(clone) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in clone_list: + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_clone_pending_for_target_group(self): + """ + Verify subvolume snapshot info output if clones are in pending state. + Clones are not specified for target_group. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + group = self._generate_random_group_name() + target_group = self._generate_random_group_name() + + # create groups + self._fs_cmd("subvolumegroup", "create", self.volname, group) + self._fs_cmd("subvolumegroup", "create", self.volname, target_group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, + "--group_name", group, "--target_group_name", target_group) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot, "--group_name", group)) + + # verify snapshot info + expected_clone_list = [{"name": clone, "target_group": target_group}] + self.assertEqual(result['has_pending_clones'], "yes") + self.assertFalse('orphan_clones_count' in result) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 1) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=target_group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + self._fs_cmd("subvolume", "rm", self.volname, clone, target_group) + + # remove groups + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, target_group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def 
test_subvolume_snapshot_info_if_orphan_clone(self): + """ + Verify subvolume snapshot info output if orphan clones exists. + Orphan clones should not list under pending clones. + orphan_clones_count should display correct count of orphan clones' + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # remove track file for third clone to make it orphan + meta_path = os.path.join(".", "volumes", "_nogroup", subvolume, ".meta") + pending_clones_result = self.mount_a.run_shell(['sudo', 'grep', 'clone snaps', '-A3', meta_path], omit_sudo=False, stdout=StringIO(), stderr=StringIO()) + third_clone_track_id = pending_clones_result.stdout.getvalue().splitlines()[3].split(" = ")[0] + third_clone_track_path = os.path.join(".", "volumes", "_index", "clone", third_clone_track_id) + self.mount_a.run_shell(f"sudo rm -f {third_clone_track_path}", omit_sudo=False) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + expected_clone_list = [] + for i in range(len(clone_list)-1): + expected_clone_list.append({"name": clone_list[i]}) + self.assertEqual(result['has_pending_clones'], "yes") + self.assertEqual(result['orphan_clones_count'], 1) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 2) + + # check clones status + for i in 
range(len(clone_list)-1): + self._wait_for_clone_to_complete(clone_list[i]) + + # list snapshot info after cloning completion + res = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info (has_pending_clones should be no) + self.assertEqual(res['has_pending_clones'], "no") + + def test_non_clone_status(self): + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + try: + self._fs_cmd("clone", "status", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTSUP: + raise RuntimeError("invalid error code when fetching status of a non cloned subvolume") + else: + raise RuntimeError("expected fetching of clone status of a subvolume to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_snapshot_namespace_and_size(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume, in an isolated namespace with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated", "--size", str(osize), "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create a pool different from current subvolume pool + subvol_path = self._get_subvolume_path(self.volname, subvolume) + default_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + self.fs.add_data_pool(new_pool) + + # update source subvolume pool + 
self._do_subvolume_pool_and_namespace_update(subvolume, pool=new_pool, pool_namespace="") + + # schedule a clone, with NO --pool specification + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_quota_attrs(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777", "--size", str(osize)) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # get subvolume path + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # set quota on number of files + self.mount_a.setfattr(subvolpath, 'ceph.quota.max_files', "20", sudo=True) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # get subvolume path + clonepath = self._get_subvolume_path(self.volname, clone) + + # verify quota max_files is inherited from source snapshot + subvol_quota = self.mount_a.getfattr(subvolpath, "ceph.quota.max_files") + clone_quota = self.mount_a.getfattr(clonepath, "ceph.quota.max_files") + 
self.assertEqual(subvol_quota, clone_quota) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_getpath(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # clone should not be accessible right now + try: + self._get_subvolume_path(self.volname, clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when fetching path of an pending clone") + else: + raise RuntimeError("expected fetching path of an pending clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def 
test_subvolume_clone_in_progress_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_source(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + 
self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # verify clone source + result = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + source = result['status']['source'] + self.assertEqual(source['volume'], self.volname) + self.assertEqual(source['subvolume'], subvolume) + self.assertEqual(source.get('group', None), None) + self.assertEqual(source['snapshot'], snapshot) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_retain_snapshot_with_snapshots(self): + """ + retain snapshots of a cloned subvolume and check disallowed operations + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol1_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, 
subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot1, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot1, clone, subvol_path=subvol1_path) + + # create a snapshot on the clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot2) + + # retain a clone + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # list snapshots + clonesnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, clone)) + self.assertEqual(len(clonesnapshotls), 1, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in clonesnapshotls] + for snap in [snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + ## check disallowed operations on retained clone + # clone-status + try: + self._fs_cmd("clone", "status", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone status of clone with retained snapshots") + else: + self.fail("expected clone status of clone with retained snapshots to fail") + + # clone-cancel + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone cancel of clone with retained snapshots") + else: + self.fail("expected clone cancel of clone with retained snapshots to fail") + + # remove snapshots (removes subvolumes as all are in retained state) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", 
self.volname, clone, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone(self): + """ + clone a snapshot from a snapshot retained subvolume + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, subvol_path=subvol_path) + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone_from_newer_snapshot(self): + """ + clone a subvolume from recreated subvolume's latest snapshot + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = 
self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name(1) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # get and store path for clone verification + subvol2_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot newer subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume's newer snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot2, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot2, clone, subvol_path=subvol2_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate(self): + """ + recreate a subvolume from one of its retained snapshots + """ + subvolume = self._generate_random_subvolume_name() + snapshot = 
self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate retained subvolume using its own snapshot to clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, subvolume) + + # check clone status + self._wait_for_clone_to_complete(subvolume) + + # verify clone + self._verify_clone(subvolume, snapshot, subvolume, subvol_path=subvol_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate_clone(self): + """ + ensure retained clone recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # snapshot clone + 
self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot) + + # remove clone with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(clone) + + # clone subvolume snapshot (recreate) + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of clone with purge pending") + else: + self.fail("expected recreate of clone with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(clone, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_attr_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io_mixed(subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + 
self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_pending_in_progress_complete(self): + """ + ensure failure status is not shown when clone is not in failed/cancelled state + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # pending clone shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for pending clone") + + # check clone1 to be in-progress + self._wait_for_clone_to_be_in_progress(clone1) + + # in-progress clone1 shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for in-progress clone") + + # wait for clone1 to complete + self._wait_for_clone_to_complete(clone1) + + # complete clone1 shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + 
clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for complete clone") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_failed(self): + """ + ensure failure status is shown when clone is in failed state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # remove snapshot from backend to force the clone failure. + snappath = os.path.join(".", "volumes", "_nogroup", subvolume, ".snap", snapshot) + self.mount_a.run_shell(['rmdir', snappath], sudo=True) + + # wait for clone1 to fail. 
+ self._wait_for_clone_to_fail(clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "failed") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "2") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "snapshot '{0}' does not exist".format(snapshot)) + + # clone removal should succeed after failure, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_pending_cancelled(self): + """ + ensure failure status is shown when clone is cancelled during pending state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # cancel pending clone1 + self._fs_cmd("clone", "cancel", self.volname, clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "canceled") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "4") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation") + + # clone removal should succeed with force after cancelled, remove clone1 + self._fs_cmd("subvolume", "rm", 
self.volname, clone1, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_in_progress_cancelled(self): + """ + ensure failure status is shown when clone is cancelled during in-progress state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # wait for clone1 to be in-progress + self._wait_for_clone_to_be_in_progress(clone1) + + # cancel in-progess clone1 + self._fs_cmd("clone", "cancel", self.volname, clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "canceled") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "4") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation") + + # clone removal should succeed with force after cancelled, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + 
self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_quota_exceeded(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume with 20MB quota + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + self._fs_cmd("subvolume", "create", self.volname, subvolume,"--mode=777", "--size", str(osize)) + + # do IO, write 50 files of 1MB each to exceed quota. This mostly succeeds as quota enforcement takes time. + try: + self._do_subvolume_io(subvolume, number_of_files=50) + except CommandFailedError: + # ignore quota enforcement error. 
+ pass + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_in_complete_clone_rm(self): + """ + Validates the removal of clone when it is not in 'complete|cancelled|failed' state. + The forceful removl of subvolume clone succeeds only if it's in any of the + 'complete|cancelled|failed' states. It fails with EAGAIN in any other states. + """ + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # Use --force since clone is not complete. Returns EAGAIN as clone is not either complete or cancelled. 
+ try: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when trying to remove failed clone") + else: + raise RuntimeError("expected error when removing a failed clone") + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # clone removal should succeed after cancel + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_retain_suid_guid(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # Create a file with suid, guid bits set along with executable bit. 
+ args = ["subvolume", "getpath", self.volname, subvolume] + args = tuple(args) + subvolpath = self._fs_cmd(*args) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline + + file_path = subvolpath + file_path = os.path.join(subvolpath, "test_suid_file") + self.mount_a.run_shell(["touch", file_path]) + self.mount_a.run_shell(["chmod", "u+sx,g+sx", file_path]) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_and_reclone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # check clone status + self._wait_for_clone_to_complete(clone1) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # now the clone is just like a normal subvolume 
-- snapshot the clone and fork + # another clone. before that do some IO so it's can be differentiated. + self._do_subvolume_io(clone1, create_dir="data", number_of_files=32) + + # snapshot clone -- use same snap name + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # verify clone + self._verify_clone(clone1, snapshot, clone2) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_in_progress(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=128) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", 
self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_pending(self): + """ + this test is a bit more involved compared to canceling an in-progress clone. + we'd need to ensure that a to-be canceled clone has still not been picked up + by cloner threads. exploit the fact that clones are picked up in an FCFS + fashion and there are four (4) cloner threads by default. When the number of + cloner threads increase, this test _may_ start tripping -- so, the number of + clone operations would need to be jacked up. + """ + # default number of clone threads + NR_THREADS = 4 + # good enough for 4 threads + NR_CLONES = 5 + # yeh, 1gig -- we need the clone to run for sometime + FILE_SIZE_MB = 1024 + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clones = self._generate_random_clone_name(NR_CLONES) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=4, file_size=FILE_SIZE_MB) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule clones + for clone in clones: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + to_wait = clones[0:NR_THREADS] + to_cancel = clones[NR_THREADS:] + + # cancel pending clones and verify + for clone in to_cancel: + status = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + self.assertEqual(status["status"]["state"], "pending") + self._fs_cmd("clone", "cancel", self.volname, clone) + self._check_clone_canceled(clone) + + # let's cancel on-going clones. 
handle the case where some of the clones + # _just_ complete + for clone in list(to_wait): + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + to_cancel.append(clone) + to_wait.remove(clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError("invalid error code when cancelling on-going clone") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in to_wait: + self._fs_cmd("subvolume", "rm", self.volname, clone) + for clone in to_cancel: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_different_groups(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + s_group, c_group = self._generate_random_group_name(2) + + # create groups + self._fs_cmd("subvolumegroup", "create", self.volname, s_group) + self._fs_cmd("subvolumegroup", "create", self.volname, c_group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, s_group, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, subvolume_group=s_group, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, + '--group_name', s_group, '--target_group_name', c_group) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=c_group) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=s_group, clone_group=c_group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group) + 
+ # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, s_group) + self._fs_cmd("subvolume", "rm", self.volname, clone, c_group) + + # remove groups + self._fs_cmd("subvolumegroup", "rm", self.volname, s_group) + self._fs_cmd("subvolumegroup", "rm", self.volname, c_group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_fail_with_remove(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + + pool_capacity = 32 * 1024 * 1024 + # number of files required to fill up 99% of the pool + nr_files = int((pool_capacity * 0.99) / (TestVolumes.DEFAULT_FILE_SIZE * 1024 * 1024)) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=nr_files) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # add data pool + new_pool = "new_pool" + self.fs.add_data_pool(new_pool) + + self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool, + "max_bytes", "{0}".format(pool_capacity // 4)) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool) + + # check clone status -- this should dramatically overshoot the pool quota + self._wait_for_clone_to_complete(clone1) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1, clone_pool=new_pool) + + # wait a bit so that subsequent I/O will give pool full error + time.sleep(120) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2, "--pool_layout", new_pool) + + # check clone status + self._wait_for_clone_to_fail(clone2) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, 
snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + try: + self._fs_cmd("subvolume", "rm", self.volname, clone2) + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when trying to remove failed clone") + else: + raise RuntimeError("expected error when removing a failed clone") + + # ... and with force, failed clone can be removed + self._fs_cmd("subvolume", "rm", self.volname, clone2, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_on_existing_subvolumes(self): + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolumes + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--mode=777") + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume1, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot) + + # schedule a clone with target as subvolume2 + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError("invalid error code when cloning to existing subvolume") + else: + raise RuntimeError("expected cloning to fail if the target is an existing subvolume") + + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone) + + # schedule a clone with target as clone + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError("invalid error code when cloning to existing clone") + 
else: + raise RuntimeError("expected cloning to fail if the target is an existing clone") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume1, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # add data pool + new_pool = "new_pool" + newid = self.fs.add_data_pool(new_pool) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_pool=new_pool) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + subvol_path = self._get_subvolume_path(self.volname, clone) + desired_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # 
verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_under_group(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + group = self._generate_random_group_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--target_group_name', group) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=group) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_group=group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone, group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_attrs(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + mode = "777" + uid = "1000" + gid = "1000" + new_uid = "1001" + new_gid = "1001" + new_mode = "700" + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, 
snapshot) + + # change subvolume attrs (to ensure clone picks up snapshot attrs) + self._do_subvolume_attr_update(subvolume, new_uid, new_gid, new_mode) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_upgrade(self): + """ + yet another poor man's upgrade test -- rather than going through a full + upgrade cycle, emulate old types subvolumes by going through the wormhole + and verify clone operation. + further ensure that a legacy volume is not updated to v2, but clone is. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # emulate a old-fashioned subvolume + createpath = os.path.join(".", "volumes", "_nogroup", subvolume) + self.mount_a.run_shell_payload(f"mkdir -p -m 777 {createpath}", sudo=True) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1, legacy=True) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_version=1) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # ensure metadata file is in v2 location, with required version v2 + self._assert_meta_location_and_version(self.volname, clone) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # 
verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_reconf_max_concurrent_clones(self): + """ + Validate 'max_concurrent_clones' config option + """ + + # get the default number of cloner threads + default_max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(default_max_concurrent_clones, 4) + + # Increase number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 6) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 6) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + def test_subvolume_snapshot_config_snapshot_clone_delay(self): + """ + Validate 'snapshot_clone_delay' config option + """ + + # get the default delay before starting the clone + default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay')) + self.assertEqual(default_timeout, 0) + + # Insert delay of 2 seconds at the beginning of the snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay')) + self.assertEqual(default_timeout, 2) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + def test_subvolume_under_group_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, 
group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, subvolume_group=group, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + +class TestMisc(TestVolumesHelper): + """Miscellaneous tests related to FS volume, subvolume group, and subvolume operations.""" + def test_connection_expiration(self): + # unmount any cephfs mounts + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].umount_wait() + sessions = self._session_list() + self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted + + # Get the mgr to definitely mount cephfs + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + sessions = self._session_list() + self.assertEqual(len(sessions), 1) + + # Now wait for the mgr to expire the connection: + self.wait_until_evicted(sessions[0]['id'], timeout=90) + + def test_mgr_eviction(self): + # unmount any cephfs mounts + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].umount_wait() + sessions = self._session_list() + self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted + + # Get the 
mgr to definitely mount cephfs + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + sessions = self._session_list() + self.assertEqual(len(sessions), 1) + + # Now fail the mgr, check the session was evicted + mgr = self.mgr_cluster.get_active_id() + self.mgr_cluster.mgr_fail(mgr) + self.wait_until_evicted(sessions[0]['id']) + + def test_names_can_only_be_goodchars(self): + """ + Test the creating vols, subvols subvolgroups fails when their names uses + characters beyond [a-zA-Z0-9 -_.]. + """ + volname, badname = 'testvol', 'abcd@#' + + with self.assertRaises(CommandFailedError): + self._fs_cmd('volume', 'create', badname) + self._fs_cmd('volume', 'create', volname) + + with self.assertRaises(CommandFailedError): + self._fs_cmd('subvolumegroup', 'create', volname, badname) + + with self.assertRaises(CommandFailedError): + self._fs_cmd('subvolume', 'create', volname, badname) + self._fs_cmd('volume', 'rm', volname, '--yes-i-really-mean-it') + + def test_subvolume_ops_on_nonexistent_vol(self): + # tests the fs subvolume operations on non existing volume + + volname = "non_existent_subvolume" + + # try subvolume operations + for op in ("create", "rm", "getpath", "info", "resize", "pin", "ls"): + try: + if op == "resize": + self._fs_cmd("subvolume", "resize", volname, "subvolname_1", "inf") + elif op == "pin": + self._fs_cmd("subvolume", "pin", volname, "subvolname_1", "export", "1") + elif op == "ls": + self._fs_cmd("subvolume", "ls", volname) + else: + self._fs_cmd("subvolume", op, volname, "subvolume_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolume {0}' command to fail".format(op)) + + # try subvolume snapshot operations and clone create + for op in ("create", "rm", "info", "protect", "unprotect", "ls", "clone"): + try: + if op == "ls": + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1") + elif op == 
    def test_subvolume_upgrade_legacy_to_v1(self):
        """
        poor man's upgrade test -- rather than going through a full upgrade cycle,
        emulate subvolumes by going through the wormhole and verify if they are
        accessible.
        further ensure that a legacy volume is not updated to v2.

        Two legacy-style subvolume directories are created directly through the
        mount (one under "_nogroup", one under a custom group), then accessed
        with "fs subvolume getpath"; the returned paths and the metadata
        location/version are checked before everything is removed again.
        """
        subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
        group = self._generate_random_group_name()

        # emulate an old-fashioned subvolume -- one in the default group and
        # the other in a custom group
        createpath1 = os.path.join(".", "volumes", "_nogroup", subvolume1)
        self.mount_a.run_shell(['mkdir', '-p', createpath1], sudo=True)

        # second legacy subvolume; 'mkdir -p' creates the custom group
        # directory as a side effect
        createpath2 = os.path.join(".", "volumes", group, subvolume2)
        self.mount_a.run_shell(['mkdir', '-p', createpath2], sudo=True)

        # this would auto-upgrade on access without anyone noticing
        subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume1)
        self.assertNotEqual(subvolpath1, None)
        subvolpath1 = subvolpath1.rstrip() # strip trailing whitespace/newline from the command output

        subvolpath2 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume2, group)
        self.assertNotEqual(subvolpath2, None)
        subvolpath2 = subvolpath2.rstrip() # strip trailing whitespace/newline from the command output

        # and... the subvolume path returned should be what we created behind the scene
        # ([1:] drops the leading "." so "./volumes/..." compares as "/volumes/...")
        self.assertEqual(createpath1[1:], subvolpath1)
        self.assertEqual(createpath2[1:], subvolpath2)

        # ensure metadata file is in legacy location, with required version v1
        self._assert_meta_location_and_version(self.volname, subvolume1, version=1, legacy=True)
        self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1, legacy=True)

        # remove subvolume
        self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
        self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)

        # verify trash dir is clean
        self._wait_for_trash_empty()

        # remove group
        self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+ + This test is to ensure v1 subvolumes are retained as is, due to a snapshot being present, and runs through + a series of operations on the v1 subvolume to ensure they work as expected. + """ + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + mode = "777" + uid = "1000" + gid = "1000" + + # emulate a v1 subvolume -- in the default group + subvolume_path = self._create_v1_subvolume(subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + self.assertEqual(subvolpath, subvolume_path) + + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # info + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'complete', found '{0}".format(subvol_info["state"])) + self.assertEqual(len(subvol_info["features"]), 2, + msg="expected 1 feature, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + # resize + nsize = 
self.DEFAULT_FILE_SIZE*1024*1024*10 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + + # create (idempotent) (change some attrs, to ensure attrs are preserved from the snapshot on clone) + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snap-create + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # check clone status + self._wait_for_clone_to_complete(clone1) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone1, version=2) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1, source_version=1) + + # clone (older snapshot) + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, 'fake', clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone2, version=2) + + # verify clone + # TODO: rentries will mismatch till this is fixed https://tracker.ceph.com/issues/46747 + #self._verify_clone(subvolume, 'fake', clone2, source_version=1) + + # snap-info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-ls + subvol_snapshots = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', 
self.volname, subvolume)) + self.assertEqual(len(subvol_snapshots), 2, "subvolume ls count mismatch, expected 2', found {0}".format(len(subvol_snapshots))) + snapshotnames = [snapshot['name'] for snapshot in subvol_snapshots] + for name in [snapshot, 'fake']: + self.assertIn(name, snapshotnames, msg="expected snapshot '{0}' in subvolume snapshot ls".format(name)) + + # snap-rm + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, "fake") + + # ensure volume is still at version 1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1) + + # rm + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_no_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... 
+ ensure v1 to v2 upgrades are not done automatically due to various states of v1 + """ + subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group) + + # emulate a v1 subvolume -- in a clone pending state + self._create_v1_subvolume(subvolume3, subvol_type='clone', has_snapshot=False, state='pending') + + # this would attempt auto-upgrade on access, but fail to do so as snapshots exist + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # this would attempt auto-upgrade on access, but fail to do so as volume is not complete + # use clone status, as only certain operations are allowed in pending state + status = json.loads(self._fs_cmd("clone", "status", self.volname, subvolume3)) + self.assertEqual(status["status"]["state"], "pending") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, "fake") + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume2, "fake", group) + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume3) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code 
on rm of subvolume undergoing clone") + else: + self.fail("expected rm of subvolume undergoing clone to fail") + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume3, version=1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume3, "--force") + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... + ensure v1 to v2 upgrades work + """ + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group, has_snapshot=False) + + # this would attempt auto-upgrade on access + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # ensure metadata file is in v2 location, with version retained as v2 + self._assert_meta_location_and_version(self.volname, subvolume1, version=2) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_malicious_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate handcrafted .meta file on legacy subvol root doesn't break the system + on 
legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol1, subvol2 = self._generate_random_subvolume_name(2) + + # emulate a old-fashioned subvolume in the default group + createpath1 = os.path.join(".", "volumes", "_nogroup", subvol1) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath1, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create v2 subvolume + self._fs_cmd("subvolume", "create", self.volname, subvol2) + + # Create malicious .meta file in legacy subvolume root. Copy v2 subvolume + # .meta into legacy subvol1's root + subvol2_metapath = os.path.join(".", "volumes", "_nogroup", subvol2, ".meta") + self.mount_a.run_shell(['sudo', 'cp', subvol2_metapath, createpath1], omit_sudo=False) + + # Upgrade legacy subvol1 to v1 + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvol1) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() + + # the subvolume path returned should not be of subvol2 from handcrafted + # .meta file + self.assertEqual(createpath1[1:], subvolpath1) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol1, version=1, legacy=True) + + # Authorize alice authID read-write access to subvol1. 
Verify it authorizes subvol1 path and not subvol2 + # path whose '.meta' file is copied to subvol1 root + authid1 = "alice" + self._fs_cmd("subvolume", "authorize", self.volname, subvol1, authid1) + + # Validate that the mds path added is of subvol1 and not of subvol2 + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.alice", "--format=json-pretty")) + self.assertEqual("client.alice", out[0]["entity"]) + self.assertEqual("allow rw path={0}".format(createpath1[1:]), out[0]["caps"]["mds"]) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol1) + self._fs_cmd("subvolume", "rm", self.volname, subvol2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_binary_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate binary .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- in a custom group + createpath = os.path.join(".", "volumes", group, subvol) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # Create unparseable binary .meta file on legacy subvol's root + meta_contents = os.urandom(4096) + meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True) + + # Upgrade legacy subvol to v1 + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath.rstrip() + + # The legacy subvolume path should be returned for subvol. 
+ # Should ignore unparseable binary .meta file in subvol's root + self.assertEqual(createpath[1:], subvolpath) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_unparseable_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate unparseable text .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- in a custom group + createpath = os.path.join(".", "volumes", group, subvol) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # Create unparseable text .meta file on legacy subvol's root + meta_contents = "unparseable config\nfile ...\nunparseable config\nfile ...\n" + meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True) + + # Upgrade legacy subvol to v1 + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath.rstrip() + + # The legacy subvolume path should be returned for subvol. 
# TODO: add code to run non-ACL tests too.
# TODO: get tests running with SCRATCH_DEV and SCRATCH_DIR.
# TODO: make xfstests-dev tests running without running `make install`.
# TODO: make xfstests-dev compatible with ceph-fuse. xfstests-dev remounts
# CephFS before running tests using kernel, so ceph-fuse mounts are never
# actually tested.
class XFSTestsDev(CephFSTestCase):
    """
    Base test case that prepares the client of ``mount_a`` for xfstests-dev.

    setUp clones and builds the xfstests-dev repository, installs its
    package dependencies, creates the users it needs and writes its
    ``local.config``. tearDown removes the users, the repository and the
    temporary directories again.
    """

    def setUp(self):
        super(XFSTestsDev, self).setUp()
        self.prepare_xfstests_dev()

    def prepare_xfstests_dev(self):
        """Fetch, configure, build and install xfstests-dev on the client."""
        self.get_repo()
        self.get_test_and_scratch_dirs_ready()
        self.install_deps()
        self.create_reqd_users()
        self.write_local_config()

        # NOTE: On teuthology machines it's necessary to run "make" as
        # superuser since the repo is cloned somewhere in /tmp.
        # omit_sudo=False is passed for both commands so that 'sudo' is kept
        # consistently, like every other sudo invocation in this class.
        self.mount_a.client_remote.run(args=['sudo', 'make'],
                                       cwd=self.repo_path, omit_sudo=False,
                                       stdout=BytesIO(), stderr=BytesIO())
        self.mount_a.client_remote.run(args=['sudo', 'make', 'install'],
                                       cwd=self.repo_path, omit_sudo=False,
                                       stdout=BytesIO(), stderr=BytesIO())

    def get_repo(self):
        """
        Shallow-clone the xfstests-dev repository into a fresh temporary
        directory on the client and remember its location in
        ``self.repo_path``.
        """
        # TODO: make sure that repo is not cloned for every test. it should
        # happen only once.
        remoteurl = 'https://git.ceph.com/xfstests-dev.git'
        self.repo_path = self.mount_a.client_remote.mkdtemp(
            suffix='xfstests-dev')
        self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1',
                                self.repo_path])

    def get_admin_key(self):
        """Return the secret key of client.admin as reported by the cluster."""
        import configparser

        cp = configparser.ConfigParser()
        cp.read_string(self.fs.mon_manager.raw_cluster_cmd(
            'auth', 'get-or-create', 'client.admin'))

        return cp['client.admin']['key']

    def get_test_and_scratch_dirs_ready(self):
        """ "test" and "scratch" directories are directories inside Ceph FS.
            And, test and scratch mounts are path on the local FS where "test"
            and "scratch" directories would be mounted. Look at xfstests-dev
            local.config's template inside this file to get some context.
        """
        from os.path import join

        self.test_dirname = 'test'
        self.mount_a.run_shell(['mkdir', self.test_dirname])
        # read var name as "test dir's mount path"
        self.test_dirs_mount_path = self.mount_a.client_remote.mkdtemp(
            suffix=self.test_dirname)
        self.mount_a.run_shell(['sudo', 'ln', '-s',
                                join(self.mount_a.mountpoint,
                                     self.test_dirname),
                                self.test_dirs_mount_path])

        self.scratch_dirname = 'scratch'
        self.mount_a.run_shell(['mkdir', self.scratch_dirname])
        # read var name as "scratch dir's mount path"
        self.scratch_dirs_mount_path = self.mount_a.client_remote.mkdtemp(
            suffix=self.scratch_dirname)
        self.mount_a.run_shell(['sudo', 'ln', '-s',
                                join(self.mount_a.mountpoint,
                                     self.scratch_dirname),
                                self.scratch_dirs_mount_path])

    def install_deps(self):
        """
        Install the packages xfstests-dev needs, using yum or apt-get
        depending on the distro of the client.

        Raises RuntimeError for any other distro.
        """
        from teuthology.misc import get_system_type

        distro, version = get_system_type(self.mount_a.client_remote,
                                          distro=True, version=True)
        distro = distro.lower()
        # only keep major release number
        major_ver_num = int(version.split('.')[0])

        # we keep fedora here so that right deps are installed when this test
        # is run locally by a dev.
        if distro in ('redhatenterpriseserver', 'redhatenterprise', 'fedora',
                      'centos'):
            deps = """acl attr automake bc dbench dump e2fsprogs fio
                      gawk gcc indent libtool lvm2 make psmisc quota sed
                      xfsdump xfsprogs
                      libacl-devel libattr-devel libaio-devel libuuid-devel
                      xfsprogs-devel btrfs-progs-devel python2 sqlite""".split()
            deps_old_distros = ['xfsprogs-qa-devel']

            if distro != 'fedora' and major_ver_num > 7:
                deps.remove('btrfs-progs-devel')

            args = ['sudo', 'yum', 'install', '-y'] + deps + deps_old_distros
        elif distro == 'ubuntu':
            deps = """xfslibs-dev uuid-dev libtool-bin
                      e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make
                      libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench
                      uuid-runtime python sqlite3""".split()

            if major_ver_num >= 19:
                deps[deps.index('python')] = 'python2'
            args = ['sudo', 'apt-get', 'install', '-y'] + deps
        else:
            raise RuntimeError('expected a yum based or a apt based system')

        self.mount_a.client_remote.run(args=args, omit_sudo=False)

    def create_reqd_users(self):
        """Create the users/group xfstests-dev expects (best effort)."""
        # check_status=False: failures (e.g. the user already exists) are
        # deliberately ignored.
        self.mount_a.client_remote.run(args=['sudo', 'useradd', 'fsgqa'],
                                       omit_sudo=False, check_status=False)
        self.mount_a.client_remote.run(args=['sudo', 'groupadd', 'fsgqa'],
                                       omit_sudo=False, check_status=False)
        self.mount_a.client_remote.run(args=['sudo', 'useradd',
                                             '123456-fsgqa'], omit_sudo=False,
                                       check_status=False)

    def write_local_config(self):
        """Write xfstests-dev's ``local.config`` into the cloned repository."""
        from os.path import join
        from textwrap import dedent

        mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0]
        self.test_dev = mon_sock + ':/' + self.test_dirname
        self.scratch_dev = mon_sock + ':/' + self.scratch_dirname

        xfstests_config_contents = dedent('''\
            export FSTYP=ceph
            export TEST_DEV={}
            export TEST_DIR={}
            #export SCRATCH_DEV={}
            #export SCRATCH_MNT={}
            export TEST_FS_MOUNT_OPTS="-o name=admin,secret={}"
            ''').format(self.test_dev, self.test_dirs_mount_path, self.scratch_dev,
                        self.scratch_dirs_mount_path, self.get_admin_key())

        self.mount_a.client_remote.write_file(join(self.repo_path, 'local.config'),
                                              xfstests_config_contents, sudo=True)

    def tearDown(self):
        """Undo what prepare_xfstests_dev() set up (best effort)."""
        self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force',
                                             '--remove', 'fsgqa'],
                                       omit_sudo=False, check_status=False)
        self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force',
                                             '--remove', '123456-fsgqa'],
                                       omit_sudo=False, check_status=False)
        self.mount_a.client_remote.run(args=['sudo', 'groupdel', 'fsgqa'],
                                       omit_sudo=False, check_status=False)

        # Remove the repository and the temporary mount-path directories
        # created with mkdtemp(); previously the latter two were left behind.
        # Attributes may be missing if preparation stopped early.
        for attr in ('repo_path', 'test_dirs_mount_path',
                     'scratch_dirs_mount_path'):
            path = getattr(self, attr, None)
            if path:
                self.mount_a.client_remote.run(args=['sudo', 'rm', '-rf',
                                                     path],
                                               omit_sudo=False,
                                               check_status=False)

        super(XFSTestsDev, self).tearDown()
class CephFSMirrorThrasher(Thrasher, Greenlet):
    """
    CephFSMirrorThrasher::

    The CephFSMirrorThrasher thrashes cephfs-mirror daemons during execution of other
    tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  The config is a dict
    containing some or all of:

    cluster: [default: ceph] cluster to thrash

    max_thrash: [default: 1] the maximum number of active cephfs-mirror daemons per
      cluster will be thrashed at any given time.

    min_thrash_delay: [default: 5] minimum number of seconds to delay before
      thrashing again.

    max_thrash_delay: [default: 10] maximum number of seconds to delay before
      thrashing again.

    max_revive_delay: [default: 15] maximum number of seconds to delay before
      bringing back a thrashed cephfs-mirror daemon.

    randomize: [default: true] enables randomization and use the max/min values

    seed: [no default] seed the random number generator

    Examples::

      The following example disables randomization, and uses the max delay
      values:

      tasks:
      - ceph:
      - cephfs_mirror_thrash:
          randomize: False
          max_thrash_delay: 10
    """

    def __init__(self, ctx, config, cluster, daemons):
        super(CephFSMirrorThrasher, self).__init__()

        self.ctx = ctx
        self.config = config
        self.cluster = cluster
        self.daemons = daemons

        self.logger = log
        self.name = 'thrasher.cephfs_mirror.[{cluster}]'.format(cluster = cluster)
        self.stopping = Event()

        self.randomize = bool(self.config.get('randomize', True))
        self.max_thrash = int(self.config.get('max_thrash', 1))
        self.min_thrash_delay = float(self.config.get('min_thrash_delay', 5.0))
        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 10))
        self.max_revive_delay = float(self.config.get('max_revive_delay', 15.0))

    def _run(self):
        try:
            self.do_thrash()
        except Exception as e:
            # See _run exception comment for MDSThrasher
            self.set_thrasher_exception(e)
            self.logger.exception("exception:")
            # Allow successful completion so gevent doesn't see an exception.
            # The DaemonWatchdog will observe the error and tear down the test.

    def log(self, x):
        """Write data to logger assigned to this CephFSMirrorThrasher"""
        self.logger.info(x)

    def stop(self):
        """Ask the thrash loop to finish at its next check of the stop event."""
        self.stopping.set()

    def do_thrash(self):
        """
        Perform the random thrashing action

        Loops until stop() is called: wait, SIGTERM a random selection of at
        most max_thrash daemons, wait again, then wait for the killed daemons
        to exit and restart them.
        """

        self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster))
        stats = {
            "kill": 0,
        }

        while not self.stopping.is_set():
            delay = self.max_thrash_delay
            if self.randomize:
                # The delays are floats; random.randrange() rejects
                # non-integer arguments (TypeError on Python >= 3.12) and
                # raises ValueError for an empty range, so use uniform().
                delay = random.uniform(self.min_thrash_delay, self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            killed_daemons = []

            weight = 1.0 / len(self.daemons)
            count = 0
            for daemon in self.daemons:
                skip = random.uniform(0.0, 1.0)
                if weight <= skip:
                    self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
                        label=daemon.id_, skip=skip, weight=weight))
                    continue

                self.log('kill {label}'.format(label=daemon.id_))
                try:
                    daemon.signal(signal.SIGTERM)
                except Exception as e:
                    self.log(f'exception when stopping mirror daemon: {e}')
                else:
                    killed_daemons.append(daemon)
                    stats['kill'] += 1

                # if we've reached max_thrash, we're done
                count += 1
                if count >= self.max_thrash:
                    break

            if killed_daemons:
                # wait for a while before restarting
                delay = self.max_revive_delay
                if self.randomize:
                    delay = random.uniform(0.0, self.max_revive_delay)

                self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay))
                sleep(delay)

                for daemon in killed_daemons:
                    self.log('waiting for {label}'.format(label=daemon.id_))
                    try:
                        run.wait([daemon.proc], timeout=600)
                    except CommandFailedError:
                        pass
                    except:
                        # Deliberately broad: whatever went wrong is re-raised
                        # below after attempting to collect a core dump.
                        self.log('Failed to stop {label}'.format(label=daemon.id_))

                        try:
                            # try to capture a core dump
                            daemon.signal(signal.SIGABRT)
                        except socket.error:
                            pass
                        raise
                    finally:
                        daemon.reset()

                for daemon in killed_daemons:
                    self.log('reviving {label}'.format(label=daemon.id_))
                    daemon.start()

        for stat in stats:
            self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat]))
@contextlib.contextmanager
def task(ctx, config):
    """
    Run the CephFS test cases.

    Run everything in tasks/cephfs/test_*.py:

    ::

        tasks:
          - install:
          - ceph:
          - ceph-fuse:
          - cephfs_test_runner:

    `modules` argument allows running only some specific modules:

    ::

        tasks:
            ...
          - cephfs_test_runner:
              modules:
                - tasks.cephfs.test_sessionmap
                - tasks.cephfs.test_auto_repair

    By default, any cases that can't be run on the current cluster configuration
    will generate a failure.  When the optional `fail_on_skip` argument is set
    to false, any tests that can't be run on the current configuration will
    simply be skipped:

    ::

        tasks:
            ...
         - cephfs_test_runner:
           fail_on_skip: false

    Raises RuntimeError, naming the offending tests, if the run was not
    successful.
    """
    # A bare "- cephfs_test_runner:" entry (as in the first example above)
    # passes config=None; normalise it so the lookups below cannot fail.
    if config is None:
        config = {}

    ceph_cluster = CephCluster(ctx)

    if len(list(misc.all_roles_of_type(ctx.cluster, 'mds'))):
        mds_cluster = MDSCluster(ctx)
        fs = Filesystem(ctx)
    else:
        mds_cluster = None
        fs = None

    if len(list(misc.all_roles_of_type(ctx.cluster, 'mgr'))):
        mgr_cluster = MgrCluster(ctx)
    else:
        mgr_cluster = None

    # Mount objects, sorted by ID
    if hasattr(ctx, 'mounts'):
        mounts = [v for k, v in sorted(ctx.mounts.items(), key=lambda mount: mount[0])]
    else:
        # The test configuration has a filesystem but no fuse/kclient mounts
        mounts = []

    # Every loaded test class gets these attributes set on it.
    decorating_loader = DecoratingLoader({
        "ctx": ctx,
        "mounts": mounts,
        "fs": fs,
        "ceph_cluster": ceph_cluster,
        "mds_cluster": mds_cluster,
        "mgr_cluster": mgr_cluster,
    })

    fail_on_skip = config.get('fail_on_skip', True)

    # Put useful things onto ctx for interactive debugging
    ctx.fs = fs
    ctx.mds_cluster = mds_cluster
    ctx.mgr_cluster = mgr_cluster

    # Depending on config, either load specific modules, or scan for modules
    if config.get('modules'):
        module_suites = []
        for mod_name in config['modules']:
            # Test names like cephfs.test_auto_repair
            module_suites.append(decorating_loader.loadTestsFromName(mod_name))
        overall_suite = suite.TestSuite(module_suites)
    else:
        # Default, run all tests
        overall_suite = decorating_loader.discover(
            os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "cephfs/"
            )
        )

    if ctx.config.get("interactive-on-error", False):
        InteractiveFailureResult.ctx = ctx
        result_class = InteractiveFailureResult
    else:
        result_class = unittest.TextTestResult

    class LoggingResult(result_class):
        def startTest(self, test):
            log.info("Starting test: {0}".format(self.getDescription(test)))
            return super(LoggingResult, self).startTest(test)

        def addSkip(self, test, reason):
            if fail_on_skip:
                # Don't just call addFailure because that requires a traceback
                self.failures.append((test, reason))
            else:
                super(LoggingResult, self).addSkip(test, reason)

    # Execute!
    result = unittest.TextTestRunner(
        stream=LogStream(),
        resultclass=LoggingResult,
        verbosity=2,
        failfast=True).run(overall_suite)

    if not result.wasSuccessful():
        result.printErrors()  # duplicate output at end for convenience

        # errors first, then failures
        bad_tests = [str(test) for test, _ in result.errors]
        bad_tests.extend(str(test) for test, _ in result.failures)

        raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests)))

    yield
+ """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'snap-upgrade task only accepts a dict for configuration' + + fs = Filesystem(ctx) + + mds_map = fs.get_mds_map() + assert(mds_map['max_mds'] == 1) + + json = fs.run_scrub(["start", "/", "force", "recursive", "repair"]) + if not json or json['return_code'] == 0: + assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True) + log.info("scrub / completed") + else: + log.info("scrub / failed: {}".format(json)) + + json = fs.run_scrub(["start", "~mdsdir", "force", "recursive", "repair"]) + if not json or json['return_code'] == 0: + assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True) + log.info("scrub ~mdsdir completed") + else: + log.info("scrub / failed: {}".format(json)) + + for i in range(0, 10): + mds_map = fs.get_mds_map() + if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0: + break + time.sleep(10) + assert((mds_map['flags'] & (1<<1)) != 0) # Test CEPH_MDSMAP_ALLOW_SNAPS + assert((mds_map['flags'] & (1<<4)) != 0) # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py new file mode 100644 index 000000000..daa81973b --- /dev/null +++ b/qa/tasks/check_counter.py @@ -0,0 +1,98 @@ + +import logging +import json + +from teuthology.task import Task +from teuthology import misc + +log = logging.getLogger(__name__) + + +class CheckCounter(Task): + """ + Use this task to validate that some daemon perf counters were + incremented by the nested tasks. + + Config: + 'cluster_name': optional, specify which cluster + 'target': dictionary of daemon type to list of performance counters. + 'dry_run': just log the value of the counters, don't fail if they + aren't nonzero. + + Success condition is that for all of the named counters, at least + one of the daemons of that type has the counter nonzero. 
+ + Example to check cephfs dirfrag splits are happening: + - install: + - ceph: + - ceph-fuse: + - check-counter: + counters: + mds: + - "mds.dir_split" + - workunit: ... + """ + + def start(self): + log.info("START") + + def end(self): + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('check-counter', {})) + + cluster_name = self.config.get('cluster_name', None) + dry_run = self.config.get('dry_run', False) + targets = self.config.get('counters', {}) + + if cluster_name is None: + cluster_name = next(iter(self.ctx.managers.keys())) + + for daemon_type, counters in targets.items(): + # List of 'a', 'b', 'c'... + daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type)) + daemons = dict([(daemon_id, + self.ctx.daemons.get_daemon(daemon_type, daemon_id)) + for daemon_id in daemon_ids]) + + seen = set() + + for daemon_id, daemon in daemons.items(): + if not daemon.running(): + log.info("Ignoring daemon {0}, it isn't running".format(daemon_id)) + continue + else: + log.debug("Getting stats from {0}".format(daemon_id)) + + manager = self.ctx.managers[cluster_name] + proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"]) + response_data = proc.stdout.getvalue().strip() + if response_data: + perf_dump = json.loads(response_data) + else: + log.warning("No admin socket response from {0}, skipping".format(daemon_id)) + continue + + for counter in counters: + subsys, counter_id = counter.split(".") + if subsys not in perf_dump or counter_id not in perf_dump[subsys]: + log.warning("Counter '{0}' not found on daemon {1}.{2}".format( + counter, daemon_type, daemon_id)) + continue + value = perf_dump[subsys][counter_id] + + log.info("Daemon {0}.{1} {2}={3}".format( + daemon_type, daemon_id, counter, value + )) + + if value > 0: + seen.add(counter) + + if not dry_run: + unseen = set(counters) - set(seen) + if unseen: + raise RuntimeError("The following counters failed to be set " + "on {0} daemons: 
{1}".format( + daemon_type, unseen + )) + +task = CheckCounter diff --git a/qa/tasks/cifs_mount.py b/qa/tasks/cifs_mount.py new file mode 100644 index 000000000..b282b0b7d --- /dev/null +++ b/qa/tasks/cifs_mount.py @@ -0,0 +1,137 @@ +""" +Mount cifs clients. Unmount when finished. +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a cifs client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. + + Example that starts smbd and mounts cifs on all nodes:: + + tasks: + - ceph: + - samba: + - cifs-mount: + - interactive: + + Example that splits smbd and cifs: + + tasks: + - ceph: + - samba: [samba.0] + - cifs-mount: [client.0] + - ceph-fuse: [client.1] + - interactive: + + Example that specifies the share name: + + tasks: + - ceph: + - ceph-fuse: + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + - cifs-mount: + client.0: + share: cephfuse + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting cifs clients...') + + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) + + from .samba import get_sambas + samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')] + sambas = list(get_sambas(ctx=ctx, roles=samba_roles)) + (ip, _) = sambas[0][1].ssh.get_transport().getpeername() + log.info('samba ip: {ip}'.format(ip=ip)) + + for id_, remote in clients: + mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) + log.info('Mounting cifs 
client.{id} at {remote} {mnt}...'.format( + id=id_, remote=remote,mnt=mnt)) + + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + + rolestr = 'client.{id_}'.format(id_=id_) + unc = "ceph" + log.info("config: {c}".format(c=config)) + if config[rolestr] is not None and 'share' in config[rolestr]: + unc = config[rolestr]['share'] + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', + 'cifs', + '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc), + '-o', + 'username=ubuntu,password=ubuntu', + mnt, + ], + ) + + remote.run( + args=[ + 'sudo', + 'chown', + 'ubuntu:ubuntu', + '{m}/'.format(m=mnt), + ], + ) + + try: + yield + finally: + log.info('Unmounting cifs clients...') + for id_, remote in clients: + remote.run( + args=[ + 'sudo', + 'umount', + mnt, + ], + ) + for id_, remote in clients: + while True: + try: + remote.run( + args=[ + 'rmdir', '--', mnt, + run.Raw('2>&1'), + run.Raw('|'), + 'grep', 'Device or resource busy', + ], + ) + import time + time.sleep(1) + except Exception: + break diff --git a/qa/tasks/cram.py b/qa/tasks/cram.py new file mode 100644 index 000000000..a445a146f --- /dev/null +++ b/qa/tasks/cram.py @@ -0,0 +1,160 @@ +""" +Cram tests +""" +import logging +import os + +from tasks.util.workunit import get_refspec_after_overrides + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run all cram tests from the specified paths on the specified + clients. Each client runs tests in parallel as default, and + you can also disable it by adding "parallel: False" option. + + Limitations: + Tests must have a .t suffix. Tests with duplicate names will + overwrite each other, so only the last one will run. 
+ + For example:: + + tasks: + - ceph: + - cram: + clients: + client.0: + - qa/test.t + - qa/test2.t] + client.1: [qa/test.t] + branch: foo + parallel: False + + You can also run a list of cram tests on all clients:: + + tasks: + - ceph: + - cram: + clients: + all: [qa/test.t] + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert 'clients' in config and isinstance(config['clients'], dict), \ + 'configuration must contain a dictionary of clients' + + clients = teuthology.replace_all_with_clients(ctx.cluster, + config['clients']) + testdir = teuthology.get_testdir(ctx) + + overrides = ctx.config.get('overrides', {}) + refspec = get_refspec_after_overrides(config, overrides) + + _parallel = config.get('parallel', True) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + log.info('Pulling tests from %s ref %s', git_url, refspec) + + try: + for client, tests in clients.items(): + (remote,) = (ctx.cluster.only(client).remotes.keys()) + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'mkdir', '--', client_dir, + run.Raw('&&'), + 'python3', '-m', 'venv', '{tdir}/virtualenv'.format(tdir=testdir), + run.Raw('&&'), + '{tdir}/virtualenv/bin/pip'.format(tdir=testdir), + 'install', 'cram==0.6', + ], + ) + clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + remote.run(args=refspec.clone(git_url, clone_dir)) + + for test in tests: + assert test.endswith('.t'), 'tests must end in .t' + remote.run( + args=[ + 'cp', '--', os.path.join(clone_dir, test), client_dir, + ], + ) + + if _parallel: + with parallel() as p: + for role in clients.keys(): + p.spawn(_run_tests, ctx, role) + else: + for role in clients.keys(): + _run_tests(ctx, role) + finally: + for client, tests in clients.items(): + (remote,) = (ctx.cluster.only(client).remotes.keys()) + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + test_files = set([test.rsplit('/', 1)[1] 
for test in tests]) + + # remove test files unless they failed + for test_file in test_files: + abs_file = os.path.join(client_dir, test_file) + remote.run( + args=[ + 'test', '-f', abs_file + '.err', + run.Raw('||'), + 'rm', '-f', '--', abs_file, + ], + ) + + # ignore failure since more than one client may + # be run on a host, and the client dir should be + # non-empty if the test failed + clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'rm', '-rf', '--', + '{tdir}/virtualenv'.format(tdir=testdir), + clone_dir, + run.Raw(';'), + 'rmdir', '--ignore-fail-on-non-empty', client_dir, + ], + ) + +def _run_tests(ctx, role): + """ + For each role, check to make sure it's a client, then run the cram on that client + + :param ctx: Context + :param role: Roles + """ + assert isinstance(role, str) + PREFIX = 'client.' + if role.startswith(PREFIX): + id_ = role[len(PREFIX):] + else: + id_ = role + (remote,) = (ctx.cluster.only(role).remotes.keys()) + ceph_ref = ctx.summary.get('ceph-sha1', 'master') + + testdir = teuthology.get_testdir(ctx) + log.info('Running tests for %s...', role) + remote.run( + args=[ + run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + run.Raw('PATH=$PATH:/usr/sbin'), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '{tdir}/virtualenv/bin/cram'.format(tdir=testdir), + '-v', '--', + run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)), + ], + logger=log.getChild(role), + ) diff --git a/qa/tasks/create_verify_lfn_objects.py b/qa/tasks/create_verify_lfn_objects.py new file mode 100644 index 000000000..532541581 --- /dev/null +++ b/qa/tasks/create_verify_lfn_objects.py @@ -0,0 +1,83 @@ +""" +Rados modle-based integration tests +""" +import contextlib +import logging + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + For each combination of namespace and 
name_length, create + <num_objects> objects with name length <name_length> + on entry. On exit, verify that the objects still exist, can + be deleted, and then don't exist. + + Usage:: + + create_verify_lfn_objects.py: + pool: <pool_name> default: 'data' + prefix: <prefix> default: '' + namespace: [<namespace>] default: [''] + num_objects: [<num_objects>] default: 10 + name_length: [<name_length>] default: [400] + """ + pool = config.get('pool', 'data') + num_objects = config.get('num_objects', 10) + name_length = config.get('name_length', [400]) + namespace = config.get('namespace', [None]) + prefix = config.get('prefix', None) + manager = ctx.managers['ceph'] + + objects = [] + for l in name_length: + for ns in namespace: + def object_name(i): + nslength = 0 + if namespace != '': + nslength = len(namespace) + numstr = str(i) + fillerlen = l - nslength - len(prefix) - len(numstr) + assert fillerlen >= 0 + return prefix + ('a'*fillerlen) + numstr + objects += [(ns, object_name(i)) for i in range(num_objects)] + + for ns, name in objects: + err = manager.do_put( + pool, + name, + '/etc/resolv.conf', + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + try: + yield + finally: + log.info('ceph_verify_lfn_objects verifying...') + for ns, name in objects: + err = manager.do_get( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + log.info('ceph_verify_lfn_objects deleting...') + for ns, name in objects: + err = manager.do_rm( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + log.info('ceph_verify_lfn_objects verifying absent...') + for ns, name in objects: + err = manager.do_get( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err != 0 diff --git a/qa/tasks/daemonwatchdog.py b/qa/tasks/daemonwatchdog.py new file mode 100644 index 000000000..c8fa9f3c2 --- /dev/null +++ b/qa/tasks/daemonwatchdog.py @@ -0,0 +1,128 @@ +import logging +import signal +import 
time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +log = logging.getLogger(__name__) + +class DaemonWatchdog(Greenlet): + """ + DaemonWatchdog:: + + Watch Ceph daemons for failures. If an extended failure is detected (i.e. + not intentional), then the watchdog will unmount file systems and send + SIGTERM to all daemons. The duration of an extended failure is configurable + with watchdog_daemon_timeout. + + ceph: + watchdog: + daemon_restart [default: no]: restart daemon if "normal" exit (status==0). + + daemon_timeout [default: 300]: number of seconds a daemon + is allowed to be failed before the + watchdog will bark. + """ + + def __init__(self, ctx, config, thrashers): + super(DaemonWatchdog, self).__init__() + self.config = ctx.config.get('watchdog', {}) + self.ctx = ctx + self.e = None + self.logger = log.getChild('daemon_watchdog') + self.cluster = config.get('cluster', 'ceph') + self.name = 'watchdog' + self.stopping = Event() + self.thrashers = thrashers + + def _run(self): + try: + self.watch() + except Exception as e: + # See _run exception comment for MDSThrasher + self.e = e + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... + + def log(self, x): + """Write data to logger""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def bark(self): + self.log("BARK! 
unmounting mounts and killing all daemons") + if hasattr(self.ctx, 'mounts'): + for mount in self.ctx.mounts.values(): + try: + mount.umount_wait(force=True) + except: + self.logger.exception("ignoring exception:") + daemons = [] + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('osd', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('rgw', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mgr', cluster=self.cluster))) + + for daemon in daemons: + try: + daemon.signal(signal.SIGTERM) + except: + self.logger.exception("ignoring exception:") + + def watch(self): + self.log("watchdog starting") + daemon_timeout = int(self.config.get('daemon_timeout', 300)) + daemon_restart = self.config.get('daemon_restart', False) + daemon_failure_time = {} + while not self.stopping.is_set(): + bark = False + now = time.time() + + osds = self.ctx.daemons.iter_daemons_of_role('osd', cluster=self.cluster) + mons = self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.cluster) + mdss = self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.cluster) + rgws = self.ctx.daemons.iter_daemons_of_role('rgw', cluster=self.cluster) + mgrs = self.ctx.daemons.iter_daemons_of_role('mgr', cluster=self.cluster) + + daemon_failures = [] + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, osds)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, 
mons)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mdss)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, rgws)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mgrs)) + + for daemon in daemon_failures: + name = daemon.role + '.' + daemon.id_ + dt = daemon_failure_time.setdefault(name, (daemon, now)) + assert dt[0] is daemon + delta = now-dt[1] + self.log("daemon {name} is failed for ~{t:.0f}s".format(name=name, t=delta)) + if delta > daemon_timeout: + bark = True + if daemon_restart == 'normal' and daemon.proc.exitstatus == 0: + self.log(f"attempting to restart daemon {name}") + daemon.restart() + + # If a daemon is no longer failed, remove it from tracking: + for name in list(daemon_failure_time.keys()): + if name not in [d.role + '.' + d.id_ for d in daemon_failures]: + self.log("daemon {name} has been restored".format(name=name)) + del daemon_failure_time[name] + + for thrasher in self.thrashers: + if thrasher.exception is not None: + self.log("{name} failed".format(name=thrasher.name)) + bark = True + + if bark: + self.bark() + return + + sleep(5) + + self.log("watchdog finished") diff --git a/qa/tasks/devstack.py b/qa/tasks/devstack.py new file mode 100644 index 000000000..2499e9e53 --- /dev/null +++ b/qa/tasks/devstack.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python +import contextlib +import logging +import textwrap +import time +from configparser import ConfigParser +from io import BytesIO, StringIO + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.contextutil import nested + +log = logging.getLogger(__name__) + +DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git' +DS_STABLE_BRANCHES = ("havana", "grizzly") + +is_devstack_node = lambda role: role.startswith('devstack') +is_osd_node = lambda role: role.startswith('osd') + + +@contextlib.contextmanager +def task(ctx, config): + if 
config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + with nested(lambda: install(ctx=ctx, config=config), + lambda: smoke(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def install(ctx, config): + """ + Install OpenStack DevStack and configure it to use a Ceph cluster for + Glance and Cinder. + + Requires one node with a role 'devstack' + + Since devstack runs rampant on the system it's used on, typically you will + want to reprovision that machine after using devstack on it. + + Also, the default 2GB of RAM that is given to vps nodes is insufficient. I + recommend 4GB. Downburst can be instructed to give 4GB to a vps node by + adding this to the yaml: + + downburst: + ram: 4G + + This was created using documentation found here: + https://github.com/openstack-dev/devstack/blob/master/README.md + http://docs.ceph.com/en/latest/rbd/rbd-openstack/ + """ + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys())) + + devstack_branch = config.get("branch", "master") + install_devstack(devstack_node, devstack_branch) + try: + configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node) + yield + finally: + pass + + +def install_devstack(devstack_node, branch="master"): + log.info("Cloning DevStack repo...") + + args = ['git', 'clone', DEVSTACK_GIT_REPO] + devstack_node.run(args=args) + + if branch != "master": + if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"): + branch = "stable/" + branch + log.info("Checking out {branch} branch...".format(branch=branch)) + cmd = "cd devstack && git checkout " + branch + devstack_node.run(args=cmd) + + log.info("Installing DevStack...") + args = ['cd', 'devstack', run.Raw('&&'), './stack.sh'] + devstack_node.run(args=args) + 
+ +def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node): + pool_size = config.get('pool_size', '128') + create_pools(ceph_node, pool_size) + distribute_ceph_conf(devstack_node, ceph_node) + # This is where we would install python-ceph and ceph-common but it appears + # the ceph task does that for us. + generate_ceph_keys(ceph_node) + distribute_ceph_keys(devstack_node, ceph_node) + secret_uuid = set_libvirt_secret(devstack_node, ceph_node) + update_devstack_config_files(devstack_node, secret_uuid) + set_apache_servername(devstack_node) + # Rebooting is the most-often-used method of restarting devstack services + misc.reboot(devstack_node) + start_devstack(devstack_node) + restart_apache(devstack_node) + + +def create_pools(ceph_node, pool_size): + log.info("Creating pools on Ceph cluster...") + + for pool_name in ['volumes', 'images', 'backups']: + args = ['sudo', 'ceph', 'osd', 'pool', 'create', pool_name, pool_size] + ceph_node.run(args=args) + + +def distribute_ceph_conf(devstack_node, ceph_node): + log.info("Copying ceph.conf to DevStack node...") + + ceph_conf_path = '/etc/ceph/ceph.conf' + ceph_conf = ceph_node.read_file(ceph_conf_path, sudo=True) + devstack_node.write_file(ceph_conf_path, ceph_conf, sudo=True) + + +def generate_ceph_keys(ceph_node): + log.info("Generating Ceph keys...") + + ceph_auth_cmds = [ + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'], # noqa + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.glance', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'], # noqa + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'], # noqa + ] + for cmd in ceph_auth_cmds: + ceph_node.run(args=cmd) + + +def distribute_ceph_keys(devstack_node, 
ceph_node): + log.info("Copying Ceph keys to DevStack node...") + + def copy_key(from_remote, key_name, to_remote, dest_path, owner): + key_stringio = BytesIO() + from_remote.run( + args=['sudo', 'ceph', 'auth', 'get-or-create', key_name], + stdout=key_stringio) + key_stringio.seek(0) + to_remote.write_file(dest_path, key_stringio, owner=owner, sudo=True) + keys = [ + dict(name='client.glance', + path='/etc/ceph/ceph.client.glance.keyring', + # devstack appears to just want root:root + #owner='glance:glance', + ), + dict(name='client.cinder', + path='/etc/ceph/ceph.client.cinder.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + dict(name='client.cinder-backup', + path='/etc/ceph/ceph.client.cinder-backup.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + ] + for key_dict in keys: + copy_key(ceph_node, key_dict['name'], devstack_node, + key_dict['path'], key_dict.get('owner')) + + +def set_libvirt_secret(devstack_node, ceph_node): + log.info("Setting libvirt secret...") + + cinder_key = ceph_node.sh('sudo ceph auth get-key client.cinder').strip() + uuid = devstack_node.sh('uuidgen').strip() + + secret_path = '/tmp/secret.xml' + secret_template = textwrap.dedent(""" + <secret ephemeral='no' private='no'> + <uuid>{uuid}</uuid> + <usage type='ceph'> + <name>client.cinder secret</name> + </usage> + </secret>""") + secret_data = secret_template.format(uuid=uuid) + devstack_node.write_file(secret_path, secret_data) + devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file', + secret_path]) + devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret', + uuid, '--base64', cinder_key]) + return uuid + + +def update_devstack_config_files(devstack_node, secret_uuid): + log.info("Updating DevStack config files to use Ceph...") + + def backup_config(node, file_name, backup_ext='.orig.teuth'): + node.run(args=['cp', '-f', file_name, file_name + backup_ext]) + + def 
update_config(config_name, config_stream, update_dict, + section='DEFAULT'): + parser = ConfigParser() + parser.read_file(config_stream) + for (key, value) in update_dict.items(): + parser.set(section, key, value) + out_stream = StringIO() + parser.write(out_stream) + out_stream.seek(0) + return out_stream + + updates = [ + dict(name='/etc/glance/glance-api.conf', options=dict( + default_store='rbd', + rbd_store_user='glance', + rbd_store_pool='images', + show_image_direct_url='True',)), + dict(name='/etc/cinder/cinder.conf', options=dict( + volume_driver='cinder.volume.drivers.rbd.RBDDriver', + rbd_pool='volumes', + rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_flatten_volume_from_snapshot='false', + rbd_max_clone_depth='5', + glance_api_version='2', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + backup_driver='cinder.backup.drivers.ceph', + backup_ceph_conf='/etc/ceph/ceph.conf', + backup_ceph_user='cinder-backup', + backup_ceph_chunk_size='134217728', + backup_ceph_pool='backups', + backup_ceph_stripe_unit='0', + backup_ceph_stripe_count='0', + restore_discard_excess_bytes='true', + )), + dict(name='/etc/nova/nova.conf', options=dict( + libvirt_images_type='rbd', + libvirt_images_rbd_pool='volumes', + libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + libvirt_inject_password='false', + libvirt_inject_key='false', + libvirt_inject_partition='-2', + )), + ] + + for update in updates: + file_name = update['name'] + options = update['options'] + config_data = devstack_node.read_file(file_name, sudo=True) + config_stream = StringIO(config_data) + backup_config(devstack_node, file_name) + new_config_stream = update_config(file_name, config_stream, options) + devstack_node.write_file(file_name, new_config_stream, sudo=True) + + +def set_apache_servername(node): + # Apache complains: "Could not reliably determine the server's fully + # qualified domain name, using 127.0.0.1 for ServerName" + # So, let's make 
sure it knows its name. + log.info("Setting Apache ServerName...") + + hostname = node.hostname + config_file = '/etc/apache2/conf.d/servername' + config_data = "ServerName {name}".format(name=hostname) + node.write_file(config_file, config_data, sudo=True) + + +def start_devstack(devstack_node): + log.info("Patching devstack start script...") + # This causes screen to start headless - otherwise rejoin-stack.sh fails + # because there is no terminal attached. + cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh" + devstack_node.run(args=cmd) + + log.info("Starting devstack...") + cmd = "cd devstack && ./rejoin-stack.sh" + devstack_node.run(args=cmd) + + # This was added because I was getting timeouts on Cinder requests - which + # were trying to access Keystone on port 5000. A more robust way to handle + # this would be to introduce a wait-loop on devstack_node that checks to + # see if a service is listening on port 5000. + log.info("Waiting 30s for devstack to start...") + time.sleep(30) + + +def restart_apache(node): + node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True) + + +@contextlib.contextmanager +def exercise(ctx, config): + log.info("Running devstack exercises...") + + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + + # TODO: save the log *and* preserve failures + #devstack_archive_dir = create_devstack_archive(ctx, devstack_node) + + try: + #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format( # noqa + # dir=devstack_archive_dir) + cmd = "cd devstack && ./exercise.sh" + devstack_node.run(args=cmd, wait=True) + yield + finally: + pass + + +def create_devstack_archive(ctx, devstack_node): + test_dir = misc.get_testdir(ctx) + devstack_archive_dir = "{test_dir}/archive/devstack".format( + test_dir=test_dir) + devstack_node.run(args="mkdir -p " + 
devstack_archive_dir) + return devstack_archive_dir + + +@contextlib.contextmanager +def smoke(ctx, config): + log.info("Running a basic smoketest...") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys())) + + try: + create_volume(devstack_node, an_osd_node, 'smoke0', 1) + yield + finally: + pass + + +def create_volume(devstack_node, ceph_node, vol_name, size): + """ + :param size: The size of the volume, in GB + """ + size = str(size) + log.info("Creating a {size}GB volume named {name}...".format( + name=vol_name, + size=size)) + args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create', + '--display-name', vol_name, size] + cinder_create = devstack_node.sh(args, wait=True) + vol_info = parse_os_table(cinder_create) + log.debug("Volume info: %s", str(vol_info)) + + try: + rbd_output = ceph_node.sh("rbd --id cinder ls -l volumes", wait=True) + except run.CommandFailedError: + log.debug("Original rbd call failed; retrying without '--id cinder'") + rbd_output = ceph_node.sh("rbd ls -l volumes", wait=True) + + assert vol_info['id'] in rbd_output, \ + "Volume not found on Ceph cluster" + assert vol_info['size'] == size, \ + "Volume size on Ceph cluster is different than specified" + return vol_info['id'] + + +def parse_os_table(table_str): + out_dict = dict() + for line in table_str.split('\n'): + if line.startswith('|'): + items = line.split() + out_dict[items[1]] = items[3] + return out_dict diff --git a/qa/tasks/die_on_err.py b/qa/tasks/die_on_err.py new file mode 100644 index 000000000..a6aa4c632 --- /dev/null +++ b/qa/tasks/die_on_err.py @@ -0,0 +1,70 @@ +""" +Raise exceptions on osd coredumps or test err directories +""" +import contextlib +import logging +import time +from teuthology.orchestra import run + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def 
task(ctx, config): + """ + Die if {testdir}/err exists or if an OSD dumps core + """ + if config is None: + config = {} + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + testdir = teuthology.get_testdir(ctx) + + while True: + for i in range(num_osds): + (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.keys() + p = osd_remote.run( + args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d has an error" % i) + raise Exception("osd %d error" % i) + + log_path = '/var/log/ceph/osd.%d.log' % (i) + + p = osd_remote.run( + args = [ + 'tail', '-1', log_path, + run.Raw('|'), + 'grep', '-q', 'end dump' + ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d dumped core" % i) + raise Exception("osd %d dumped core" % i) + + time.sleep(5) diff --git a/qa/tasks/divergent_priors.py b/qa/tasks/divergent_priors.py new file mode 100644 index 000000000..e000bb2bb --- /dev/null +++ b/qa/tasks/divergent_priors.py @@ -0,0 +1,160 @@ +""" +Special case divergence test +""" +import logging +import time + +from teuthology import misc as teuthology +from tasks.util.rados import rados + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 
'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + + time.sleep(20) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 
'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + log.info("success") diff --git a/qa/tasks/divergent_priors2.py b/qa/tasks/divergent_priors2.py new file mode 100644 index 000000000..4d4b07fc4 --- /dev/null +++ b/qa/tasks/divergent_priors2.py @@ -0,0 +1,192 @@ +""" +Special case divergence test with ceph-objectstore-tool export/remove/import +""" +import logging +import time + +from teuthology.exceptions import CommandFailedError +from teuthology import misc as teuthology +from tasks.util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail and a ceph-objectstore-tool export/import + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = 
teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay 
recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + FSPATH = manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + cmd = ((prefix + "--op import --file {file}"). 
+ format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight']) + time.sleep(20); + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + cmd = 'rm {file}'.format(file=expfile) + exp_remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/dnsmasq.py b/qa/tasks/dnsmasq.py new file mode 100644 index 000000000..df8ccecb1 --- /dev/null +++ b/qa/tasks/dnsmasq.py @@ -0,0 +1,170 @@ +""" +Task for dnsmasq configuration +""" +import contextlib +import logging + +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology import contextutil +from teuthology import packaging +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def install_dnsmasq(remote): + """ + If dnsmasq is not installed, install it for the duration of the task. + """ + try: + existing = packaging.get_package_version(remote, 'dnsmasq') + except: + existing = None + + if existing is None: + packaging.install_package('dnsmasq', remote) + try: + yield + finally: + if existing is None: + packaging.remove_package('dnsmasq', remote) + +@contextlib.contextmanager +def backup_resolv(remote, path): + """ + Store a backup of resolv.conf in the testdir and restore it after the task. 
+ """ + remote.run(args=['cp', '/etc/resolv.conf', path]) + try: + yield + finally: + # restore with 'cp' to avoid overwriting its security context + remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf']) + remote.run(args=['rm', path]) + +@contextlib.contextmanager +def replace_resolv(remote, path): + """ + Update resolv.conf to point the nameserver at localhost. + """ + remote.write_file(path, "nameserver 127.0.0.1\n") + try: + # install it + if remote.os.package_type == "rpm": + # for centos ovh resolv.conf has immutable attribute set + remote.run(args=['sudo', 'chattr', '-i', '/etc/resolv.conf'], check_status=False) + remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf']) + yield + finally: + remote.run(args=['rm', path]) + +@contextlib.contextmanager +def setup_dnsmasq(remote, testdir, cnames): + """ configure dnsmasq on the given remote, adding each cname given """ + log.info('Configuring dnsmasq on remote %s..', remote.name) + + # add address entries for each cname + dnsmasq = "server=8.8.8.8\nserver=8.8.4.4\n" + address_template = "address=/{cname}/{ip_address}\n" + for cname, ip_address in cnames.items(): + dnsmasq += address_template.format(cname=cname, ip_address=ip_address) + + # write to temporary dnsmasq file + dnsmasq_tmp = '/'.join((testdir, 'ceph.tmp')) + remote.write_file(dnsmasq_tmp, dnsmasq) + + # move into /etc/dnsmasq.d/ + dnsmasq_path = '/etc/dnsmasq.d/ceph' + remote.run(args=['sudo', 'mv', dnsmasq_tmp, dnsmasq_path]) + # restore selinux context if necessary + remote.run(args=['sudo', 'restorecon', dnsmasq_path], check_status=False) + + # restart dnsmasq + remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq']) + # verify dns name is set + remote.run(args=['ping', '-c', '4', next(iter(cnames.keys()))]) + + try: + yield + finally: + log.info('Removing dnsmasq configuration from remote %s..', remote.name) + # remove /etc/dnsmasq.d/ceph + remote.run(args=['sudo', 'rm', dnsmasq_path]) + # restart dnsmasq + remote.run(args=['sudo', 
'systemctl', 'restart', 'dnsmasq']) + +@contextlib.contextmanager +def task(ctx, config): + """ + Configures dnsmasq to add cnames for teuthology remotes. The task expects a + dictionary, where each key is a role. If all cnames for that role use the + same address as that role, the cnames can be given as a list. For example, + this entry configures dnsmasq on the remote associated with client.0, adding + two cnames for the ip address associated with client.0: + + - dnsmasq: + client.0: + - client0.example.com + - c0.example.com + + If the addresses do not all match the given role, a dictionary can be given + to specify the ip address by its target role. For example: + + - dnsmasq: + client.0: + client.0.example.com: client.0 + client.1.example.com: client.1 + + Cnames that end with a . are treated as prefix for the existing hostname. + For example, if the remote for client.0 has a hostname of 'example.com', + this task will add cnames for dev.example.com and test.example.com: + + - dnsmasq: + client.0: [dev., test.] 
+ """ + # apply overrides + overrides = config.get('overrides', {}) + misc.deep_merge(config, overrides.get('dnsmasq', {})) + + # multiple roles may map to the same remote, so collect names by remote + remote_names = {} + for role, cnames in config.items(): + remote = get_remote_for_role(ctx, role) + if remote is None: + raise ConfigError('no remote for role %s' % role) + + names = remote_names.get(remote, {}) + + if isinstance(cnames, list): + # when given a list of cnames, point to local ip + for cname in cnames: + if cname.endswith('.'): + cname += remote.hostname + names[cname] = remote.ip_address + elif isinstance(cnames, dict): + # when given a dict, look up the remote ip for each + for cname, client in cnames.items(): + r = get_remote_for_role(ctx, client) + if r is None: + raise ConfigError('no remote for role %s' % client) + if cname.endswith('.'): + cname += r.hostname + names[cname] = r.ip_address + + remote_names[remote] = names + + testdir = misc.get_testdir(ctx) + resolv_bak = '/'.join((testdir, 'resolv.bak')) + resolv_tmp = '/'.join((testdir, 'resolv.tmp')) + + # run subtasks for each unique remote + subtasks = [] + for remote, cnames in remote_names.items(): + subtasks.extend([ lambda r=remote: install_dnsmasq(r) ]) + subtasks.extend([ lambda r=remote: backup_resolv(r, resolv_bak) ]) + subtasks.extend([ lambda r=remote: replace_resolv(r, resolv_tmp) ]) + subtasks.extend([ lambda r=remote, cn=cnames: setup_dnsmasq(r, testdir, cn) ]) + + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/dump_stuck.py b/qa/tasks/dump_stuck.py new file mode 100644 index 000000000..4971f1916 --- /dev/null +++ b/qa/tasks/dump_stuck.py @@ -0,0 +1,161 @@ +""" +Dump_stuck command +""" +import logging +import time + +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10): + """ + Do checks. 
Make sure get_stuck_pgs return the right amount of information, then + extract health information from the raw_cluster_cmd and compare the results with + values passed in. This passes if all asserts pass. + + :param num_manager: Ceph manager + :param num_inactive: number of inaactive pages that are stuck + :param num_unclean: number of unclean pages that are stuck + :param num_stale: number of stale pages that are stuck + :param timeout: timeout value for get_stuck_pgs calls + """ + inactive = manager.get_stuck_pgs('inactive', timeout) + unclean = manager.get_stuck_pgs('unclean', timeout) + stale = manager.get_stuck_pgs('stale', timeout) + log.info('inactive %s / %d, unclean %s / %d, stale %s / %d', + len(inactive), num_inactive, + len(unclean), num_unclean, + len(stale), num_stale) + assert len(inactive) == num_inactive + assert len(unclean) == num_unclean + assert len(stale) == num_stale + +def task(ctx, config): + """ + Test the dump_stuck command. + + :param ctx: Context + :param config: Configuration + """ + assert config is None, \ + 'dump_stuck requires no configuration' + assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ + 'dump_stuck requires exactly 2 osds' + + timeout = 60 + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_clean(timeout) + + manager.raw_cluster_cmd('tell', 'mon.a', 'injectargs', '--', +# '--mon-osd-report-timeout 90', + '--mon-pg-stuck-threshold 10') + + # all active+clean + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + num_pgs = manager.get_num_pgs() + + manager.mark_out_osd(0) + time.sleep(timeout) + manager.flush_pg_stats([1]) + manager.wait_for_recovery(timeout) + + # all active+clean+remapped + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + 
manager.mark_in_osd(0) + manager.flush_pg_stats([0, 1]) + manager.wait_for_clean(timeout) + + # all active+clean + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + log.info('stopping first osd') + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.wait_for_active(timeout) + + log.info('waiting for all to be unclean') + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=0, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + + log.info('stopping second osd') + manager.kill_osd(1) + manager.mark_down_osd(1) + + log.info('waiting for all to be stale') + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=num_pgs, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + log.info('reviving') + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): + manager.revive_osd(id_) + manager.mark_in_osd(id_) + while True: + try: + manager.flush_pg_stats([0, 1]) + break + except Exception: + log.exception('osds must not be started yet, waiting...') + time.sleep(1) + manager.wait_for_clean(timeout) + + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) diff --git a/qa/tasks/ec_inconsistent_hinfo.py b/qa/tasks/ec_inconsistent_hinfo.py new file mode 100644 index 000000000..fa10f2c45 --- /dev/null +++ b/qa/tasks/ec_inconsistent_hinfo.py @@ -0,0 +1,225 @@ +""" +Inconsistent_hinfo +""" +import logging +import time +from dateutil.parser import parse +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_deep_scrub_complete(manager, pgid, check_time_now, inconsistent): + 
log.debug("waiting for pg %s deep-scrub complete (check_time_now=%s)" % + (pgid, check_time_now)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg); + assert pg + + last_deep_scrub_time = parse(pg['last_deep_scrub_stamp']).strftime('%s') + if last_deep_scrub_time < check_time_now: + log.debug('not scrubbed') + continue + + status = pg['state'].split('+') + if inconsistent: + assert 'inconsistent' in status + else: + assert 'inconsistent' not in status + return + + assert False, 'not scrubbed' + + +def wait_for_backfilling_complete(manager, pgid, from_osd, to_osd): + log.debug("waiting for pg %s backfill from osd.%s to osd.%s complete" % + (pgid, from_osd, to_osd)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if 'backfilling' in status: + assert from_osd in pg['acting'] and to_osd in pg['up'] + log.debug('backfilling') + continue + if to_osd not in pg['up']: + log.debug('backfill not started yet') + continue + log.debug('backfilled!') + break + +def task(ctx, config): + """ + Test handling of objects with inconsistent hash info during backfill and deep-scrub. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ec_inconsistent_hinfo task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.info("acting=%s" % acting) + assert acting + primary = acting[0] + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + log.debug("create test object") + obj = 'test' + rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile]) + + victim = acting[1] + + log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair" + % victim) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=victim) + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, True) + + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'repair', pgid) 
+ wait_for_deep_scrub_complete(manager, pgid, check_time_now, False) + + log.info("remove test object hash info from primary osd.%s shard and test backfill" + % primary) + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + + # mark the osd out to trigger a rebalance/backfill + source = acting[1] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.mark_out_osd(source) + + # wait for everything to peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + source, target = target, source + log.info("remove test object hash info from non-primary osd.%s shard and test backfill" + % source) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd in to trigger a rebalance/backfill + manager.mark_in_osd(target) + + # wait for everything to peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + log.info("remove hash info from two 
shards and test backfill") + + source = acting[2] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd out to trigger a rebalance/backfill + manager.mark_out_osd(source) + + # wait for everything to peer, backfill and detect unfound object + wait_for_backfilling_complete(manager, pgid, source, target) + + # verify that there is unfound object + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'backfill_unfound' in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 1 + m = manager.list_pg_unfound(pgid) + log.debug('list_pg_unfound=%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + # mark stuff lost + pgs = manager.get_pg_stats() + manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete') + + # wait for everything to peer and be happy... + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_recovery() diff --git a/qa/tasks/ec_lost_unfound.py b/qa/tasks/ec_lost_unfound.py new file mode 100644 index 000000000..57a9364ec --- /dev/null +++ b/qa/tasks/ec_lost_unfound.py @@ -0,0 +1,159 @@ +""" +Lost_unfound +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects on an ec pool. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.wait_for_clean() + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '2', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'lost_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + erasure_code_profile_name=profile_name, + min_size=2) + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.kill_osd(3) + manager.mark_down_osd(3) + + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + + # take out osd.1 and a necessary shard of those objects. 
+ manager.kill_osd(1) + manager.mark_down_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + manager.revive_osd(0) + manager.wait_till_osd_is_up(0) + manager.revive_osd(3) + manager.wait_till_osd_is_up(3) + + manager.flush_pg_stats([0, 2, 3]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2, 3]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , pool, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + log.info("reverting unfound in %s", pg['pgid']) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2, 3]) + manager.wait_for_recovery() + + if not config.get('parallel_bench', True): + time.sleep(20) + + # verify result + for f in range(1, 10): + err = 
rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() diff --git a/qa/tasks/exec_on_cleanup.py b/qa/tasks/exec_on_cleanup.py new file mode 100644 index 000000000..5a630781a --- /dev/null +++ b/qa/tasks/exec_on_cleanup.py @@ -0,0 +1,61 @@ +""" +Exececute custom commands during unwind/cleanup +""" +import logging +import contextlib + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute commands on a given role + + tasks: + - ceph: + - kclient: [client.a] + - exec: + client.a: + - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control" + - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control" + - interactive: + + It stops and fails with the first command that does not return on success. It means + that if the first command fails, the second won't run at all. + + To avoid confusion it is recommended to explicitly enclose the commands in + double quotes. For instance if the command is false (without double quotes) it will + be interpreted as a boolean by the YAML parser. 
+ + :param ctx: Context + :param config: Configuration + """ + try: + yield + finally: + log.info('Executing custom commands...') + assert isinstance(config, dict), "task exec got invalid config" + + testdir = teuthology.get_testdir(ctx) + + if 'all' in config and len(config) == 1: + a = config['all'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles) + + for role, ls in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + for c in ls: + c.replace('$TESTDIR', testdir) + remote.run( + args=[ + 'sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'bash', + '-c', + c], + ) + diff --git a/qa/tasks/filestore_idempotent.py b/qa/tasks/filestore_idempotent.py new file mode 100644 index 000000000..319bef768 --- /dev/null +++ b/qa/tasks/filestore_idempotent.py @@ -0,0 +1,83 @@ +""" +Filestore/filejournal handler +""" +import logging +from teuthology.orchestra import run +import random + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test filestore/filejournal handling of non-idempotent events. + + Currently this is a kludge; we require the ceph task precedes us just + so that we get the tarball installed to run the test binary. + + :param ctx: Context + :param config: Configuration + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + # just use the first client... 
+ client = next(iter(clients)) + (remote,) = ctx.cluster.only(client).remotes.keys() + + testdir = teuthology.get_testdir(ctx) + + dir = '%s/ceph.data/test.%s' % (testdir, client) + + seed = int(random.uniform(1,100)) + start = 800 + random.randint(800,1200) + end = start + 50 + + try: + log.info('creating a working dir') + remote.run(args=['mkdir', dir]) + remote.run( + args=[ + 'cd', dir, + run.Raw('&&'), + 'wget','-q', '-Orun_seed_to.sh', + 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD', + run.Raw('&&'), + 'wget','-q', '-Orun_seed_to_range.sh', + 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD', + run.Raw('&&'), + 'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh', + ]); + + log.info('running a series of tests') + proc = remote.run( + args=[ + 'cd', dir, + run.Raw('&&'), + './run_seed_to_range.sh', str(seed), str(start), str(end), + ], + wait=False, + check_status=False) + result = proc.wait() + + if result != 0: + remote.run( + args=[ + 'cp', '-a', dir, '{tdir}/archive/idempotent_failure'.format(tdir=testdir), + ]) + raise Exception("./run_seed_to_range.sh errored out") + + finally: + remote.run(args=[ + 'rm', '-rf', '--', dir + ]) + diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py new file mode 100644 index 000000000..f7a9330e2 --- /dev/null +++ b/qa/tasks/fs.py @@ -0,0 +1,152 @@ +""" +CephFS sub-tasks. +""" + +import logging +import re + +from tasks.cephfs.filesystem import Filesystem, MDSCluster + +log = logging.getLogger(__name__) + +# Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5) +CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1) +def pre_upgrade_save(ctx, config): + """ + That the upgrade procedure doesn't clobber state: save state. 
+ """ + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + state = {} + ctx['mds-upgrade-state'] = state + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = {} + fs_state['epoch'] = mdsmap['epoch'] + fs_state['max_mds'] = mdsmap['max_mds'] + fs_state['flags'] = mdsmap['flags'] & UPGRADE_FLAGS_MASK + state[fscid] = fs_state + log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + +def post_upgrade_checks(ctx, config): + """ + That the upgrade procedure doesn't clobber state. + """ + + state = ctx['mds-upgrade-state'] + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = state[fscid] + log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + # check state was restored to previous values + assert fs_state['max_mds'] == mdsmap['max_mds'] + assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK) + + # now confirm that the upgrade procedure was followed + epoch = mdsmap['epoch'] + pre_upgrade_epoch = fs_state['epoch'] + assert pre_upgrade_epoch < epoch + should_decrease_max_mds = fs_state['max_mds'] > 1 + did_decrease_max_mds = False + should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY + did_disable_allow_standby_replay = False + for i in range(pre_upgrade_epoch+1, mdsmap['epoch']): + old_status = mdsc.status(epoch=i) + old_fs = old_status.get_fsmap(fscid) + old_mdsmap = old_fs['mdsmap'] + if should_decrease_max_mds and old_mdsmap['max_mds'] == 1: + log.debug(f"max_mds reduced in epoch {i}") + did_decrease_max_mds = True + if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY): + log.debug(f"allow_standby_replay disabled in epoch {i}") + did_disable_allow_standby_replay = True + assert not should_decrease_max_mds or did_decrease_max_mds + assert not 
should_disable_allow_standby_replay or did_disable_allow_standby_replay + + +def ready(ctx, config): + """ + That the file system is ready for clients. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + timeout = config.get('timeout', 300) + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for filesystem in status.get_filesystems(): + fs = Filesystem(ctx, fscid=filesystem['id']) + fs.wait_for_daemons(timeout=timeout, status=status) + +def clients_evicted(ctx, config): + """ + Check clients are evicted, unmount (cleanup) if so. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + clients = config.get('clients') + + if clients is None: + clients = {("client."+client_id): True for client_id in ctx.mounts} + + log.info("clients is {}".format(str(clients))) + + fs = Filesystem(ctx) + status = fs.status() + + has_session = set() + mounts = {} + for client in clients: + client_id = re.match("^client.([0-9]+)$", client).groups(1)[0] + mounts[client] = ctx.mounts.get(client_id) + + for rank in fs.get_ranks(status=status): + ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status) + for session in ls: + for client, evicted in clients.items(): + mount = mounts.get(client) + if mount is not None: + global_id = mount.get_global_id() + if session['id'] == global_id: + if evicted: + raise RuntimeError("client still has session: {}".format(str(session))) + else: + log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank'])) + has_session.add(client) + + no_session = set(clients) - has_session + should_assert = False + for client, evicted in clients.items(): + mount = mounts.get(client) + if mount is not None: + if evicted: + log.info("confirming client {} is blocklisted".format(client)) + assert fs.is_addr_blocklisted(mount.get_global_addr()) + elif client in no_session: + 
log.info("client {} should not be evicted but has no session with an MDS".format(client)) + fs.is_addr_blocklisted(mount.get_global_addr()) # for debugging + should_assert = True + if should_assert: + raise RuntimeError("some clients which should not be evicted have no session with an MDS?") diff --git a/qa/tasks/fwd_scrub.py b/qa/tasks/fwd_scrub.py new file mode 100644 index 000000000..44fd97baa --- /dev/null +++ b/qa/tasks/fwd_scrub.py @@ -0,0 +1,152 @@ +""" +Thrash mds by simulating failures +""" +import logging +import contextlib + +from gevent import sleep, GreenletExit +from gevent.greenlet import Greenlet +from gevent.event import Event +from teuthology import misc as teuthology + +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +class ForwardScrubber(Thrasher, Greenlet): + """ + ForwardScrubber:: + + The ForwardScrubber does forward scrubbing of file-systems during execution + of other tasks (workunits, etc). + """ + + def __init__(self, fs, scrub_timeout=300, sleep_between_iterations=1): + super(ForwardScrubber, self).__init__() + + self.logger = log.getChild('fs.[{f}]'.format(f=fs.name)) + self.fs = fs + self.name = 'thrasher.fs.[{f}]'.format(f=fs.name) + self.stopping = Event() + self.scrub_timeout = scrub_timeout + self.sleep_between_iterations = sleep_between_iterations + + def _run(self): + try: + self.do_scrub() + except Exception as e: + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... 
+ + def stop(self): + self.stopping.set() + + def do_scrub(self): + """ + Perform the file-system scrubbing + """ + self.logger.info(f'start scrubbing fs: {self.fs.name}') + + try: + while not self.stopping.is_set(): + self._scrub() + sleep(self.sleep_between_iterations) + except GreenletExit: + pass + + self.logger.info(f'end scrubbing fs: {self.fs.name}') + + def _scrub(self, path="/", recursive=True): + self.logger.info(f"scrubbing fs: {self.fs.name}") + scrubopts = ["force"] + if recursive: + scrubopts.append("recursive") + out_json = self.fs.run_scrub(["start", path, ",".join(scrubopts)]) + assert out_json is not None + + tag = out_json['scrub_tag'] + + assert tag is not None + assert out_json['return_code'] == 0 + assert out_json['mode'] == 'asynchronous' + + return self.fs.wait_until_scrub_complete(tag=tag, sleep=30, + timeout=self.scrub_timeout) + +def stop_all_fwd_scrubbers(thrashers): + for thrasher in thrashers: + if not isinstance(thrasher, ForwardScrubber): + continue + thrasher.stop() + thrasher.join() + if thrasher.exception is not None: + raise RuntimeError(f"error during scrub thrashing: {thrasher.exception}") + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the mds by running scrub iterations while another task/workunit + is running. 
+ Example config: + + - fwd_scrub: + scrub_timeout: 300 + sleep_between_iterations: 1 + """ + + mds_cluster = MDSCluster(ctx) + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'fwd_scrub task only accepts a dict for configuration' + mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + assert len(mdslist) > 0, \ + 'fwd_scrub task requires at least 1 metadata server' + + (first,) = ctx.cluster.only(f'mds.{mdslist[0]}').remotes.keys() + manager = ceph_manager.CephManager( + first, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # make sure everyone is in active, standby, or standby-replay + log.info('Wait for all MDSs to reach steady state...') + status = mds_cluster.status() + while True: + steady = True + for info in status.get_all(): + state = info['state'] + if state not in ('up:active', 'up:standby', 'up:standby-replay'): + steady = False + break + if steady: + break + sleep(2) + status = mds_cluster.status() + + log.info('Ready to start scrub thrashing') + + manager.wait_for_clean() + assert manager.is_clean() + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + for fs in status.get_filesystems(): + fwd_scrubber = ForwardScrubber(Filesystem(ctx, fscid=fs['id']), + config['scrub_timeout'], + config['sleep_between_iterations']) + fwd_scrubber.start() + ctx.ceph[config['cluster']].thrashers.append(fwd_scrubber) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining ForwardScrubbers') + stop_all_fwd_scrubbers(ctx.ceph[config['cluster']].thrashers) + log.info('done joining') diff --git a/qa/tasks/immutable_object_cache.py b/qa/tasks/immutable_object_cache.py new file mode 100644 index 000000000..b8034de47 --- /dev/null +++ b/qa/tasks/immutable_object_cache.py @@ -0,0 +1,72 @@ +""" +immutable object cache task +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = 
logging.getLogger(__name__) + +@contextlib.contextmanager +def immutable_object_cache(ctx, config): + """ + setup and cleanup immutable object cache + """ + log.info("start immutable object cache daemon") + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + # make sure that there is one immutable object cache daemon on the same node. + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + remote.run( + args=[ + 'ceph-immutable-object-cache', '-b', + ] + ) + try: + yield + finally: + log.info("check and cleanup immutable object cache") + for client, client_config in config.items(): + client_config = client_config if client_config is not None else dict() + (remote,) = ctx.cluster.only(client).remotes.keys() + cache_path = client_config.get('immutable object cache path', '/tmp/ceph-immutable-object-cache') + ls_command = '"$(ls {} )"'.format(cache_path) + remote.run( + args=[ + 'test', '-n', run.Raw(ls_command), + ] + ) + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + remote.run( + args=[ + 'sudo', 'rm', '-rf', cache_path, run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for start immutable_object_cache. 
+ """ + assert isinstance(config, dict), \ + "task immutable_object_cache only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: immutable_object_cache(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/immutable_object_cache_thrash.py b/qa/tasks/immutable_object_cache_thrash.py new file mode 100644 index 000000000..0bf3ad3a0 --- /dev/null +++ b/qa/tasks/immutable_object_cache_thrash.py @@ -0,0 +1,79 @@ +""" +immutable object cache thrash task +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +DEFAULT_KILL_DAEMON_TIME = 2 +DEFAULT_DEAD_TIME = 30 +DEFAULT_LIVE_TIME = 120 + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def thrashes_immutable_object_cache_daemon(ctx, config): + """ + thrashes immutable object cache daemon. + It can test reconnection feature of RO cache when RO daemon crash + TODO : replace sleep with better method. + """ + log.info("thrashes immutable object cache daemon") + + # just thrash one rbd client. 
+ client, client_config = list(config.items())[0] + (remote,) = ctx.cluster.only(client).remotes.keys() + client_config = client_config if client_config is not None else dict() + kill_daemon_time = client_config.get('kill_daemon_time', DEFAULT_KILL_DAEMON_TIME) + dead_time = client_config.get('dead_time', DEFAULT_DEAD_TIME) + live_time = client_config.get('live_time', DEFAULT_LIVE_TIME) + + for i in range(kill_daemon_time): + log.info("ceph-immutable-object-cache crash....") + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + # librbd shoud normally run when ceph-immutable-object-cache + remote.run( + args=[ + 'sleep', '{dead_time}'.format(dead_time=dead_time), + ] + ) + # librbd should reconnect daemon + log.info("startup ceph-immutable-object-cache") + remote.run( + args=[ + 'ceph-immutable-object-cache', '-b', + ] + ) + remote.run( + args=[ + 'sleep', '{live_time}'.format(live_time=live_time), + ] + ) + try: + yield + finally: + log.info("cleanup") + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for testing immutable_object_cache thrash. + """ + assert isinstance(config, dict), \ + "task immutable_object_cache_thrash only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: thrashes_immutable_object_cache_daemon(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py new file mode 100644 index 000000000..d7bc9fa83 --- /dev/null +++ b/qa/tasks/kclient.py @@ -0,0 +1,144 @@ +""" +Mount/unmount a ``kernel`` client. 
+""" +import contextlib +import logging + +from teuthology.misc import deep_merge +from teuthology.orchestra.run import CommandFailedError +from teuthology import misc +from teuthology.contextutil import MaxWhileTries +from tasks.cephfs.kernel_mount import KernelMount + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a ``kernel`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. + + ``brxnet`` should be a Private IPv4 Address range, default range is + [192.168.0.0/16] + + Example that mounts all clients:: + + tasks: + - ceph: + - kclient: + - interactive: + - brxnet: [192.168.0.0/16] + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + + Pass a dictionary instead of lists to specify per-client config: + + tasks: + -kclient: + client.0: + debug: true + mntopts: ["nowsync"] + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting kernel clients...') + + if config is None: + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' 
in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + + test_dir = misc.get_testdir(ctx) + + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + + mounts = {} + overrides = ctx.config.get('overrides', {}).get('kclient', {}) + top_overrides = dict(filter(lambda x: 'client.' not in x[0], overrides.items())) + for id_, remote in clients: + entity = f"client.{id_}" + client_config = config.get(entity) + if client_config is None: + client_config = {} + # top level overrides + deep_merge(client_config, top_overrides) + # mount specific overrides + client_config_overrides = overrides.get(entity) + deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + cephfs_name = client_config.get("cephfs_name") + if config.get("disabled", False) or not client_config.get('mounted', True): + continue + + kernel_mount = KernelMount( + ctx=ctx, + test_dir=test_dir, + client_id=id_, + client_remote=remote, + brxnet=ctx.teuthology_config.get('brxnet', None), + config=client_config, + cephfs_name=cephfs_name) + + mounts[id_] = kernel_mount + + if client_config.get('debug', False): + remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + + kernel_mount.mount(mntopts=client_config.get('mntopts', [])) + + def umount_all(): + log.info('Unmounting kernel clients...') + + forced = False + for mount in mounts.values(): + if mount.is_mounted(): + try: + mount.umount() + except (CommandFailedError, MaxWhileTries): + log.warning("Ordinary umount failed, forcing...") + forced = True + mount.umount_wait(force=True) + + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + + 
return forced + + ctx.mounts = mounts + try: + yield mounts + except: + umount_all() # ignore forced retval, we are already in error handling + finally: + + forced = umount_all() + if forced: + # The context managers within the kclient manager worked (i.e. + # the test workload passed) but for some reason we couldn't + # umount, so turn this into a test failure. + raise RuntimeError("Kernel mounts did not umount cleanly") diff --git a/qa/tasks/keycloak.py b/qa/tasks/keycloak.py new file mode 100644 index 000000000..055902836 --- /dev/null +++ b/qa/tasks/keycloak.py @@ -0,0 +1,463 @@ +""" +Deploy and configure Keycloak for Teuthology +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +def get_keycloak_version(config): + for client, client_config in config.items(): + if 'keycloak_version' in client_config: + keycloak_version = client_config.get('keycloak_version') + return keycloak_version + +def get_keycloak_dir(ctx, config): + keycloak_version = get_keycloak_version(config) + current_version = 'keycloak-'+keycloak_version + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + +def run_in_keycloak_dir(ctx, client, config, args, **kwargs): + return ctx.cluster.only(client).run( + args=[ 'cd', get_keycloak_dir(ctx,config), run.Raw('&&'), ] + args, + **kwargs + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Downloading the two required tar files + 1. Keycloak + 2. 
Wildfly (Application Server) + """ + assert isinstance(config, dict) + log.info('Installing packages for Keycloak...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_keycloak_version(config) + link1 = 'https://downloads.jboss.org/keycloak/'+current_version+'/keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['wget', link1]) + + file1 = 'keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', test_dir, '-xvzf', file1]) + + link2 ='https://downloads.jboss.org/keycloak/'+current_version+'/adapters/keycloak-oidc/keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['cd', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), 'wget', link2]) + + file2 = 'keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), '-xvzf', '{tdr}/{file}'.format(tdr=get_keycloak_dir(ctx,config),file=file2)]) + + try: + yield + finally: + log.info('Removing packaged dependencies of Keycloak...') + for client in config: + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config))], + ) + +@contextlib.contextmanager +def download_conf(ctx, config): + """ + Downloads confi.py used in run_admin_cmds + """ + assert isinstance(config, dict) + log.info('Downloading conf...') + testdir = teuthology.get_testdir(ctx) + conf_branch = 'main' + conf_repo = 'https://github.com/TRYTOBE8TME/scripts.git' + for (client, _) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', conf_branch, + conf_repo, + '{tdir}/scripts'.format(tdir=testdir), + ], + ) + try: + yield + finally: + log.info('Removing conf...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + 
'{tdir}/scripts'.format(tdir=testdir), + ], + ) + +@contextlib.contextmanager +def build(ctx,config): + """ + Build process which needs to be done before starting a server. + """ + assert isinstance(config, dict) + log.info('Building Keycloak...') + for (client,_) in config.items(): + run_in_keycloak_dir(ctx, client, config,['cd', 'bin', run.Raw('&&'), './jboss-cli.sh', '--file=adapter-elytron-install-offline.cli']) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_keycloak(ctx,config): + """ + This includes two parts: + 1. Adding a user to keycloak which is actually used to log in when we start the server and check in browser. + 2. Starting the server. + """ + assert isinstance(config, dict) + log.info('Bringing up Keycloak...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + '{tdir}/bin/add-user-keycloak.sh'.format(tdir=get_keycloak_dir(ctx,config)), + '-r', 'master', + '-u', 'admin', + '-p', 'admin', + ], + ) + + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './standalone.sh', run.Raw('&'), 'exit']) + try: + yield + finally: + log.info('Stopping Keycloak Server...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './jboss-cli.sh', '--connect', 'command=:shutdown']) + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Keycloak Admin commands(kcadm commands) in order to get the token, aud value, thumbprint and realm name. 
+ """ + assert isinstance(config, dict) + log.info('Running admin commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', 'master', + '--user', 'admin', + '--password', 'admin', + '--client', 'admin-cli', + ], + ) + + realm_name='demorealm' + realm='realm={}'.format(realm_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'realms', + '-s', realm, + '-s', 'enabled=true', + '-s', 'accessTokenLifespan=1800', + '-o', + ], + ) + + client_name='my_client' + client='clientId={}'.format(client_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'clients', + '-r', realm_name, + '-s', client, + '-s', 'directAccessGrantsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans1= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', 'clients', + '-r', realm_name, + '-F', 'id,clientId', run.Raw('|'), + 'jq', '-r', '.[] | select (.clientId == "my_client") | .id' + ]) + + pre0=ans1.rstrip() + pre1="clients/{}".format(pre0) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'update', pre1, + '-r', realm_name, + '-s', 'enabled=true', + '-s', 'serviceAccountsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans2= pre1+'/client-secret' + + out2= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', ans2, + '-r', realm_name, + '-F', 'value' + ]) + + ans0= '{client}:{secret}'.format(client=client_name,secret=out2[15:51]) + ans3= 'client_secret={}'.format(out2[15:51]) + 
clientid='client_id={}'.format(client_name) + + proto_map = pre1+"/protocol-mappers/models" + uname = "username=testuser" + upass = "password=testuser" + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'users', + '-s', uname, + '-s', 'enabled=true', + '-s', 'attributes.\"https://aws.amazon.com/tags\"=\"{"principal_tags":{"Department":["Engineering", "Marketing"]}}\"', + '-r', realm_name, + ], + ) + + sample = 'testuser' + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'set-password', + '-r', realm_name, + '--username', sample, + '--new-password', sample, + ], + ) + + file_path = '{tdir}/scripts/confi.py'.format(tdir=teuthology.get_testdir(ctx)) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', proto_map, + '-r', realm_name, + '-f', file_path, + ], + ) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', realm_name, + '--user', sample, + '--password', sample, + '--client', 'admin-cli', + ], + ) + + out9= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=password', + '-d', clientid, + '-d', ans3, + '-d', uname, + '-d', upass, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', '.access_token' + ]) + + user_token_pre = out9.rstrip() + user_token = '{}'.format(user_token_pre) + + out3= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=client_credentials', + '-d', clientid, + '-d', ans3, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', 
'.access_token' + ]) + + pre2=out3.rstrip() + acc_token= 'token={}'.format(pre2) + ans4= '{}'.format(pre2) + + out4= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'GET', + '-H', 'Content-Type:application/x-www-form-urlencoded', + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/certs', run.Raw('|'), + 'jq', '-r', '.keys[].x5c[]' + ]) + + pre3=out4.rstrip() + cert_value='{}'.format(pre3) + start_value= "-----BEGIN CERTIFICATE-----\n" + end_value= "\n-----END CERTIFICATE-----" + user_data="" + user_data+=start_value + user_data+=cert_value + user_data+=end_value + + remote.write_file( + path='{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + data=user_data + ) + + out5= toxvenv_sh(ctx, remote, + [ + 'openssl', 'x509', + '-in', '{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + '--fingerprint', '--noout', '-sha1' + ]) + + pre_ans= '{}'.format(out5[17:76]) + ans5="" + + for character in pre_ans: + if(character!=':'): + ans5+=character + + str1 = 'curl' + str2 = '-k' + str3 = '-v' + str4 = '-X' + str5 = 'POST' + str6 = '-u' + str7 = '-d' + str8 = 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token/introspect' + + out6= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.aud' + ]) + + out7= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.sub' + ]) + + out8= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.azp' + ]) + + ans6=out6.rstrip() + ans7=out7.rstrip() + ans8=out8.rstrip() + + os.environ['TOKEN']=ans4 + os.environ['THUMBPRINT']=ans5 + os.environ['AUD']=ans6 + os.environ['SUB']=ans7 + os.environ['AZP']=ans8 + os.environ['USER_TOKEN']=user_token + os.environ['KC_REALM']=realm_name + + try: + yield + finally: + log.info('Removing 
@contextlib.contextmanager
def task(ctx, config):
    """
    Bring up Keycloak for the duration of the enclosed tasks.

    To run keycloak the prerequisite is to run the tox task. Following is
    the way how to run tox and then keycloak::

        tasks:
        - tox: [ client.0 ]
        - keycloak:
            client.0:
              keycloak_version: 11.0.0

    To pass extra arguments to nose (e.g. to run a certain test)::

        tasks:
        - tox: [ client.0 ]
        - keycloak:
            client.0:
              keycloak_version: 11.0.0
        - s3tests:
            client.0:
              extra_attrs: ['webidentity_test']

    :param ctx: Context; must already carry the ``tox`` attribute
    :param config: None (meaning every client role), a list of client
                   roles, or a dictionary keyed by client role
    """
    supported = config is None or isinstance(config, (list, dict))
    assert supported, \
        "task keycloak only supports a list or dictionary for configuration"

    if not hasattr(ctx, 'tox'):
        raise ConfigError('keycloak must run after the tox task')

    client_ids = teuthology.all_roles_of_type(ctx.cluster, 'client')
    all_clients = ['client.{id}'.format(id=client_id)
                   for client_id in client_ids]

    # Normalise every accepted form to a dict keyed by client role.
    if config is None:
        config = all_clients
    if isinstance(config, list):
        config = dict.fromkeys(config)

    log.debug('Keycloak config is %s', config)

    # Entered in this order, unwound in reverse by contextutil.nested.
    stages = [
        lambda: install_packages(ctx=ctx, config=config),
        lambda: build(ctx=ctx, config=config),
        lambda: run_keycloak(ctx=ctx, config=config),
        lambda: download_conf(ctx=ctx, config=config),
        lambda: run_admin_cmds(ctx=ctx, config=config),
    ]
    with contextutil.nested(*stages):
        yield
def get_keystone_dir(ctx):
    """Return the keystone checkout path, ``<testdir>/keystone``."""
    return '{tdir}/keystone'.format(tdir=teuthology.get_testdir(ctx))


def run_in_keystone_dir(ctx, client, args, **kwargs):
    """
    Run ``args`` on the remote(s) holding ``client`` after cd-ing into the
    keystone directory.

    Extra keyword arguments are forwarded to the cluster's ``run``; its
    result is returned unchanged.
    """
    return ctx.cluster.only(client).run(
        args=['cd', get_keystone_dir(ctx), run.Raw('&&')] + args,
        **kwargs
    )


def get_toxvenv_dir(ctx):
    """Return the tox virtualenv path recorded on ``ctx.tox``."""
    return ctx.tox.venv_path


def toxvenv_sh(ctx, remote, args, **kwargs):
    """
    Run ``args`` through ``remote.sh`` with the tox virtualenv activated
    and return whatever ``remote.sh`` returns.
    """
    activate = get_toxvenv_dir(ctx) + '/bin/activate'
    return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs)


def run_in_keystone_venv(ctx, client, args, **kwargs):
    """
    Run ``args`` inside the keystone directory with keystone's own
    ``.tox/venv`` virtualenv activated.

    Mirrors run_in_keystone_dir: keyword arguments are passed through and
    the run result is returned, so callers that need the process handle or
    custom stdin/stdout no longer have to re-implement the activation
    prefix. Existing callers that ignore the result are unaffected.
    """
    activate_venv = ['source', '.tox/venv/bin/activate', run.Raw('&&')]
    return run_in_keystone_dir(ctx, client, activate_venv + args, **kwargs)


def get_keystone_venved_cmd(ctx, cmd, args):
    """
    Build an argv that runs script ``cmd`` from keystone's virtualenv bin
    directory with that virtualenv's python, followed by ``args``.
    """
    kbindir = get_keystone_dir(ctx) + '/.tox/venv/bin/'
    return [kbindir + 'python', kbindir + cmd] + args
@contextlib.contextmanager
def install_packages(ctx, config):
    """
    Install the packaged dependencies of Keystone on every configured
    client, and remove those same packages again on exit.

    The context passed in should be identical to the context
    passed in to the main task.
    """
    assert isinstance(config, dict)
    log.info('Installing packages for Keystone...')

    keystone_dir = get_keystone_dir(ctx)
    bindep_fixup = patch_bindep_template.replace("{keystone_dir}", keystone_dir)
    bindep_file = '{}/bindep.txt'.format(keystone_dir)

    packages = {}  # client role -> package names installed for it
    for client in config:
        (remote,) = ctx.cluster.only(client).remotes.keys()

        # feed the bindep.txt fix-up script to python on stdin
        toxvenv_sh(ctx, remote, ['python'], stdin=bindep_fixup)

        # use bindep to read which dependencies we need from keystone/bindep.txt
        toxvenv_sh(ctx, remote, ['pip', 'install', 'bindep'])
        bindep_output = toxvenv_sh(
            ctx, remote,
            ['bindep', '--brief', '--file', bindep_file],
            check_status=False,  # returns 1 on success?
        )

        deps = bindep_output.splitlines()
        packages[client] = deps
        for dep in deps:
            install_package(dep, remote)

    try:
        yield
    finally:
        log.info('Removing packaged dependencies of Keystone...')

        for client in config:
            (remote,) = ctx.cluster.only(client).remotes.keys()
            for dep in packages[client]:
                remove_package(dep, remote)
def _start_keystone_daemon(ctx, remote, client, kind, host, port):
    """
    Start one keystone WSGI endpoint as a background teuthology daemon.

    :param kind: 'public' or 'admin'; selects the ``keystone-wsgi-<kind>``
                 script and the ``keystone.<kind>.<id>`` daemon id
    :returns: (daemon id, cluster name) needed to look the daemon up again
    """
    cluster_name, _, client_id = teuthology.split_role(client)
    daemon_id = 'keystone.{kind}.{id}'.format(kind=kind, id=client_id)

    run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-' + kind,
        [ '--host', host, '--port', str(port),
          # Let's put the Keystone in background, wait for EOF
          # and after receiving it, send SIGTERM to the daemon.
          # This crazy hack is because Keystone, in contrast to
          # our other daemons, doesn't quit on stdin.close().
          # Teuthology relies on this behaviour.
          run.Raw('& { read; kill %1; }')
        ]
    )
    ctx.daemons.add_daemon(
        remote, 'keystone', daemon_id,
        cluster=cluster_name,
        args=run_cmd,
        logger=log.getChild(client),
        stdin=run.PIPE,
        cwd=get_keystone_dir(ctx),
        wait=False,
        check_status=False,
    )
    return daemon_id, cluster_name


@contextlib.contextmanager
def run_keystone(ctx, config):
    """
    Start the public and admin keystone endpoints for every configured
    client and stop all of them again on exit.

    Host/port pairs are read from ``ctx.keystone.public_endpoints`` and
    ``ctx.keystone.admin_endpoints``, keyed by client role.

    Every daemon that was started is recorded and stopped during teardown.
    Previously only the daemons of the last client in ``config`` were
    stopped, an empty ``config`` raised NameError in the ``finally`` clause,
    and a failure while starting a later client left the earlier daemons
    running.
    """
    assert isinstance(config, dict)
    log.info('Starting keystone...')

    started = []  # (kind, daemon id, cluster name), in start order
    try:
        for client in config:
            (remote,) = ctx.cluster.only(client).remotes.keys()

            # public endpoint first, then admin, as before
            for kind, endpoints in (
                    ('public', ctx.keystone.public_endpoints),
                    ('admin', ctx.keystone.admin_endpoints)):
                host, port = endpoints[client]
                daemon_id, cluster_name = _start_keystone_daemon(
                    ctx, remote, client, kind, host, port)
                started.append((kind, daemon_id, cluster_name))

            # sleep driven synchronization
            run_in_keystone_venv(ctx, client, [ 'sleep', '15' ])

        yield
    finally:
        # Reverse start order: admin before public, last client first.
        for kind, daemon_id, cluster_name in reversed(started):
            log.info('Stopping Keystone %s instance', kind)
            ctx.daemons.get_daemon('keystone', daemon_id,
                                   cluster_name).stop()
def assign_ports(ctx, config, initial_port):
    """
    Assign port numbers starting from @initial_port

    Walks ``ctx.cluster.remotes`` in order and gives each role that is
    present in ``config`` the next consecutive port.

    :returns: dict mapping role -> (host, port), where host is the part of
              the remote's name after the '@'
    """
    # Flatten to (role, remote) pairs for the roles we were asked about.
    selected = (
        (role, remote)
        for remote, roles_for_host in ctx.cluster.remotes.items()
        for role in roles_for_host
        if role in config
    )
    return {
        role: (remote.name.split('@')[1], initial_port + offset)
        for offset, (role, remote) in enumerate(selected)
    }
--git a/qa/tasks/kubeadm.py b/qa/tasks/kubeadm.py new file mode 100644 index 000000000..9f147c6e7 --- /dev/null +++ b/qa/tasks/kubeadm.py @@ -0,0 +1,562 @@ +""" +Kubernetes cluster task, deployed via kubeadm +""" +import argparse +import contextlib +import ipaddress +import logging +import random +import yaml +from io import BytesIO + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def _kubectl(ctx, config, args, **kwargs): + cluster_name = config['cluster'] + ctx.kubeadm[cluster_name].bootstrap_remote.run( + args=['kubectl'] + args, + **kwargs, + ) + + +def kubectl(ctx, config): + if isinstance(config, str): + config = [config] + assert isinstance(config, list) + for c in config: + if isinstance(c, str): + _kubectl(ctx, config, c.split(' ')) + else: + _kubectl(ctx, config, c) + + +@contextlib.contextmanager +def preflight(ctx, config): + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'modprobe', 'br_netfilter', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.bridge.bridge-nf-call-ip6tables=1', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.bridge.bridge-nf-call-iptables=1', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.ipv4.ip_forward=1', + run.Raw('&&'), + 'sudo', 'swapoff', '-a', + ], + wait=False, + ) + ) + + # set docker cgroup driver = systemd + # see https://kubernetes.io/docs/setup/production-environment/container-runtimes/#docker + # see https://github.com/kubernetes/kubeadm/issues/2066 + daemon_json = """ +{ + "exec-opts": ["native.cgroupdriver=systemd"], + "log-driver": "json-file", + "log-opts": { + "max-size": "100m" + }, + "storage-driver": "overlay2" +} +""" + for remote in ctx.cluster.remotes.keys(): + remote.write_file('/etc/docker/daemon.json', daemon_json, sudo=True) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'restart', 'docker', + run.Raw('||'), + 'true', + ], + wait=False, + ) 
+ ) + yield + + +@contextlib.contextmanager +def kubeadm_install(ctx, config): + version = config.get('version', '1.21') + + os_type = teuthology.get_distro(ctx) + os_version = teuthology.get_distro_version(ctx) + + try: + if os_type in ['centos', 'rhel']: + os = f"CentOS_{os_version.split('.')[0]}" + log.info('Installing cri-o') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'curl', '-L', '-o', + '/etc/yum.repos.d/devel:kubic:libcontainers:stable.repo', + f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/{os}/devel:kubic:libcontainers:stable.repo', + run.Raw('&&'), + 'sudo', + 'curl', '-L', '-o', + f'/etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:{version}.repo', + f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{version}/{os}/devel:kubic:libcontainers:stable:cri-o:{version}.repo', + run.Raw('&&'), + 'sudo', 'dnf', 'install', '-y', 'cri-o', + ], + wait=False, + ) + ) + + log.info('Installing kube{adm,ctl,let}') + repo = """[kubernetes] +name=Kubernetes +baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$basearch +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg +""" + for remote in ctx.cluster.remotes.keys(): + remote.write_file( + '/etc/yum.repos.d/kubernetes.repo', + repo, + sudo=True, + ) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'dnf', 'install', '-y', + 'kubelet', 'kubeadm', 'kubectl', + 'iproute-tc', 'bridge-utils', + ], + wait=False, + ) + ) + + # fix cni config + for remote in ctx.cluster.remotes.keys(): + conf = """# from https://github.com/cri-o/cri-o/blob/master/tutorials/kubernetes.md#flannel-network +{ + "name": "crio", + "type": "flannel" +} +""" + remote.write_file('/etc/cni/net.d/10-crio-flannel.conf', conf, sudo=True) + remote.run(args=[ + 'sudo', 'rm', '-f', + '/etc/cni/net.d/87-podman-bridge.conflist', + 
'/etc/cni/net.d/100-crio-bridge.conf', + ]) + + # start crio + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'daemon-reload', + run.Raw('&&'), + 'sudo', 'systemctl', 'enable', 'crio', '--now', + ], + wait=False, + ) + ) + + elif os_type == 'ubuntu': + os = f"xUbuntu_{os_version}" + log.info('Installing kube{adm,ctl,let}') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'apt', 'update', + run.Raw('&&'), + 'sudo', 'apt', 'install', '-y', + 'apt-transport-https', 'ca-certificates', 'curl', + run.Raw('&&'), + 'sudo', 'curl', '-fsSLo', + '/usr/share/keyrings/kubernetes-archive-keyring.gpg', + 'https://packages.cloud.google.com/apt/doc/apt-key.gpg', + run.Raw('&&'), + 'echo', 'deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main', + run.Raw('|'), + 'sudo', 'tee', '/etc/apt/sources.list.d/kubernetes.list', + run.Raw('&&'), + 'sudo', 'apt', 'update', + run.Raw('&&'), + 'sudo', 'apt', 'install', '-y', + 'kubelet', 'kubeadm', 'kubectl', + 'bridge-utils', + ], + wait=False, + ) + ) + + else: + raise RuntimeError(f'unsupported distro {os_type} for cri-o') + + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'enable', '--now', 'kubelet', + run.Raw('&&'), + 'sudo', 'kubeadm', 'config', 'images', 'pull', + ], + wait=False, + ) + ) + + yield + + finally: + if config.get('uninstall', True): + log.info('Uninstalling kube{adm,let,ctl}') + if os_type in ['centos', 'rhel']: + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-f', + '/etc/yum.repos.d/kubernetes.repo', + run.Raw('&&'), + 'sudo', 'dnf', 'remove', '-y', + 'kubeadm', 'kubelet', 'kubectl', 'cri-o', + ], + wait=False + ) + ) + elif os_type == 'ubuntu' and False: + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-f', + '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list', + f'/etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:{version}.list', + '/etc/apt/trusted.gpg.d/libcontainers-cri-o.gpg', + 
@contextlib.contextmanager
def kubeadm_init_join(ctx, config):
    """
    Initialise the cluster on the first remote that has a ``host.*`` role,
    join every other such remote to it, and reset all nodes on exit.

    Stores ``bootstrap_remote``, ``remotes`` (remote -> ip) and ``token`` on
    ``ctx.kubeadm[<cluster>]``; reads ``pod_subnet`` from the same object.
    """
    cluster_name = config['cluster']
    state = ctx.kubeadm[cluster_name]

    first_host = None
    host_ips = {}  # remote -> ip
    for remote, roles in ctx.cluster.remotes.items():
        if not any(role.startswith('host.') for role in roles):
            continue
        if not first_host:
            first_host = remote
        if remote not in host_ips:
            host_ips[remote] = remote.ssh.get_transport().getpeername()[0]
    if not first_host:
        raise RuntimeError('must define at least one host.something role')

    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    state.bootstrap_remote = first_host
    state.remotes = host_ips
    state.token = 'abcdef.' + ''.join(
        random.choice(alphabet) for _ in range(16)
    )
    log.info(f'Token: {state.token}')
    log.info(f'Remotes: {state.remotes}')

    try:
        # init
        first_host.run(args=[
            'sudo', 'kubeadm', 'init',
            '--node-name', state.bootstrap_remote.shortname,
            '--token', state.token,
            '--pod-network-cidr', str(state.pod_subnet),
        ])

        # join additional nodes, all in parallel
        api_endpoint = state.remotes[state.bootstrap_remote] + ':6443'
        pending = []
        for remote in state.remotes:
            if remote == first_host:
                continue
            join_cmd = [
                'sudo', 'kubeadm', 'join',
                api_endpoint,
                '--node-name', remote.shortname,
                '--token', state.token,
                '--discovery-token-unsafe-skip-ca-verification',
            ]
            pending.append(remote.run(args=join_cmd, wait=False))
        run.wait(pending)
        yield

    except Exception as e:
        log.exception(e)
        raise

    finally:
        log.info('Cleaning up node')
        reset_procs = ctx.cluster.run(
            args=['sudo', 'kubeadm', 'reset', 'cleanup-node', '-f'],
            wait=False,
        )
        run.wait(reset_procs)
def map_vnet(mip):
    """
    Map a machine address to its own slice of a configured virtual subnet.

    Scans the ``vnet`` entries of the teuthology config. For the first
    entry whose ``machine_subnet`` contains ``mip``, the address's position
    among that subnet's hosts selects the subnet at the same position when
    ``virtual_subnet`` is split by ``32 - <machine prefix length>`` bits.

    :returns: the selected network, or None when no entry matches or an
              entry's virtual prefix is not shorter than its machine prefix
    """
    for entry in teuth_config.get('vnet', []):
        mnet = ipaddress.ip_network(entry['machine_subnet'])
        vnet = ipaddress.ip_network(entry['virtual_subnet'])

        # a misconfigured entry aborts the whole lookup
        if not vnet.prefixlen < mnet.prefixlen:
            log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix")
            return None

        if mip not in mnet:
            continue

        machine_hosts = list(mnet.hosts())
        pos = machine_hosts.index(mip)
        log.info(f"{mip} is in {mnet} at pos {pos}")
        slices = list(vnet.subnets(32 - mnet.prefixlen))
        return slices[pos]

    return None
@contextlib.contextmanager
def setup_pvs(ctx, config):
    """
    Create PVs for all scratch LVs and set up a trivial provisioner

    Device paths are read from ``/scratch_devs`` on each remote, one per
    line. The start of every device is zeroed, and all manifests plus the
    ``scratch`` StorageClass go to ``kubectl create`` in a single call.
    """
    def pv_manifest(node, dev):
        # PV named <node>-<device basename>, pinned to that node
        devname = dev.split('/')[-1].replace("_", "-")
        node_selector = {
            'key': 'kubernetes.io/hostname',
            'operator': 'In',
            'values': [node]
        }
        return {
            'apiVersion': 'v1',
            'kind': 'PersistentVolume',
            'metadata': {'name': f'{node}-{devname}'},
            'spec': {
                'volumeMode': 'Block',
                'accessModes': ['ReadWriteOnce'],
                'capacity': {'storage': '100Gi'},  # doesn't matter?
                'persistentVolumeReclaimPolicy': 'Recycle',
                'storageClassName': 'scratch',
                'local': {'path': dev},
                'nodeAffinity': {
                    'required': {
                        'nodeSelectorTerms': [
                            {'matchExpressions': [node_selector]}
                        ]
                    }
                }
            }
        }

    log.info('Scanning for scratch devices')
    manifests = []
    for remote in ctx.cluster.remotes.keys():
        listing = remote.read_file('/scratch_devs').decode('utf-8')
        devs = listing.strip().splitlines()
        log.info(f'Scratch devices on {remote.shortname}: {devs}')
        for dev in devs:
            manifests.append(pv_manifest(remote.shortname, dev))
            # overwriting first few MB is enough to make k8s happy
            remote.run(args=[
                'sudo', 'dd', 'if=/dev/zero', f'of={dev}', 'bs=1M', 'count=10'
            ])

    manifests.append({
        'kind': 'StorageClass',
        'apiVersion': 'storage.k8s.io/v1',
        'metadata': {'name': 'scratch'},
        'provisioner': 'kubernetes.io/no-provisioner',
        'volumeBindingMode': 'WaitForFirstConsumer',
    })

    documents = yaml.dump_all(manifests)
    log.info('Creating PVs + StorageClass')
    log.debug(documents)
    _kubectl(ctx, config, ['create', '-f', '-'], stdin=documents)

    yield
def task(ctx, config):
    """
    Run locktests, from the xfstests suite, on the given
    clients. Whether the clients are ceph-fuse or kernel does not
    matter, and the two clients can refer to the same mount.

    The config is a list of two clients to run the locktest on. The
    first client will be the host.

    For example:
       tasks:
       - ceph:
       - ceph-fuse: [client.0, client.1]
       - locktest:
           [client.0, client.1]

    This task does not yield; there would be little point.

    :param ctx: Context
    :param config: Configuration
    :raises Exception: if either locktest process exits non-zero
    """

    assert isinstance(config, list)
    log.info('fetching and building locktests...')
    (host,) = ctx.cluster.only(config[0]).remotes
    (client,) = ctx.cluster.only(config[1]).remotes
    ( _, _, host_id) = config[0].partition('.')
    ( _, _, client_id) = config[1].partition('.')
    testdir = teuthology.get_testdir(ctx)
    hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id)
    clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id)

    locktest_dir = '{tdir}/locktest'.format(tdir=testdir)
    locktest_src = '{d}/locktest.c'.format(d=locktest_dir)
    locktest_bin = '{d}/locktest'.format(d=locktest_dir)

    def cleanup(remote, role):
        # mkdir -p first so the chain also succeeds when the build never
        # got as far as creating the directory
        remote.run(
            args=[
                'mkdir', '-p', locktest_dir,
                run.Raw('&&'),
                'rm', '-f', locktest_src,
                run.Raw('&&'),
                'rm', '-f', locktest_bin,
                run.Raw('&&'),
                # The host branch used to pass the unformatted literal
                # '{tdir}/locktest' here, so rmdir always failed.
                'rmdir', locktest_dir,
                ],
            logger=log.getChild('.{id}'.format(id=role)),
            )

    try:
        for client_name in config:
            log.info('building on {client_}'.format(client_=client_name))
            ctx.cluster.only(client_name).run(
                args=[
                    # explicitly does not support multiple autotest tasks
                    # in a single run; the result archival would conflict
                    'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir),
                    run.Raw('&&'),
                    'mkdir', locktest_dir,
                    run.Raw('&&'),
                    'wget',
                    '-nv',
                    'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c',
                    '-O', locktest_src,
                    run.Raw('&&'),
                    'g++', locktest_src,
                    '-o', locktest_bin
                    ],
                logger=log.getChild('locktest_client.{id}'.format(id=client_name)),
                )

        log.info('built locktest on each client')

        host.run(args=['sudo', 'touch',
                       '{mnt}/locktestfile'.format(mnt=hostmnt),
                       run.Raw('&&'),
                       'sudo', 'chown', 'ubuntu.ubuntu',
                       '{mnt}/locktestfile'.format(mnt=hostmnt)
                       ]
                 )

        log.info('starting on host')
        hostproc = host.run(
            args=[
                locktest_bin,
                '-p', '6788',
                '-d',
                '{mnt}/locktestfile'.format(mnt=hostmnt),
                ],
            wait=False,
            logger=log.getChild('locktest.host'),
            )
        log.info('starting on client')
        (_,_,hostaddr) = host.name.partition('@')
        clientproc = client.run(
            args=[
                locktest_bin,
                '-p', '6788',
                '-d',
                '-h', hostaddr,
                '{mnt}/locktestfile'.format(mnt=clientmnt),
                ],
            logger=log.getChild('locktest.client'),
            wait=False
            )

        hostresult = hostproc.wait()
        clientresult = clientproc.wait()
        if (hostresult != 0) or (clientresult != 0):
            raise Exception("Did not pass locking test!")
        log.info('finished locktest executable with results {r} and {s}'. \
                     format(r=hostresult, s=clientresult))

    finally:
        # Nested so the client is still cleaned up if the host cleanup fails.
        try:
            log.info('cleaning up host dir')
            cleanup(host, config[0])
        finally:
            log.info('cleaning up client dir')
            cleanup(client, config[1])
config): + """ + Test handling of lost objects. + + A pretty rigid cluster is brought up and tested by this task + """ + POOL = 'unfound_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... 
+ log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.flush_pg_stats([1, 0]) + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , POOL, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + assert m['available_might_have_unfound'] == True + assert 
m['might_have_unfound'][0]['osd'] == "1" + assert m['might_have_unfound'][0]['status'] == "osd is down" + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'revert') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2]) + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert not err + + # see if osd.1 can cope + manager.mark_in_osd(1) + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() diff --git a/qa/tasks/manypools.py b/qa/tasks/manypools.py new file mode 100644 index 000000000..7fe7e43e1 --- /dev/null +++ b/qa/tasks/manypools.py @@ -0,0 +1,73 @@ +""" +Force pg creation on all osds +""" +from teuthology import misc as teuthology +from teuthology.orchestra import run +import logging + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Create the specified number of pools and write 16 objects to them (thereby forcing + the PG creation on each OSD). This task creates pools from all the clients, + in parallel. It is easy to add other daemon types which have the appropriate + permissions, but I don't think anything else does. + The config is just the number of pools to create. I recommend setting + "mon create pg interval" to a very low value in your ceph config to speed + this up. 
+ + You probably want to do this to look at memory consumption, and + maybe to test how performance changes with the number of PGs. For example: + + tasks: + - ceph: + config: + mon: + mon create pg interval: 1 + - manypools: 3000 + - radosbench: + clients: [client.0] + time: 360 + """ + + log.info('creating {n} pools'.format(n=config)) + + poolnum = int(config) + creator_remotes = [] + client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client') + log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles)) + for role in client_roles: + log.info('role={role_}'.format(role_=role)) + (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.keys() + creator_remotes.append((creator_remote, 'client.{id}'.format(id=role))) + + remaining_pools = poolnum + poolprocs=dict() + while (remaining_pools > 0): + log.info('{n} pools remaining to create'.format(n=remaining_pools)) + for remote, role_ in creator_remotes: + poolnum = remaining_pools + remaining_pools -= 1 + if remaining_pools < 0: + continue + log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_)) + proc = remote.run( + args=[ + 'ceph', + '--name', role_, + 'osd', 'pool', 'create', 'pool{num}'.format(num=poolnum), '8', + run.Raw('&&'), + 'rados', + '--name', role_, + '--pool', 'pool{num}'.format(num=poolnum), + 'bench', '0', 'write', '-t', '16', '--block-size', '1' + ], + wait = False + ) + log.info('waiting for pool and object creates') + poolprocs[remote] = proc + + run.wait(poolprocs.values()) + + log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum)) diff --git a/qa/tasks/mds_creation_failure.py b/qa/tasks/mds_creation_failure.py new file mode 100644 index 000000000..58314086c --- /dev/null +++ b/qa/tasks/mds_creation_failure.py @@ -0,0 +1,69 @@ +# FIXME: this file has many undefined vars which are accessed! 
+# flake8: noqa +import logging +import contextlib +import time +from tasks import ceph_manager +from teuthology import misc +from teuthology.orchestra.run import CommandFailedError, Raw + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Go through filesystem creation with a synthetic failure in an MDS + in its 'up:creating' state, to exercise the retry behaviour. + """ + # Grab handles to the teuthology objects of interest + mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds')) + if len(mdslist) != 1: + # Require exactly one MDS, the code path for creation failure when + # a standby is available is different + raise RuntimeError("This task requires exactly one MDS") + + mds_id = mdslist[0] + (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.keys() + manager = ceph_manager.CephManager( + mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # Stop MDS + self.fs.set_max_mds(0) + self.fs.mds_stop(mds_id) + self.fs.mds_fail(mds_id) + + # Reset the filesystem so that next start will go into CREATING + manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it") + manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data") + + # Start the MDS with mds_kill_create_at set, it will crash during creation + mds.restart_with_args(["--mds_kill_create_at=1"]) + try: + mds.wait_for_exit() + except CommandFailedError as e: + if e.exitstatus == 1: + log.info("MDS creation killed as expected") + else: + log.error("Unexpected status code %s" % e.exitstatus) + raise + + # Since I have intentionally caused a crash, I will clean up the resulting core + # file to avoid task.internal.coredump seeing it as a failure. 
+ log.info("Removing core file from synthetic MDS failure") + mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))]) + + # It should have left the MDS map state still in CREATING + status = self.fs.status().get_mds(mds_id) + assert status['state'] == 'up:creating' + + # Start the MDS again without the kill flag set, it should proceed with creation successfully + mds.restart() + + # Wait for state ACTIVE + self.fs.wait_for_state("up:active", timeout=120, mds_id=mds_id) + + # The system should be back up in a happy healthy state, go ahead and run any further tasks + # inside this context. + yield diff --git a/qa/tasks/mds_pre_upgrade.py b/qa/tasks/mds_pre_upgrade.py new file mode 100644 index 000000000..812d402ed --- /dev/null +++ b/qa/tasks/mds_pre_upgrade.py @@ -0,0 +1,27 @@ +""" +Prepare MDS cluster for upgrade. +""" + +import logging + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Prepare MDS cluster for upgrade. + + This task reduces ranks to 1 and stops all standbys. 
+ """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'snap-upgrade task only accepts a dict for configuration' + + fs = Filesystem(ctx) + fs.getinfo() # load name + fs.set_allow_standby_replay(False) + fs.set_max_mds(1) + fs.reach_max_mds() diff --git a/qa/tasks/mds_thrash.py b/qa/tasks/mds_thrash.py new file mode 100644 index 000000000..8c7f3cba5 --- /dev/null +++ b/qa/tasks/mds_thrash.py @@ -0,0 +1,434 @@ +""" +Thrash mds by simulating failures +""" +import logging +import contextlib +import itertools +import random +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event +from teuthology import misc as teuthology + +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster, Filesystem, FSMissing +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +class MDSThrasher(Thrasher, Greenlet): + """ + MDSThrasher:: + + The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc). + + The config is optional. Many of the config parameters are a a maximum value + to use when selecting a random value from a range. To always use the maximum + value, set no_random to true. The config is a dict containing some or all of: + + max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at + any given time. + + max_thrash_delay: [default: 30] maximum number of seconds to delay before + thrashing again. + + max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in + the replay state before thrashing. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed MDS. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed + during replay. Value should be between 0.0 and 1.0. 
+ + thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds + cluster will be modified to a value [1, current) or (current, starting + max_mds]. Value should be between 0.0 and 1.0. + + thrash_while_stopping: [default: false] thrash an MDS while there + are MDS in up:stopping (because max_mds was changed and some + MDS were deactivated). + + thrash_weights: allows specific MDSs to be thrashed more/less frequently. + This option overrides anything specified by max_thrash. This option is a + dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b: + 0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not + specified will be automatically given a weight of 0.0 (not thrashed). + For a given MDS, by default the trasher delays for up to + max_thrash_delay, trashes, waits for the MDS to recover, and iterates. + If a non-zero weight is specified for an MDS, for each iteration the + thrasher chooses whether to thrash during that iteration based on a + random value [0-1] not exceeding the weight of that MDS. + + Examples:: + + + The following example sets the likelihood that mds.a will be thrashed + to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the + likelihood that an MDS will be thrashed in replay to 40%. + Thrash weights do not have to sum to 1. 
+ + tasks: + - ceph: + - mds_thrash: + thrash_weights: + - mds.a: 0.8 + - mds.b: 0.2 + thrash_in_replay: 0.4 + - ceph-fuse: + - workunit: + clients: + all: [suites/fsx.sh] + + The following example disables randomization, and uses the max delay values: + + tasks: + - ceph: + - mds_thrash: + max_thrash_delay: 10 + max_revive_delay: 1 + max_replay_thrash_delay: 4 + + """ + + def __init__(self, ctx, manager, config, fs, max_mds): + super(MDSThrasher, self).__init__() + + self.config = config + self.ctx = ctx + self.logger = log.getChild('fs.[{f}]'.format(f = fs.name)) + self.fs = fs + self.manager = manager + self.max_mds = max_mds + self.name = 'thrasher.fs.[{f}]'.format(f = fs.name) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.max_thrash_delay = float(self.config.get('thrash_delay', 120.0)) + self.thrash_in_replay = float(self.config.get('thrash_in_replay', False)) + assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format( + v=self.thrash_in_replay) + self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0)) + + def _run(self): + try: + self.do_thrash() + except FSMissing: + pass + except Exception as e: + # Log exceptions here so we get the full backtrace (gevent loses them). 
+ # Also allow successful completion as gevent exception handling is a broken mess: + # + # 2017-02-03T14:34:01.259 CRITICAL:root: File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051) + # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error + # self.print_exception(context, type, value, tb) + # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception + # traceback.print_exception(type, value, tb, file=errstream) + # File "/usr/lib/python2.7/traceback.py", line 124, in print_exception + # _print(file, 'Traceback (most recent call last):') + # File "/usr/lib/python2.7/traceback.py", line 13, in _print + # file.write(str+terminator) + # 2017-02-03T14:34:01.261 CRITICAL:root:IOError + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... + + def log(self, x): + """Write data to the logger assigned to MDSThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def kill_mds(self, mds): + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). + remotes.keys()) + self.log('kill_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mds', mds).stop() + + @staticmethod + def _assert_ipmi(remote): + assert remote.console.has_ipmi_credentials, ( + "powercycling requested but RemoteConsole is not " + "initialized. Check ipmi config.") + + def revive_mds(self, mds): + """ + Revive mds -- do an ipmpi powercycle (if indicated by the config) + and then restart. 
+ """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). + remotes.keys()) + self.log('revive_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.manager.make_admin_daemon_dir(self.ctx, remote) + args = [] + self.ctx.daemons.get_daemon('mds', mds).restart(*args) + + def wait_for_stable(self, rank = None, gid = None): + self.log('waiting for mds cluster to stabilize...') + for itercount in itertools.count(): + status = self.fs.status() + max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds'] + ranks = list(status.get_ranks(self.fs.id)) + stopping = sum(1 for _ in ranks if "up:stopping" == _['state']) + actives = sum(1 for _ in ranks + if "up:active" == _['state'] and "laggy_since" not in _) + + if not bool(self.config.get('thrash_while_stopping', False)) and stopping > 0: + if itercount % 5 == 0: + self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)') + else: + if rank is not None: + try: + info = status.get_rank(self.fs.id, rank) + if info['gid'] != gid and "up:active" == info['state']: + self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid)) + return status + except: + pass # no rank present + if actives >= max_mds: + # no replacement can occur! 
+ self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format( + actives=actives, max_mds=max_mds, rank=rank)) + return status + else: + if actives == max_mds: + self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives)) + return status, None + if itercount > 300/2: # 5 minutes + raise RuntimeError('timeout waiting for cluster to stabilize') + elif itercount % 5 == 0: + self.log('mds map: {status}'.format(status=status)) + else: + self.log('no change') + sleep(2) + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name)) + stats = { + "max_mds": 0, + "deactivate": 0, + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(0.0, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + status = self.fs.status() + + if random.random() <= self.thrash_max_mds: + max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds'] + options = [i for i in range(1, self.max_mds + 1) if i != max_mds] + if len(options) > 0: + new_max_mds = random.choice(options) + self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds)) + self.fs.set_max_mds(new_max_mds) + stats['max_mds'] += 1 + self.wait_for_stable() + + count = 0 + for info in status.get_ranks(self.fs.id): + name = info['name'] + label = 'mds.' 
+ name + rank = info['rank'] + gid = info['gid'] + + # if thrash_weights isn't specified and we've reached max_thrash, + # we're done + count = count + 1 + if 'thrash_weights' not in self.config and count > self.max_thrash: + break + + weight = 1.0 + if 'thrash_weights' in self.config: + weight = self.config['thrash_weights'].get(label, '0.0') + skip = random.randrange(0.0, 1.0) + if weight <= skip: + self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight)) + continue + + self.log('kill {label} (rank={rank})'.format(label=label, rank=rank)) + self.kill_mds(name) + stats['kill'] += 1 + + # wait for mon to report killed mds as crashed + last_laggy_since = None + itercount = 0 + while True: + status = self.fs.status() + info = status.get_mds(name) + if not info: + break + if 'laggy_since' in info: + last_laggy_since = info['laggy_since'] + break + if any([(f == name) for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']]): + break + self.log( + 'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format( + label=label)) + itercount = itercount + 1 + if itercount > 10: + self.log('mds map: {status}'.format(status=status)) + sleep(2) + + if last_laggy_since: + self.log( + '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since)) + else: + self.log('{label} down, removed from mdsmap'.format(label=label)) + + # wait for a standby mds to takeover and become active + status = self.wait_for_stable(rank, gid) + + # wait for a while before restarting old active to become new + # standby + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving {label}'.format( + delay=delay, label=label)) + sleep(delay) + + self.log('reviving {label}'.format(label=label)) + self.revive_mds(name) + + for itercount in itertools.count(): + if 
itercount > 300/2: # 5 minutes + raise RuntimeError('timeout waiting for MDS to revive') + status = self.fs.status() + info = status.get_mds(name) + if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'): + self.log('{label} reported in {state} state'.format(label=label, state=info['state'])) + break + self.log( + 'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label)) + sleep(2) + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + + # don't do replay thrashing right now +# for info in status.get_replays(self.fs.id): +# # this might race with replay -> active transition... +# if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay: +# delay = self.max_replay_thrash_delay +# if self.randomize: +# delay = random.randrange(0.0, self.max_replay_thrash_delay) +# sleep(delay) +# self.log('kill replaying mds.{id}'.format(id=self.to_kill)) +# self.kill_mds(self.to_kill) +# +# delay = self.max_revive_delay +# if self.randomize: +# delay = random.randrange(0.0, self.max_revive_delay) +# +# self.log('waiting for {delay} secs before reviving mds.{id}'.format( +# delay=delay, id=self.to_kill)) +# sleep(delay) +# +# self.log('revive mds.{id}'.format(id=self.to_kill)) +# self.revive_mds(self.to_kill) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the mds by thrashing while another task/workunit + is running. + + Please refer to MDSThrasher class for further information on the + available options. 
+ """ + + mds_cluster = MDSCluster(ctx) + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mds_thrash task only accepts a dict for configuration' + mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + assert len(mdslist) > 1, \ + 'mds_thrash task requires at least 2 metadata servers' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('mds thrasher using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys() + manager = ceph_manager.CephManager( + first, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # make sure everyone is in active, standby, or standby-replay + log.info('Wait for all MDSs to reach steady state...') + status = mds_cluster.status() + while True: + steady = True + for info in status.get_all(): + state = info['state'] + if state not in ('up:active', 'up:standby', 'up:standby-replay'): + steady = False + break + if steady: + break + sleep(2) + status = mds_cluster.status() + log.info('Ready to start thrashing') + + manager.wait_for_clean() + assert manager.is_clean() + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + for fs in status.get_filesystems(): + thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fscid=fs['id']), fs['mdsmap']['max_mds']) + thrasher.start() + ctx.ceph[config['cluster']].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining mds_thrasher') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/metadata.yaml b/qa/tasks/metadata.yaml new file mode 100644 index 000000000..ccdc3b077 --- /dev/null +++ b/qa/tasks/metadata.yaml @@ -0,0 +1,2 @@ +instance-id: test +local-hostname: test diff --git a/qa/tasks/mgr/__init__.py b/qa/tasks/mgr/__init__.py new file 
mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/mgr/__init__.py diff --git a/qa/tasks/mgr/dashboard/__init__.py b/qa/tasks/mgr/dashboard/__init__.py new file mode 100644 index 000000000..2b022e024 --- /dev/null +++ b/qa/tasks/mgr/dashboard/__init__.py @@ -0,0 +1 @@ +DEFAULT_API_VERSION = '1.0' diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py new file mode 100644 index 000000000..2c6efa901 --- /dev/null +++ b/qa/tasks/mgr/dashboard/helper.py @@ -0,0 +1,722 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0212,too-many-return-statements,too-many-public-methods +from __future__ import absolute_import + +import json +import logging +import random +import re +import string +import time +from collections import namedtuple +from typing import List + +import requests +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology.exceptions import \ + CommandFailedError # pylint: disable=import-error + +from . import DEFAULT_API_VERSION + +log = logging.getLogger(__name__) + + +class DashboardTestCase(MgrTestCase): + # Display full error diffs + maxDiff = None + + # Increased x3 (20 -> 60) + TIMEOUT_HEALTH_CLEAR = 60 + + MGRS_REQUIRED = 2 + MDSS_REQUIRED = 1 + REQUIRE_FILESYSTEM = True + CLIENTS_REQUIRED = 1 + CEPHFS = False + ORCHESTRATOR = False + ORCHESTRATOR_TEST_DATA = { + 'inventory': [ + { + 'name': 'test-host0', + 'addr': '1.2.3.4', + 'devices': [ + { + 'path': '/dev/sda', + } + ] + }, + { + 'name': 'test-host1', + 'addr': '1.2.3.5', + 'devices': [ + { + 'path': '/dev/sdb', + } + ] + } + ], + 'daemons': [ + { + 'nodename': 'test-host0', + 'daemon_type': 'mon', + 'daemon_id': 'a' + }, + { + 'nodename': 'test-host0', + 'daemon_type': 'mgr', + 'daemon_id': 'x' + }, + { + 'nodename': 'test-host0', + 'daemon_type': 'osd', + 'daemon_id': '0' + }, + { + 'nodename': 'test-host1', + 'daemon_type': 'osd', + 'daemon_id': '1' + } + ] + } + + _session = None # type: requests.sessions.Session + _token = None + _resp = None # 
type: requests.models.Response + _loggedin = False + _base_uri = None + + AUTO_AUTHENTICATE = True + + AUTH_ROLES = ['administrator'] + + @classmethod + def create_user(cls, username, password, roles=None, + force_password=True, cmd_args=None): + # pylint: disable=too-many-arguments + """ + :param username: The name of the user. + :type username: str + :param password: The password. + :type password: str + :param roles: A list of roles. + :type roles: list + :param force_password: Force the use of the specified password. This + will bypass the password complexity check. Defaults to 'True'. + :type force_password: bool + :param cmd_args: Additional command line arguments for the + 'ac-user-create' command. + :type cmd_args: None | list[str] + """ + try: + cls._ceph_cmd(['dashboard', 'ac-user-show', username]) + cls._ceph_cmd(['dashboard', 'ac-user-delete', username]) + except CommandFailedError as ex: + if ex.exitstatus != 2: + raise ex + + user_create_args = [ + 'dashboard', 'ac-user-create', username + ] + if force_password: + user_create_args.append('--force-password') + if cmd_args: + user_create_args.extend(cmd_args) + cls._ceph_cmd_with_secret(user_create_args, password) + if roles: + set_roles_args = ['dashboard', 'ac-user-set-roles', username] + for idx, role in enumerate(roles): + if isinstance(role, str): + set_roles_args.append(role) + else: + assert isinstance(role, dict) + rolename = 'test_role_{}'.format(idx) + try: + cls._ceph_cmd(['dashboard', 'ac-role-show', rolename]) + cls._ceph_cmd(['dashboard', 'ac-role-delete', rolename]) + except CommandFailedError as ex: + if ex.exitstatus != 2: + raise ex + cls._ceph_cmd(['dashboard', 'ac-role-create', rolename]) + for mod, perms in role.items(): + args = ['dashboard', 'ac-role-add-scope-perms', rolename, mod] + args.extend(perms) + cls._ceph_cmd(args) + set_roles_args.append(rolename) + cls._ceph_cmd(set_roles_args) + + @classmethod + def create_pool(cls, name, pg_num, pool_type, application='rbd'): + data 
@classmethod
def RunAs(cls, username, password, roles=None, force_password=True,
          cmd_args=None, login=True):
    # pylint: disable=too-many-arguments
    """
    Decorator factory that runs a test method as a temporary user.

    The decorated method creates the user via ``self.create_user``, logs
    in (unless ``login`` is false), runs the wrapped function, and then
    logs out and deletes the user again. Cleanup runs even when the
    wrapped function raises, so a failing test does not leave the user
    (or a logged-in session) behind for later tests.

    :param username: Name of the temporary user.
    :param password: Password of the temporary user.
    :param roles: Roles passed to ``create_user`` / ``delete_user``.
    :param force_password: Forwarded to ``create_user``.
    :param cmd_args: Forwarded to ``create_user``.
    :param login: Whether to log in as the user around the call.
    :return: A decorator for test methods.
    """
    # Local import so this block does not depend on the module's import list.
    import functools

    def wrapper(func):
        # Keep the wrapped test's name and docstring for test reports.
        @functools.wraps(func)
        def execute(self, *args, **kwargs):
            self.create_user(username, password, roles,
                             force_password, cmd_args)
            try:
                if login:
                    self.login(username, password)
                return func(self, *args, **kwargs)
            finally:
                # Delete the user even if logging out fails.
                try:
                    if login:
                        self.logout()
                finally:
                    self.delete_user(username, roles)

        return execute

    return wrapper
# pylint: disable=inconsistent-return-statements, too-many-arguments
@classmethod
def _request(cls, url, method, data=None, params=None, version=DEFAULT_API_VERSION,
             set_cookies=False):
    """
    Send an HTTP request to the dashboard and store the response.

    The response object is kept in ``cls._resp``. If ``cls._token`` is
    set it is sent as a ``token`` cookie when ``set_cookies`` is true,
    otherwise as a ``Bearer`` Authorization header.

    :param url: Path appended to ``cls._base_uri``.
    :param method: One of 'GET', 'POST', 'DELETE', 'PUT'.
    :param data: JSON body (not sent for GET).
    :param params: Query string parameters.
    :param version: API version for the Accept header; ``None`` requests
        plain ``application/json``.
    :param set_cookies: Send the token as a cookie instead of a header.
    :return: The decoded JSON body when the response content type matches
        ``application/*json`` and the body is non-empty, else the raw text.
    :raises AssertionError: For an unsupported ``method``.
    :raises ValueError: If the JSON body cannot be decoded.
    """
    url = "{}{}".format(cls._base_uri, url)
    log.debug("Request %s to %s", method, url)
    headers = {}
    cookies = {}
    if cls._token:
        if set_cookies:
            cookies['token'] = cls._token
        else:
            headers['Authorization'] = "Bearer {}".format(cls._token)
    if version is None:
        headers['Accept'] = 'application/json'
    else:
        headers['Accept'] = 'application/vnd.ceph.api.v{}+json'.format(version)

    if method not in ('GET', 'POST', 'DELETE', 'PUT'):
        # Explicit raise: a bare ``assert False`` is stripped under -O.
        raise AssertionError("unsupported HTTP method: {}".format(method))

    request_kwargs = {'params': params, 'verify': False, 'headers': headers}
    if method != 'GET':
        # GET requests never carry a JSON body.
        request_kwargs['json'] = data
    if set_cookies:
        request_kwargs['cookies'] = cookies
    cls._resp = getattr(cls._session, method.lower())(url, **request_kwargs)

    try:
        if not cls._resp.ok:
            # Output response for easier debugging.
            log.error("Request response: %s", cls._resp.text)
        # A response may carry no Content-Type header; treat that as
        # "not JSON" instead of failing with KeyError.
        content_type = cls._resp.headers.get('content-type', '')
        if re.match(r'^application/.*json', content_type) and cls._resp.text:
            return cls._resp.json()
        return cls._resp.text
    except ValueError:
        log.exception("Failed to decode response: %s", cls._resp.text)
        raise
# pylint: disable=too-many-arguments
@classmethod
def _task_request(cls, method, url, data, timeout, version=DEFAULT_API_VERSION,
                  set_cookies=False):
    """
    Issue a request that may start an asynchronous task and wait for it.

    If the response status is not 202 the result is returned directly
    (``None`` for 403). For 202 the task is polled through
    ``/api/task`` once per second, at most ``int(timeout)`` times. When
    the task finishes, ``cls._resp.status_code`` is rewritten to the
    status a synchronous call would have produced (201/200/204 for
    POST/PUT/DELETE on success, the exception's status or 500 on failure).

    :param method: HTTP method name passed to ``_request``.
    :param url: Request path.
    :param data: JSON body.
    :param timeout: Maximum number of one-second polls.
    :param version: API version for the Accept header.
    :param set_cookies: Send the token as a cookie on the initial request.
    :return: The immediate result, the task's ``ret_value`` on success,
        or the task's ``exception`` on failure.
    :raises Exception: If the task has not finished within ``timeout`` polls.
    """
    res = cls._request(url, method, data, version=version, set_cookies=set_cookies)
    cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404])

    if cls._resp.status_code == 403:
        return None

    if cls._resp.status_code != 202:
        log.debug("task finished immediately")
        return res

    cls._assertIn('name', res)
    cls._assertIn('metadata', res)
    task_name = res['name']
    task_metadata = res['metadata']

    retries = int(timeout)
    res_task = None
    _res = None  # last poll result, reported if the wait times out
    while retries > 0 and res_task is None:
        retries -= 1
        log.debug("task (%s, %s) is still executing", task_name, task_metadata)
        time.sleep(1)
        _res = cls._get('/api/task?name={}'.format(task_name), version=version)
        cls._assertEq(cls._resp.status_code, 200)
        executing_tasks = [task for task in _res['executing_tasks'] if
                           task['metadata'] == task_metadata]
        finished_tasks = [task for task in _res['finished_tasks'] if
                          task['metadata'] == task_metadata]
        if not executing_tasks and finished_tasks:
            res_task = finished_tasks[0]

    # Decide on the outcome, not on the retry counter: a task that
    # finishes during the very last poll must not be reported as timed out.
    if res_task is None:
        raise Exception("Waiting for task ({}, {}) to finish timed out. {}"
                        .format(task_name, task_metadata, _res))

    log.debug("task (%s, %s) finished", task_name, task_metadata)
    if res_task['success']:
        if method == 'POST':
            cls._resp.status_code = 201
        elif method == 'PUT':
            cls._resp.status_code = 200
        elif method == 'DELETE':
            cls._resp.status_code = 204
        return res_task['ret_value']

    if 'status' in res_task['exception']:
        cls._resp.status_code = res_task['exception']['status']
    else:
        cls._resp.status_code = 500
    return res_task['exception']
@classmethod
def _ceph_create_tmp_file(cls, content: str) -> str:
    """
    Create a temporary file in the remote cluster.

    The file gets a random 20-character name under ``/tmp`` and holds
    exactly ``content`` (no trailing newline). It is written by running
    ``sh -c`` through ``cls._cmd``.

    :param content: Text to store in the file.
    :return: Absolute path of the created file.
    """
    # Local import so this block does not depend on the module's import list.
    import shlex

    file_name = ''.join(random.choices(string.ascii_letters + string.digits, k=20))
    file_path = '/tmp/{}'.format(file_name)
    # Quote the content: it is typically a password and may contain spaces
    # or shell metacharacters. printf '%s' writes it verbatim, unlike
    # `echo -n`, whose option handling differs between shells.
    script = "printf '%s' {} > {}".format(shlex.quote(content), shlex.quote(file_path))
    cls._cmd(['sh', '-c', script])
    return file_path
# TODO: pass defaults=(False,) to namedtuple() if python3.7
class JLeaf(namedtuple('JLeaf', ['typ', 'none'])):
    """Schema node for a scalar of type ``typ``; ``none=True`` also accepts None."""

    def __new__(cls, typ, none=False):
        return super().__new__(cls, typ, none)


# Schema node: a list whose every element matches ``elem_typ``.
JList = namedtuple('JList', ['elem_typ'])

# Schema node: a sequence whose i-th element matches ``elem_typs[i]``.
JTuple = namedtuple('JTuple', ['elem_typs'])

# Schema node: a value that matches at least one of ``elem_typs``.
JUnion = namedtuple('JUnion', ['elem_typs'])


class JObj(namedtuple('JObj', ['sub_elems', 'allow_unknown', 'none', 'unknown_schema'])):
    """Schema node for a JSON object (dict)."""

    def __new__(cls, sub_elems, allow_unknown=False, none=False, unknown_schema=None):
        """
        :type sub_elems: dict[str, JAny | JLeaf | JList | JObj | type]
        :type allow_unknown: bool
        :type none: bool
        :type unknown_schema: int, str, JAny | JLeaf | JList | JObj
        :return:
        """
        return super(JObj, cls).__new__(cls, sub_elems, allow_unknown, none, unknown_schema)


# Schema node: any value; None is accepted only when ``none`` is true.
JAny = namedtuple('JAny', ['none'])

module_options_object_schema = JObj({
    'name': str,
    'type': str,
    'level': str,
    'flags': int,
    'default_value': JAny(none=True),
    'min': JAny(none=False),
    'max': JAny(none=False),
    'enum_allowed': JList(str),
    'see_also': JList(str),
    'desc': str,
    'long_desc': str,
    'tags': JList(str),
})

module_options_schema = JObj(
    {},
    allow_unknown=True,
    unknown_schema=module_options_object_schema)

addrvec_schema = JList(JObj({
    'addr': str,
    'nonce': int,
    'type': str
}))

devices_schema = JList(JObj({
    'daemons': JList(str),
    'devid': str,
    'location': JList(JObj({
        'host': str,
        'dev': str,
        'path': str
    }))
}, allow_unknown=True))


class _ValError(Exception):
    """Validation failure; the message names the offending path in the input."""

    def __init__(self, msg, path):
        path_str = ''.join('[{}]'.format(repr(p)) for p in path)
        super(_ValError, self).__init__('In `input{}`: {}'.format(path_str, msg))


# pylint: disable=inconsistent-return-statements,too-many-branches
def _validate_json(val, schema, path=None):
    """
    Validate ``val`` against ``schema``.

    Most mismatches raise ``_ValError``. A ``JUnion`` with no matching
    alternative does not raise; it makes the result ``False``, so callers
    must check the return value as well.

    :param val: Decoded JSON value to check.
    :param schema: A JAny, JLeaf, JList, JTuple, JUnion or JObj node, or
        one of the plain types str, int, float, bool.
    :param path: Keys/indices leading to ``val``, used in error messages.
    :return: True if ``val`` matches, False on a JUnion mismatch.
    :raises _ValError: On any other mismatch.
    :raises AssertionError: If ``schema`` is not a supported schema node.

    >>> d = {'a': 1, 'b': 'x', 'c': list(range(10))}
    >>> ds = JObj({'a': int, 'b': str, 'c': JList(int)})
    >>> _validate_json(d, ds)
    True
    >>> _validate_json({'num': 1}, JObj({'num': JUnion([int,float])}))
    True
    >>> _validate_json({'num': 'a'}, JObj({'num': JUnion([int,float])}))
    False
    """
    if path is None:
        path = []
    if isinstance(schema, JAny):
        if not schema.none and val is None:
            raise _ValError('val is None', path)
        return True
    if isinstance(schema, JLeaf):
        if schema.none and val is None:
            return True
        if not isinstance(val, schema.typ):
            raise _ValError('val not of type {}'.format(schema.typ), path)
        return True
    if isinstance(schema, JList):
        if not isinstance(val, list):
            raise _ValError('val="{}" is not a list'.format(val), path)
        return all(_validate_json(e, schema.elem_typ, path + [i]) for i, e in enumerate(val))
    if isinstance(schema, JTuple):
        # Report a wrong container or a too-short sequence as a validation
        # error instead of letting TypeError/IndexError escape.
        if not isinstance(val, (list, tuple)):
            raise _ValError('val="{}" is not a list'.format(val), path)
        if len(val) < len(schema.elem_typs):
            raise _ValError('val has {} elements, expected {}'
                            .format(len(val), len(schema.elem_typs)), path)
        return all(_validate_json(val[i], typ, path + [i])
                   for i, typ in enumerate(schema.elem_typs))
    if isinstance(schema, JUnion):
        for typ in schema.elem_typs:
            try:
                if _validate_json(val, typ, path):
                    return True
            except _ValError:
                pass
        return False
    if isinstance(schema, JObj):
        if val is None and schema.none:
            return True
        if val is None:
            raise _ValError('val is None', path)
        if not hasattr(val, 'keys'):
            raise _ValError('val="{}" is not a dict'.format(val), path)
        missing_keys = set(schema.sub_elems.keys()).difference(set(val.keys()))
        if missing_keys:
            raise _ValError('missing keys: {}'.format(missing_keys), path)
        unknown_keys = set(val.keys()).difference(set(schema.sub_elems.keys()))
        if not schema.allow_unknown and unknown_keys:
            raise _ValError('unknown keys: {}'.format(unknown_keys), path)
        result = all(
            _validate_json(val[key], sub_schema, path + [key])
            for key, sub_schema in schema.sub_elems.items()
        )
        if unknown_keys and schema.allow_unknown and schema.unknown_schema:
            # Always evaluate the unknown keys so their errors are raised,
            # then combine with a logical AND. Adding the booleans would
            # turn a single failure into a truthy integer.
            unknown_ok = all(
                _validate_json(val[key], schema.unknown_schema, path + [key])
                for key in unknown_keys
            )
            result = result and unknown_ok
        return result
    if schema in [str, int, float, bool]:
        return _validate_json(val, JLeaf(schema), path)

    # Explicit raise: a bare ``assert False`` is stripped under -O.
    raise AssertionError(str(path))
import DEFAULT_API_VERSION +from .helper import DashboardTestCase + + +class VersionReqTest(DashboardTestCase, unittest.TestCase): + def test_version(self): + for (version, expected_status) in [ + (DEFAULT_API_VERSION, 200), + (None, 415), + ("99.99", 415) + ]: + with self.subTest(version=version): + self._get('/api/summary', version=version) + self.assertStatus(expected_status) diff --git a/qa/tasks/mgr/dashboard/test_auth.py b/qa/tasks/mgr/dashboard/test_auth.py new file mode 100644 index 000000000..a2266229b --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_auth.py @@ -0,0 +1,352 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +import time + +import jwt +from teuthology.orchestra.run import \ + CommandFailedError # pylint: disable=import-error + +from .helper import DashboardTestCase, JLeaf, JObj + + +class AuthTest(DashboardTestCase): + + AUTO_AUTHENTICATE = False + + def setUp(self): + super(AuthTest, self).setUp() + self.reset_session() + + def _validate_jwt_token(self, token, username, permissions): + payload = jwt.decode(token, options={'verify_signature': False}) + self.assertIn('username', payload) + self.assertEqual(payload['username'], username) + + for scope, perms in permissions.items(): + self.assertIsNotNone(scope) + self.assertIn('read', perms) + self.assertIn('update', perms) + self.assertIn('create', perms) + self.assertIn('delete', perms) + + def test_login_without_password(self): + with self.assertRaises(CommandFailedError): + self.create_user('admin2', '', ['administrator'], force_password=True) + + def test_a_set_login_credentials(self): + # test with Authorization header + self.create_user('admin2', 'admin2', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + + # test with Cookies set + self.create_user('admin2', 'admin2', 
['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + + def test_login_valid(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + def test_login_invalid(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + + def test_lockout_user(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": 
"invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + def test_logout(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin", data['permissions']) + self.set_jwt_token(data['token']) + self._post("/api/auth/logout") + self.assertStatus(200) + self.assertJsonBody({ + "redirect_url": "#/login" + }) + 
def test_token_ttl(self):
    """
    A JWT token must stop working once its TTL has elapsed.

    The check runs twice: first with the token sent in the Authorization
    header, then with the token sent as a cookie. The TTL is lowered to
    5 seconds for each run and restored to 28800 afterwards, even when
    an assertion fails, so a failure here does not leave short-lived
    tokens configured for the tests that follow.
    """
    for set_cookies in (False, True):
        self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
        try:
            self._post("/api/auth", {'username': 'admin', 'password': 'admin'},
                       set_cookies=set_cookies)
            self.assertStatus(201)
            self.set_jwt_token(self.jsonBody()['token'])
            self._get("/api/host", set_cookies=set_cookies, version='1.1')
            self.assertStatus(200)
            # Sleep past the 5 second TTL so the token expires.
            time.sleep(6)
            self._get("/api/host", set_cookies=set_cookies, version='1.1')
            self.assertStatus(401)
        finally:
            self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
            self.set_jwt_token(None)
self.assertStatus(200) + self._get("/api/host", version='1.1') + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes expired tokens from the blocklist + self._post("/api/auth/logout") + self.assertStatus(200) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call adds the token to the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes expired tokens from the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + + def test_unauthorized(self): + # test with Authorization header + self._get("/api/host", version='1.1') + self.assertStatus(401) + + # test with Cookies set + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + + def test_invalidate_token_by_admin(self): + # test with Authorization header + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + 
time.sleep(1) + self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', '--force-password', + 'user'], + 'user2') + time.sleep(1) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + self.delete_user("user") + + # test with Cookies set + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + time.sleep(1) + self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', '--force-password', + 'user'], + 'user2') + time.sleep(1) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + self.delete_user("user") + + def test_check_token(self): + # test with Authorization header + self.login("admin", "admin") + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}, 
set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout(set_cookies=True) + + def test_check_wo_token(self): + # test with Authorization header + self.login("admin", "admin") + self._post("/api/auth/check", {"token": ""}) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) + }, allow_unknown=False)) + self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": ""}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) + }, allow_unknown=False)) + self.logout(set_cookies=True) diff --git a/qa/tasks/mgr/dashboard/test_cephfs.py b/qa/tasks/mgr/dashboard/test_cephfs.py new file mode 100644 index 000000000..4295b580f --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cephfs.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from contextlib import contextmanager + +from .helper import DashboardTestCase, JLeaf, JList, JObj + + +class CephfsTest(DashboardTestCase): + CEPHFS = True + + AUTH_ROLES = ['cephfs-manager'] + + QUOTA_PATH = '/quotas' + + def assertToHave(self, data, key): + self.assertIn(key, data) + self.assertIsNotNone(data[key]) + + def get_fs_id(self): + return self.fs.get_namespace_id() + + def mk_dirs(self, path, expectedStatus=200): + self._post("/api/cephfs/{}/tree".format(self.get_fs_id()), + params={'path': path}) + self.assertStatus(expectedStatus) + + def rm_dir(self, path, expectedStatus=200): + self._delete("/api/cephfs/{}/tree".format(self.get_fs_id()), + params={'path': path}) + 
@contextmanager
def new_quota_dir(self):
    """
    Context manager providing a directory with quotas set.

    Creates ``self.QUOTA_PATH``, sets its quotas to 1024 ** 3 bytes and
    1024 files, yields ``1``, and removes the directory on exit. The
    directory is removed even when setting the quotas or the ``with``
    body raises, so a failing test does not leave it behind.
    """
    self.mk_dirs(self.QUOTA_PATH)
    try:
        self.set_quotas(1024 ** 3, 1024)
        yield 1
    finally:
        self.rm_dir(self.QUOTA_PATH)
self._get("/api/cephfs/{}/clients".format(fs_id)) + self.assertStatus(200) + + self.assertIn('status', data) + self.assertIn('data', data) + + def test_cephfs_evict_client_does_not_exist(self): + fs_id = self.get_fs_id() + self._delete("/api/cephfs/{}/client/1234".format(fs_id)) + self.assertStatus(404) + + def test_cephfs_evict_invalid_client_id(self): + fs_id = self.get_fs_id() + self._delete("/api/cephfs/{}/client/xyz".format(fs_id)) + self.assertStatus(400) + self.assertJsonBody({ + "component": 'cephfs', + "code": "invalid_cephfs_client_id", + "detail": "Invalid cephfs client ID xyz" + }) + + def test_cephfs_get(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/".format(fs_id)) + self.assertStatus(200) + + self.assertToHave(data, 'cephfs') + self.assertToHave(data, 'standbys') + self.assertToHave(data, 'versions') + + def test_cephfs_mds_counters(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/mds_counters".format(fs_id)) + self.assertStatus(200) + + self.assertIsInstance(data, dict) + self.assertIsNotNone(data) + + def test_cephfs_mds_counters_wrong(self): + self._get("/api/cephfs/baadbaad/mds_counters") + self.assertStatus(400) + self.assertJsonBody({ + "component": 'cephfs', + "code": "invalid_cephfs_id", + "detail": "Invalid cephfs ID baadbaad" + }) + + def test_cephfs_list(self): + data = self._get("/api/cephfs/") + self.assertStatus(200) + + self.assertIsInstance(data, list) + cephfs = data[0] + self.assertToHave(cephfs, 'id') + self.assertToHave(cephfs, 'mdsmap') + + def test_cephfs_get_quotas(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/quota?path=/".format(fs_id)) + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'max_bytes': int, + 'max_files': int + })) + + def test_cephfs_tabs(self): + fs_id = self.get_fs_id() + data = self._get("/ui-api/cephfs/{}/tabs".format(fs_id)) + self.assertStatus(200) + self.assertIsInstance(data, dict) + + # Pools + pools = data['pools'] + 
self.assertIsInstance(pools, list) + self.assertGreater(len(pools), 0) + for pool in pools: + self.assertEqual(pool['size'], pool['used'] + pool['avail']) + + # Ranks + self.assertToHave(data, 'ranks') + self.assertIsInstance(data['ranks'], list) + + # Name + self.assertToHave(data, 'name') + self.assertIsInstance(data['name'], str) + + # Standbys + self.assertToHave(data, 'standbys') + self.assertIsInstance(data['standbys'], str) + + # MDS counters + counters = data['mds_counters'] + self.assertIsInstance(counters, dict) + self.assertGreater(len(counters.keys()), 0) + for k, v in counters.items(): + self.assertEqual(v['name'], k) + + # Clients + self.assertToHave(data, 'clients') + clients = data['clients'] + self.assertToHave(clients, 'data') + self.assertIsInstance(clients['data'], list) + self.assertToHave(clients, 'status') + self.assertIsInstance(clients['status'], int) + + def test_ls_mk_rm_dir(self): + self.ls_dir('/', 0) + + self.mk_dirs('/pictures/birds') + self.ls_dir('/', 2, 3) + self.ls_dir('/pictures', 1) + + self.rm_dir('/pictures', 500) + self.rm_dir('/pictures/birds') + self.rm_dir('/pictures') + + self.ls_dir('/', 0) + + def test_snapshots(self): + fs_id = self.get_fs_id() + self.mk_dirs('/movies/dune/extended_version') + + self._post("/api/cephfs/{}/snapshot".format(fs_id), + params={'path': '/movies/dune', 'name': 'test'}) + self.assertStatus(200) + + data = self.ls_dir('/movies', 1) + self.assertSchema(data[0], JObj(sub_elems={ + 'name': JLeaf(str), + 'path': JLeaf(str), + 'parent': JLeaf(str), + 'snapshots': JList(JObj(sub_elems={ + 'name': JLeaf(str), + 'path': JLeaf(str), + 'created': JLeaf(str) + })), + 'quotas': JObj(sub_elems={ + 'max_bytes': JLeaf(int), + 'max_files': JLeaf(int) + }) + })) + snapshots = data[0]['snapshots'] + self.assertEqual(len(snapshots), 1) + snapshot = snapshots[0] + self.assertEqual(snapshot['name'], "test") + self.assertEqual(snapshot['path'], "/movies/dune/.snap/test") + + # Should have filtered out 
"_test_$timestamp" + data = self.ls_dir('/movies/dune', 1) + snapshots = data[0]['snapshots'] + self.assertEqual(len(snapshots), 0) + + self._delete("/api/cephfs/{}/snapshot".format(fs_id), + params={'path': '/movies/dune', 'name': 'test'}) + self.assertStatus(200) + + data = self.ls_dir('/movies', 1) + self.assertEqual(len(data[0]['snapshots']), 0) + + # Cleanup. Note, the CephFS Python extension (and therefore the Dashboard + # REST API) does not support recursive deletion of a directory. + self.rm_dir('/movies/dune/extended_version') + self.rm_dir('/movies/dune') + self.rm_dir('/movies') + + def test_quotas_default(self): + self.mk_dirs(self.QUOTA_PATH) + self.assert_quotas(0, 0) + self.rm_dir(self.QUOTA_PATH) + + def test_quotas_set_both(self): + with self.new_quota_dir(): + self.assert_quotas(1024 ** 3, 1024) + + def test_quotas_set_only_bytes(self): + with self.new_quota_dir(): + self.set_quotas(2048 ** 3) + self.assert_quotas(2048 ** 3, 1024) + + def test_quotas_set_only_files(self): + with self.new_quota_dir(): + self.set_quotas(None, 2048) + self.assert_quotas(1024 ** 3, 2048) + + def test_quotas_unset_both(self): + with self.new_quota_dir(): + self.set_quotas(0, 0) + self.assert_quotas(0, 0) + + def test_listing_of_root_dir(self): + self.ls_dir('/', 0) # Should not list root + ui_root = self.ui_ls_dir('/', 1)[0] # Should list root by default + root = self.get_root_directory() + self.assertEqual(ui_root, root) + + def test_listing_of_ui_api_ls_on_deeper_levels(self): + # The UI-API and API ls_dir methods should behave the same way on deeper levels + self.mk_dirs('/pictures') + api_ls = self.ls_dir('/pictures', 0) + ui_api_ls = self.ui_ls_dir('/pictures', 0) + self.assertEqual(api_ls, ui_api_ls) + self.rm_dir('/pictures') diff --git a/qa/tasks/mgr/dashboard/test_cluster.py b/qa/tasks/mgr/dashboard/test_cluster.py new file mode 100644 index 000000000..14f854279 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster.py @@ -0,0 +1,23 @@ +from .helper import 
DashboardTestCase, JLeaf, JObj + + +class ClusterTest(DashboardTestCase): + + def setUp(self): + super().setUp() + self.reset_session() + + def test_get_status(self): + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + "status": JLeaf(str) + }, allow_unknown=False)) + + def test_update_status(self): + req = {'status': 'POST_INSTALLED'} + self._put('/api/cluster', req, version='0.1') + self.assertStatus(200) + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertEqual(data, req) diff --git a/qa/tasks/mgr/dashboard/test_cluster_configuration.py b/qa/tasks/mgr/dashboard/test_cluster_configuration.py new file mode 100644 index 000000000..9c8245d23 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster_configuration.py @@ -0,0 +1,398 @@ +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class ClusterConfigurationTest(DashboardTestCase): + + def test_list(self): + data = self._get('/api/cluster_conf') + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertGreater(len(data), 1000) + for conf in data: + self._validate_single(conf) + + def test_get(self): + data = self._get('/api/cluster_conf/admin_socket') + self.assertStatus(200) + self._validate_single(data) + self.assertIn('enum_values', data) + + data = self._get('/api/cluster_conf/fantasy_name') + self.assertStatus(404) + + def test_get_specific_db_config_option(self): + config_name = 'mon_allow_pool_delete' + + orig_value = self._get_config_by_name(config_name) + + self._ceph_cmd(['config', 'set', 'mon', config_name, 'true']) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [{'section': 'mon', 'value': 'true'}], + timeout=30, + period=1) + + self._ceph_cmd(['config', 'set', 'mon', config_name, 'false']) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [{'section': 'mon', 'value': 'false'}], + timeout=30, + 
period=1) + + # restore value + if orig_value: + self._ceph_cmd(['config', 'set', 'mon', config_name, orig_value[0]['value']]) + + def test_filter_config_options(self): + config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'osd_scrub_end_hour'] + data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names))) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), 3) + for conf in data: + self._validate_single(conf) + self.assertIn(conf['name'], config_names) + + def test_filter_config_options_empty_names(self): + self._get('/api/cluster_conf/filter?names=') + self.assertStatus(404) + self.assertEqual(self._resp.json()['detail'], 'Config options `` not found') + + def test_filter_config_options_unknown_name(self): + self._get('/api/cluster_conf/filter?names=abc') + self.assertStatus(404) + self.assertEqual(self._resp.json()['detail'], 'Config options `abc` not found') + + def test_filter_config_options_contains_unknown_name(self): + config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'abc'] + data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names))) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), 2) + for conf in data: + self._validate_single(conf) + self.assertIn(conf['name'], config_names) + + def test_create(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': '0/3'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + 
self._reset_original_values(config_name, orig_value) + + def test_delete(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # set a config option + expected_result = [{'section': 'mon', 'value': '0/3'}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # delete it and check if it's deleted + self._delete('/api/cluster_conf/{}?section={}'.format(config_name, 'mon')) + self.assertStatus(204) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + None, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_cant_update_at_runtime(self): + config_name = 'public_bind_addr' # not updatable + config_value = [{'section': 'global', 'value': 'true'}] + orig_value = self._get_config_by_name(config_name) + + # try to set config option and check if it fails + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': config_value + }) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + config_name)) + + # check if config option value is still the original one + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + orig_value, + timeout=30, + period=1) + + def test_create_two_values(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': '0/3'}, + {'section': 'osd', 'value': '0/5'}] + + self._post('/api/cluster_conf', { + 'name': config_name, 
+ 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_can_handle_none_values(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': [{'section': 'mon', 'value': '0/3'}, + {'section': 'osd', 'value': None}] + }) + self.assertStatus(201) + + expected_result = [{'section': 'mon', 'value': '0/3'}] + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_can_handle_boolean_values(self): + config_name = 'mon_allow_pool_delete' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': 'true'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': [{'section': 'mon', 'value': True}]}) + self.assertStatus(201) + + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_bulk_set(self): + expected_result = { + 'osd_max_backfills': {'section': 'osd', 'value': '1'}, + 'osd_recovery_max_active': {'section': 'osd', 'value': '3'}, + 'osd_recovery_max_single_start': {'section': 'osd', 'value': 
'1'}, + 'osd_recovery_sleep': {'section': 'osd', 'value': '2.000000'} + } + orig_values = dict() + + for config_name in expected_result: + orig_values[config_name] = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + self._put('/api/cluster_conf', {'options': expected_result}) + self.assertStatus(200) + + for config_name, value in expected_result.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [value], + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_values[config_name]) + + def test_bulk_set_cant_update_at_runtime(self): + config_options = { + 'public_bind_addr': {'section': 'global', 'value': '1.2.3.4:567'}, # not updatable + 'public_network': {'section': 'global', 'value': '10.0.0.0/8'} # not updatable + } + orig_values = dict() + + for config_name in config_options: + orig_values[config_name] = self._get_config_by_name(config_name) + + # try to set config options and see if it fails + self._put('/api/cluster_conf', {'options': config_options}) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + ', '.join(config_options.keys()))) + + # check if config option values are still the original ones + for config_name, value in orig_values.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + value, + timeout=30, + period=1) + + def test_bulk_set_cant_update_at_runtime_partial(self): + config_options = { + 'public_bind_addr': {'section': 'global', 'value': 'true'}, # not updatable + 'log_to_stderr': {'section': 'global', 'value': 'true'} # updatable + } + orig_values = dict() + + for config_name in config_options: + orig_values[config_name] = 
self._get_config_by_name(config_name) + + # try to set config options and see if it fails + self._put('/api/cluster_conf', {'options': config_options}) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + 'public_bind_addr')) + + # check if config option values are still the original ones + for config_name, value in orig_values.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + value, + timeout=30, + period=1) + + def test_check_existence(self): + """ + This test case is intended to check the existence of all hard coded config options used by + the dashboard. + If you include further hard coded options in the dashboard, feel free to add them to the + list. + """ + hard_coded_options = [ + 'osd_max_backfills', # osd-recv-speed + 'osd_recovery_max_active', # osd-recv-speed + 'osd_recovery_max_single_start', # osd-recv-speed + 'osd_recovery_sleep', # osd-recv-speed + 'osd_scrub_during_recovery', # osd-pg-scrub + 'osd_scrub_begin_hour', # osd-pg-scrub + 'osd_scrub_end_hour', # osd-pg-scrub + 'osd_scrub_begin_week_day', # osd-pg-scrub + 'osd_scrub_end_week_day', # osd-pg-scrub + 'osd_scrub_min_interval', # osd-pg-scrub + 'osd_scrub_max_interval', # osd-pg-scrub + 'osd_deep_scrub_interval', # osd-pg-scrub + 'osd_scrub_auto_repair', # osd-pg-scrub + 'osd_max_scrubs', # osd-pg-scrub + 'osd_scrub_priority', # osd-pg-scrub + 'osd_scrub_sleep', # osd-pg-scrub + 'osd_scrub_auto_repair_num_errors', # osd-pg-scrub + 'osd_debug_deep_scrub_sleep', # osd-pg-scrub + 'osd_deep_scrub_keys', # osd-pg-scrub + 'osd_deep_scrub_large_omap_object_key_threshold', # osd-pg-scrub + 'osd_deep_scrub_large_omap_object_value_sum_threshold', # osd-pg-scrub + 'osd_deep_scrub_randomize_ratio', # osd-pg-scrub + 'osd_deep_scrub_stride', # osd-pg-scrub + 'osd_deep_scrub_update_digest_min_age', # osd-pg-scrub + 
'osd_requested_scrub_priority', # osd-pg-scrub + 'osd_scrub_backoff_ratio', # osd-pg-scrub + 'osd_scrub_chunk_max', # osd-pg-scrub + 'osd_scrub_chunk_min', # osd-pg-scrub + 'osd_scrub_cost', # osd-pg-scrub + 'osd_scrub_interval_randomize_ratio', # osd-pg-scrub + 'osd_scrub_invalid_stats', # osd-pg-scrub + 'osd_scrub_load_threshold', # osd-pg-scrub + 'osd_scrub_max_preemptions', # osd-pg-scrub + 'mon_allow_pool_delete' # pool-list + ] + + for config_option in hard_coded_options: + self._get('/api/cluster_conf/{}'.format(config_option)) + self.assertStatus(200) + + def _validate_single(self, data): + self.assertIn('name', data) + self.assertIn('daemon_default', data) + self.assertIn('long_desc', data) + self.assertIn('level', data) + self.assertIn('default', data) + self.assertIn('see_also', data) + self.assertIn('tags', data) + self.assertIn('min', data) + self.assertIn('max', data) + self.assertIn('services', data) + self.assertIn('type', data) + self.assertIn('desc', data) + self.assertIn(data['type'], ['str', 'bool', 'float', 'int', 'size', 'uint', 'addr', + 'addrvec', 'uuid', 'secs', 'millisecs']) + + if 'value' in data: + self.assertIn('source', data) + self.assertIsInstance(data['value'], list) + + for entry in data['value']: + self.assertIsInstance(entry, dict) + self.assertIn('section', entry) + self.assertIn('value', entry) + + def _get_config_by_name(self, conf_name): + data = self._get('/api/cluster_conf/{}'.format(conf_name)) + if 'value' in data: + return data['value'] + return None + + def _clear_all_values_for_config_option(self, config_name): + values = self._get_config_by_name(config_name) + if values: + for value in values: + self._ceph_cmd(['config', 'rm', value['section'], config_name]) + + def _reset_original_values(self, config_name, orig_values): + if orig_values: + for value in orig_values: + self._ceph_cmd(['config', 'set', value['section'], config_name, value['value']]) diff --git a/qa/tasks/mgr/dashboard/test_crush_rule.py 
b/qa/tasks/mgr/dashboard/test_crush_rule.py new file mode 100644 index 000000000..1e37553b2 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_crush_rule.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj + + +class CrushRuleTest(DashboardTestCase): + + AUTH_ROLES = ['pool-manager'] + + rule_schema = JObj(sub_elems={ + 'max_size': int, + 'min_size': int, + 'rule_id': int, + 'rule_name': str, + 'ruleset': int, + 'steps': JList(JObj({}, allow_unknown=True)) + }, allow_unknown=True) + + def create_and_delete_rule(self, data): + name = data['name'] + # Creates rule + self._post('/api/crush_rule', data) + self.assertStatus(201) + # Makes sure rule exists + rule = self._get('/api/crush_rule/{}'.format(name)) + self.assertStatus(200) + self.assertSchemaBody(self.rule_schema) + self.assertEqual(rule['rule_name'], name) + # Deletes rule + self._delete('/api/crush_rule/{}'.format(name)) + self.assertStatus(204) + + @DashboardTestCase.RunAs('test', 'test', ['rgw-manager']) + def test_read_access_permissions(self): + self._get('/api/crush_rule') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', ['read-only']) + def test_write_access_permissions(self): + self._get('/api/crush_rule') + self.assertStatus(200) + data = {'name': 'some_rule', 'root': 'default', 'failure_domain': 'osd'} + self._post('/api/crush_rule', data) + self.assertStatus(403) + self._delete('/api/crush_rule/default') + self.assertStatus(403) + + @classmethod + def tearDownClass(cls): + super(CrushRuleTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'crush', 'rule', 'rm', 'some_rule']) + cls._ceph_cmd(['osd', 'crush', 'rule', 'rm', 'another_rule']) + + def test_list(self): + self._get('/api/crush_rule') + self.assertStatus(200) + self.assertSchemaBody(JList(self.rule_schema)) + + def test_create(self): + self.create_and_delete_rule({ + 'name': 'some_rule', + 'root': 'default', + 'failure_domain': 'osd' + 
}) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager', 'cluster-manager']) + def test_create_with_ssd(self): + data = self._get('/api/osd/0') + self.assertStatus(200) + device_class = data['osd_metadata']['default_device_class'] + self.create_and_delete_rule({ + 'name': 'another_rule', + 'root': 'default', + 'failure_domain': 'osd', + 'device_class': device_class + }) + + def test_crush_rule_info(self): + self._get('/ui-api/crush_rule/info') + self.assertStatus(200) + self.assertSchemaBody(JObj({ + 'names': JList(str), + 'nodes': JList(JObj({}, allow_unknown=True)), + 'roots': JList(int) + })) diff --git a/qa/tasks/mgr/dashboard/test_erasure_code_profile.py b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py new file mode 100644 index 000000000..7fb7c1c82 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj + + +class ECPTest(DashboardTestCase): + + AUTH_ROLES = ['pool-manager'] + + @DashboardTestCase.RunAs('test', 'test', ['rgw-manager']) + def test_read_access_permissions(self): + self._get('/api/erasure_code_profile') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', ['read-only']) + def test_write_access_permissions(self): + self._get('/api/erasure_code_profile') + self.assertStatus(200) + data = {'name': 'ecp32', 'k': 3, 'm': 2} + self._post('/api/erasure_code_profile', data) + self.assertStatus(403) + self._delete('/api/erasure_code_profile/default') + self.assertStatus(403) + + @classmethod + def tearDownClass(cls): + super(ECPTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecp32']) + cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'lrc']) + + def test_list(self): + data = self._get('/api/erasure_code_profile') + self.assertStatus(200) + + default = [p for p in data if p['name'] == 'default'] + if default: + default_ecp = { + 'k': 2, 
+ 'technique': 'reed_sol_van', + 'm': 1, + 'name': 'default', + 'plugin': 'jerasure' + } + if 'crush-failure-domain' in default[0]: + default_ecp['crush-failure-domain'] = default[0]['crush-failure-domain'] + self.assertSubset(default_ecp, default[0]) + get_data = self._get('/api/erasure_code_profile/default') + self.assertEqual(get_data, default[0]) + + def test_create(self): + data = {'name': 'ecp32', 'k': 3, 'm': 2} + self._post('/api/erasure_code_profile', data) + self.assertStatus(201) + + self._get('/api/erasure_code_profile/ecp32') + self.assertJsonSubset({ + 'crush-device-class': '', + 'crush-failure-domain': 'osd', + 'crush-root': 'default', + 'jerasure-per-chunk-alignment': 'false', + 'k': 3, + 'm': 2, + 'name': 'ecp32', + 'plugin': 'jerasure', + 'technique': 'reed_sol_van', + }) + + self.assertStatus(200) + + self._delete('/api/erasure_code_profile/ecp32') + self.assertStatus(204) + + def test_create_plugin(self): + data = {'name': 'lrc', 'k': '2', 'm': '2', 'l': '2', 'plugin': 'lrc'} + self._post('/api/erasure_code_profile', data) + self.assertJsonBody(None) + self.assertStatus(201) + + self._get('/api/erasure_code_profile/lrc') + self.assertJsonBody({ + 'crush-device-class': '', + 'crush-failure-domain': 'host', + 'crush-root': 'default', + 'k': 2, + 'l': '2', + 'm': 2, + 'name': 'lrc', + 'plugin': 'lrc' + }) + + self.assertStatus(200) + + self._delete('/api/erasure_code_profile/lrc') + self.assertStatus(204) + + def test_ecp_info(self): + self._get('/ui-api/erasure_code_profile/info') + self.assertSchemaBody(JObj({ + 'names': JList(str), + 'plugins': JList(str), + 'directory': str, + 'nodes': JList(JObj({}, allow_unknown=True)) + })) diff --git a/qa/tasks/mgr/dashboard/test_health.py b/qa/tasks/mgr/dashboard/test_health.py new file mode 100644 index 000000000..693b7b65c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_health.py @@ -0,0 +1,301 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import (DashboardTestCase, 
JAny, JLeaf, JList, JObj, + addrvec_schema, module_options_schema) + + +class HealthTest(DashboardTestCase): + CEPHFS = True + + __pg_info_schema = JObj({ + 'object_stats': JObj({ + 'num_objects': int, + 'num_object_copies': int, + 'num_objects_degraded': int, + 'num_objects_misplaced': int, + 'num_objects_unfound': int + }), + 'pgs_per_osd': float, + 'statuses': JObj({}, allow_unknown=True, unknown_schema=int) + }) + + __mdsmap_schema = JObj({ + 'session_autoclose': int, + 'balancer': str, + 'up': JObj({}, allow_unknown=True), + 'last_failure_osd_epoch': int, + 'in': JList(int), + 'last_failure': int, + 'max_file_size': int, + 'explicitly_allowed_features': int, + 'damaged': JList(int), + 'tableserver': int, + 'failed': JList(int), + 'metadata_pool': int, + 'epoch': int, + 'stopped': JList(int), + 'max_mds': int, + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True), + 'ro_compat': JObj({}, allow_unknown=True), + 'incompat': JObj({}, allow_unknown=True) + }), + 'required_client_features': JObj({}, allow_unknown=True), + 'data_pools': JList(int), + 'info': JObj({}, allow_unknown=True), + 'fs_name': str, + 'created': str, + 'standby_count_wanted': int, + 'enabled': bool, + 'modified': str, + 'session_timeout': int, + 'flags': int, + 'ever_allowed_features': int, + 'root': int + }) + + def test_minimal_health(self): + data = self._get('/api/health/minimal') + self.assertStatus(200) + schema = JObj({ + 'client_perf': JObj({ + 'read_bytes_sec': int, + 'read_op_per_sec': int, + 'recovering_bytes_per_sec': int, + 'write_bytes_sec': int, + 'write_op_per_sec': int + }), + 'df': JObj({ + 'stats': JObj({ + 'total_avail_bytes': int, + 'total_bytes': int, + 'total_used_raw_bytes': int, + }) + }), + 'fs_map': JObj({ + 'filesystems': JList( + JObj({ + 'mdsmap': self.__mdsmap_schema + }), + ), + 'standbys': JList(JObj({}, allow_unknown=True)), + }), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 
'status': str, + }), + 'hosts': int, + 'iscsi_daemons': JObj({ + 'up': int, + 'down': int + }), + 'mgr_map': JObj({ + 'active_name': str, + 'standbys': JList(JLeaf(dict)) + }), + 'mon_status': JObj({ + 'monmap': JObj({ + 'mons': JList(JLeaf(dict)), + }), + 'quorum': JList(int) + }), + 'osd_map': JObj({ + 'osds': JList( + JObj({ + 'in': int, + 'up': int, + 'state': JList(str) + })), + }), + 'pg_info': self.__pg_info_schema, + 'pools': JList(JLeaf(dict)), + 'rgw': int, + 'scrub_status': str + }) + self.assertSchema(data, schema) + + def test_full_health(self): + data = self._get('/api/health/full') + self.assertStatus(200) + module_info_schema = JObj({ + 'can_run': bool, + 'error_string': str, + 'name': str, + 'module_options': module_options_schema + }) + schema = JObj({ + 'client_perf': JObj({ + 'read_bytes_sec': int, + 'read_op_per_sec': int, + 'recovering_bytes_per_sec': int, + 'write_bytes_sec': int, + 'write_op_per_sec': int + }), + 'df': JObj({ + 'pools': JList(JObj({ + 'stats': JObj({ + 'stored': int, + 'stored_data': int, + 'stored_omap': int, + 'objects': int, + 'kb_used': int, + 'bytes_used': int, + 'data_bytes_used': int, + 'omap_bytes_used': int, + 'percent_used': float, + 'max_avail': int, + 'quota_objects': int, + 'quota_bytes': int, + 'dirty': int, + 'rd': int, + 'rd_bytes': int, + 'wr': int, + 'wr_bytes': int, + 'compress_bytes_used': int, + 'compress_under_bytes': int, + 'stored_raw': int, + 'avail_raw': int + }), + 'name': str, + 'id': int + })), + 'stats': JObj({ + 'total_avail_bytes': int, + 'total_bytes': int, + 'total_used_bytes': int, + 'total_used_raw_bytes': int, + 'total_used_raw_ratio': float, + 'num_osds': int, + 'num_per_pool_osds': int, + 'num_per_pool_omap_osds': int + }) + }), + 'fs_map': JObj({ + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True, unknown_schema=str), + 'incompat': JObj( + {}, allow_unknown=True, unknown_schema=str), + 'ro_compat': JObj( + {}, allow_unknown=True, unknown_schema=str) + }), + 'default_fscid': 
int, + 'epoch': int, + 'feature_flags': JObj( + {}, allow_unknown=True, unknown_schema=bool), + 'filesystems': JList( + JObj({ + 'id': int, + 'mdsmap': self.__mdsmap_schema + }), + ), + 'standbys': JList(JObj({}, allow_unknown=True)), + }), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str, + }), + 'hosts': int, + 'iscsi_daemons': JObj({ + 'up': int, + 'down': int + }), + 'mgr_map': JObj({ + 'active_addr': str, + 'active_addrs': JObj({ + 'addrvec': addrvec_schema + }), + 'active_change': str, # timestamp + 'active_mgr_features': int, + 'active_gid': int, + 'active_name': str, + 'always_on_modules': JObj({}, allow_unknown=True), + 'available': bool, + 'available_modules': JList(module_info_schema), + 'epoch': int, + 'modules': JList(str), + 'services': JObj( + {'dashboard': str}, # This module should always be present + allow_unknown=True, unknown_schema=str + ), + 'standbys': JList(JObj({ + 'available_modules': JList(module_info_schema), + 'gid': int, + 'name': str, + 'mgr_features': int + }, allow_unknown=True)) + }, allow_unknown=True), + 'mon_status': JObj({ + 'election_epoch': int, + 'extra_probe_peers': JList(JAny(none=True)), + 'feature_map': JObj( + {}, allow_unknown=True, unknown_schema=JList(JObj({ + 'features': str, + 'num': int, + 'release': str + })) + ), + 'features': JObj({ + 'quorum_con': str, + 'quorum_mon': JList(str), + 'required_con': str, + 'required_mon': JList(str) + }), + 'monmap': JObj({ + # @TODO: expand on monmap schema + 'mons': JList(JLeaf(dict)), + }, allow_unknown=True), + 'name': str, + 'outside_quorum': JList(int), + 'quorum': JList(int), + 'quorum_age': int, + 'rank': int, + 'state': str, + # @TODO: What type should be expected here? 
+ 'sync_provider': JList(JAny(none=True)), + 'stretch_mode': bool + }), + 'osd_map': JObj({ + # @TODO: define schema for crush map and osd_metadata, among + # others + 'osds': JList( + JObj({ + 'in': int, + 'up': int, + }, allow_unknown=True)), + }, allow_unknown=True), + 'pg_info': self.__pg_info_schema, + 'pools': JList(JLeaf(dict)), + 'rgw': int, + 'scrub_status': str + }) + self.assertSchema(data, schema) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data['pools'])) + for pool in data['pools']: + self.assertIn(pool['pool_name'], cluster_pools) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_health_permissions(self): + data = self._get('/api/health/full') + self.assertStatus(200) + + schema = JObj({ + 'client_perf': JObj({}, allow_unknown=True), + 'df': JObj({}, allow_unknown=True), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str + }), + 'pools': JList(JLeaf(dict)), + }) + self.assertSchema(data, schema) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data['pools'])) + for pool in data['pools']: + self.assertIn(pool['pool_name'], cluster_pools) diff --git a/qa/tasks/mgr/dashboard/test_host.py b/qa/tasks/mgr/dashboard/test_host.py new file mode 100644 index 000000000..78d784473 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_host.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj, devices_schema + + +class HostControllerTest(DashboardTestCase): + + AUTH_ROLES = ['read-only'] + + URL_HOST = '/api/host' + URL_UI_HOST = '/ui-api/host' + + ORCHESTRATOR = True + + @classmethod + def setUpClass(cls): + super(HostControllerTest, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + 
cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') + + @property + def test_data_inventory(self): + return self.ORCHESTRATOR_TEST_DATA['inventory'] + + @property + def test_data_daemons(self): + return self.ORCHESTRATOR_TEST_DATA['daemons'] + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get(self.URL_HOST, version='1.1') + self.assertStatus(403) + + def test_host_list(self): + data = self._get(self.URL_HOST, version='1.1') + self.assertStatus(200) + + orch_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + + for server in data: + self.assertIn('services', server) + self.assertIn('hostname', server) + self.assertIn('ceph_version', server) + self.assertIsNotNone(server['hostname']) + self.assertIsNotNone(server['ceph_version']) + for service in server['services']: + self.assertIn('type', service) + self.assertIn('id', service) + self.assertIsNotNone(service['type']) + self.assertIsNotNone(service['id']) + + self.assertIn('sources', server) + in_ceph, in_orchestrator = server['sources']['ceph'], server['sources']['orchestrator'] + if in_ceph: + self.assertGreaterEqual(len(server['services']), 1) + if not in_orchestrator: + self.assertNotIn(server['hostname'], orch_hostnames) + if in_orchestrator: + self.assertEqual(len(server['services']), 0) + self.assertIn(server['hostname'], orch_hostnames) + + def test_host_list_with_sources(self): + data = self._get('{}?sources=orchestrator'.format(self.URL_HOST), version='1.1') + self.assertStatus(200) + test_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + resp_hostnames = {host['hostname'] for host in data} + self.assertEqual(test_hostnames, resp_hostnames) + + data = self._get('{}?sources=ceph'.format(self.URL_HOST), version='1.1') + self.assertStatus(200) + test_hostnames = {inventory_node['name'] for inventory_node in + 
self.ORCHESTRATOR_TEST_DATA['inventory']} + resp_hostnames = {host['hostname'] for host in data} + self.assertEqual(len(test_hostnames.intersection(resp_hostnames)), 0) + + def test_host_devices(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + data = self._get('{}/devices'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + self.assertSchema(data, devices_schema) + + def test_host_daemons(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + data = self._get('{}/daemons'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + self.assertSchema(data, JList(JObj({ + 'hostname': str, + 'daemon_id': str, + 'daemon_type': str + }))) + + def test_host_smart(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + self._get('{}/smart'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + + def _validate_inventory(self, data, resp_data): + self.assertEqual(data['name'], resp_data['name']) + self.assertEqual(len(data['devices']), len(resp_data['devices'])) + + if not data['devices']: + return + test_devices = sorted(data['devices'], key=lambda d: d['path']) + resp_devices = sorted(resp_data['devices'], key=lambda d: d['path']) + + for test, resp in zip(test_devices, resp_devices): + self._validate_device(test, resp) + + def _validate_device(self, data, resp_data): + for key, value in data.items(): + self.assertEqual(value, resp_data[key]) + + def test_inventory_get(self): + # get a inventory + node = self.test_data_inventory[0] + resp = self._get('{}/{}/inventory'.format(self.URL_HOST, node['name'])) + self.assertStatus(200) + self._validate_inventory(node, resp) + + def 
class HostControllerNoOrchestratorTest(DashboardTestCase):
    """
    Host create/delete requests that are expected to be rejected with
    HTTP 503 and the ``orchestrator_status_unavailable`` error code.
    """

    def _assert_orchestrator_unavailable(self):
        # Both tests expect the very same failure response.
        self.assertStatus(503)
        self.assertError(code='orchestrator_status_unavailable',
                         component='orchestrator')

    def test_host_create(self):
        self._post('/api/host?hostname=foo', {'status': ''}, version='0.1')
        self._assert_orchestrator_unavailable()

    def test_host_delete(self):
        self._delete('/api/host/bar')
        self._assert_orchestrator_unavailable()
class MgrModuleTestCase(DashboardTestCase):
    """Base class offering a helper that waits for the REST API to respond."""

    MGRS_REQUIRED = 1

    def wait_until_rest_api_accessible(self):
        """
        Wait until the REST API is accessible.

        Polls ``/api/mgr/module`` through ``wait_until_true`` (timeout 30)
        until a request comes back with status code 200.
        """

        def api_responds():
            try:
                # Try reaching an API endpoint successfully.
                self._get('/api/mgr/module')
            except requests.ConnectionError:
                # A refused connection counts as "not accessible yet".
                return False
            return self._resp.status_code == 200

        self.wait_until_true(api_responds, timeout=30)
def test_module_options(self):
    """
    Fetch the option descriptions of the telemetry module and check that
    every expected option is described by ``module_options_object_schema``.
    """
    option_names = (
        'channel_basic',
        'channel_crash',
        'channel_device',
        'channel_ident',
        'contact',
        'description',
        'device_url',
        'enabled',
        'interval',
        'last_opt_revision',
        'leaderboard',
        'log_level',
        'log_to_cluster',
        'log_to_cluster_level',
        'log_to_file',
        'organization',
        'proxy',
        'url',
    )
    response = self._get('/api/mgr/module/telemetry/options')
    self.assertStatus(200)
    # All options share one schema, so build the mapping from the names.
    expected = JObj({name: module_options_object_schema for name in option_names})
    self.assertSchema(response, expected)
class MonitorTest(DashboardTestCase):
    """Tests for the ``/api/monitor`` endpoint."""

    AUTH_ROLES = ['cluster-manager']

    @DashboardTestCase.RunAs('test', 'test', ['block-manager'])
    def test_access_permissions(self):
        # With the roles given to RunAs the endpoint must answer 403.
        self._get('/api/monitor')
        self.assertStatus(403)

    def test_monitor_default(self):
        """The response must contain non-None monitor status and quorum data."""
        expected_keys = ('mon_status', 'in_quorum', 'out_quorum')

        response = self._get("/api/monitor")
        self.assertStatus(200)

        # Presence first, then content, in the same order for both passes.
        for key in expected_keys:
            self.assertIn(key, response)
        for key in expected_keys:
            self.assertIsNotNone(response[key])
class OrchestratorControllerTest(DashboardTestCase):
    """Tests for the orchestrator status endpoint of the dashboard UI API."""

    AUTH_ROLES = ['cluster-manager']

    URL_STATUS = '/ui-api/orchestrator/status'

    ORCHESTRATOR = True

    # NOTE: no setUpClass override is needed; the previous one only called
    # the inherited implementation.

    @classmethod
    def tearDownClass(cls):
        # Feed an empty JSON object to ``test_orchestrator load_data``, then
        # always hand over to the base class so its class-level teardown is
        # not skipped, even when the command itself raises.
        cmd = ['test_orchestrator', 'load_data', '-i', '-']
        try:
            cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}')
        finally:
            super(OrchestratorControllerTest, cls).tearDownClass()

    def test_status_get(self):
        """The status endpoint must answer 200 and report ``available``."""
        data = self._get(self.URL_STATUS)
        self.assertStatus(200)
        self.assertTrue(data['available'])
cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + + def tearDown(self): + self._put('/api/osd/0/mark', data={'action': 'in'}) + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get('/api/osd') + self.assertStatus(403) + self._get('/api/osd/0') + self.assertStatus(403) + + def assert_in_and_not_none(self, data, properties): + self.assertSchema(data, JObj({p: JAny(none=False) for p in properties}, allow_unknown=True)) + + def test_list(self): + data = self._get('/api/osd') + self.assertStatus(200) + + self.assertGreaterEqual(len(data), 1) + data = data[0] + self.assert_in_and_not_none(data, ['host', 'tree', 'state', 'stats', 'stats_history']) + self.assert_in_and_not_none(data['host'], ['name']) + self.assert_in_and_not_none(data['tree'], ['id']) + self.assert_in_and_not_none(data['stats'], ['numpg', 'stat_bytes_used', 'stat_bytes', + 'op_r', 'op_w']) + self.assert_in_and_not_none(data['stats_history'], ['op_out_bytes', 'op_in_bytes']) + self.assertSchema(data['stats_history']['op_out_bytes'], + JList(JTuple([JLeaf(float), JLeaf(float)]))) + + def test_details(self): + data = self._get('/api/osd/0') + self.assertStatus(200) + self.assert_in_and_not_none(data, ['osd_metadata']) + + def test_histogram(self): + data = self._get('/api/osd/0/histogram') + self.assertStatus(200) + self.assert_in_and_not_none(data['osd'], ['op_w_latency_in_bytes_histogram', + 'op_r_latency_out_bytes_histogram']) + + def test_scrub(self): + self._post('/api/osd/0/scrub?deep=False') + self.assertStatus(200) + + self._post('/api/osd/0/scrub?deep=True') + self.assertStatus(200) + + def test_safe_to_delete(self): + data = self._get('/api/osd/safe_to_delete?svc_ids=0') + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'is_safe_to_delete': JAny(none=True), + 'message': str + })) + self.assertTrue(data['is_safe_to_delete']) + + def test_osd_smart(self): + self._get('/api/osd/0/smart') + self.assertStatus(200) + + def 
test_mark_out_and_in(self): + self._put('/api/osd/0/mark', data={'action': 'out'}) + self.assertStatus(200) + + self._put('/api/osd/0/mark', data={'action': 'in'}) + self.assertStatus(200) + + def test_mark_down(self): + self._put('/api/osd/0/mark', data={'action': 'down'}) + self.assertStatus(200) + + def test_reweight(self): + self._post('/api/osd/0/reweight', {'weight': 0.4}) + self.assertStatus(200) + + def get_reweight_value(): + self._get('/api/osd/0') + response = self.jsonBody() + if 'osd_map' in response and 'weight' in response['osd_map']: + return round(response['osd_map']['weight'], 1) + return None + self.wait_until_equal(get_reweight_value, 0.4, 10) + self.assertStatus(200) + + # Undo + self._post('/api/osd/0/reweight', {'weight': 1}) + + def test_create_lost_destroy_remove(self): + sample_data = { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + } + + # Create + self._task_post('/api/osd', { + 'method': 'bare', + 'data': sample_data, + 'tracking_id': 'bare-5' + }) + self.assertStatus(201) + + # invalid method + self._task_post('/api/osd', { + 'method': 'xyz', + 'data': { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + }, + 'tracking_id': 'bare-5' + }) + self.assertStatus(400) + + # Lost + self._put('/api/osd/5/mark', data={'action': 'lost'}) + self.assertStatus(200) + # Destroy + self._post('/api/osd/5/destroy') + self.assertStatus(200) + # Purge + self._post('/api/osd/5/purge') + self.assertStatus(200) + + def test_create_with_drive_group(self): + data = { + 'method': 'drive_groups', + 'data': [ + { + 'service_type': 'osd', + 'service_id': 'test', + 'host_pattern': '*', + 'data_devices': { + 'vendor': 'abc', + 'model': 'cba', + 'rotational': True, + 'size': '4 TB' + }, + 'wal_devices': { + 'vendor': 'def', + 'model': 'fed', + 'rotational': False, + 'size': '1 TB' + }, + 'db_devices': { + 'vendor': 'ghi', + 'model': 'ihg', + 'rotational': False, + 'size': '512 GB' + }, + 'wal_slots': 5, + 'db_slots': 5, + 
def test_safe_to_destroy(self):
    """
    Exercise ``/api/osd/safe_to_destroy`` with unused and with used OSD ids.

    Ids above the highest id found in ``osd dump`` are used as "unused"
    OSDs and are expected to be reported as safe to destroy; OSD 0 is
    expected to be reported as not safe to destroy.
    """
    osd_dump = json.loads(self._ceph_cmd(['osd', 'dump', '-f', 'json']))
    # Highest OSD id currently present in the dump.
    max_id = max(map(lambda e: e['osd'], osd_dump['osds']))

    def get_pg_status_equal_unknown(osd_ids):
        # True while the response message still reports PGs in unknown
        # state; polled below until it becomes False.
        self._get('/api/osd/safe_to_destroy?ids={}'.format(osd_ids))
        if 'message' in self.jsonBody():
            return 'pgs have unknown state' in self.jsonBody()['message']
        return False

    # 1 OSD safe to destroy
    unused_osd_id = max_id + 10
    self.wait_until_equal(
        lambda: get_pg_status_equal_unknown(unused_osd_id), False, 30)
    self.assertStatus(200)
    self.assertJsonBody({
        'is_safe_to_destroy': True,
        'active': [],
        'missing_stats': [],
        'safe_to_destroy': [unused_osd_id],
        'stored_pgs': [],
    })

    # multiple OSDs safe to destroy
    unused_osd_ids = [max_id + 11, max_id + 12]
    # The list is passed as its str() form, e.g. "[13, 14]".
    self.wait_until_equal(
        lambda: get_pg_status_equal_unknown(str(unused_osd_ids)), False, 30)
    self.assertStatus(200)
    self.assertJsonBody({
        'is_safe_to_destroy': True,
        'active': [],
        'missing_stats': [],
        'safe_to_destroy': unused_osd_ids,
        'stored_pgs': [],
    })

    # 1 OSD unsafe to destroy
    def get_destroy_status():
        # None until the response carries the 'is_safe_to_destroy' key.
        self._get('/api/osd/safe_to_destroy?ids=0')
        if 'is_safe_to_destroy' in self.jsonBody():
            return self.jsonBody()['is_safe_to_destroy']
        return None
    self.wait_until_equal(get_destroy_status, False, 10)
    self.assertStatus(200)
ids: + url = url + '/individual' + data['ids'] = ids + + cls._put(url, data=data) + return cls._resp.json() + + def test_list_osd_flags(self): + flags = self._get('/api/osd/flags') + self.assertStatus(200) + self.assertEqual(len(flags), 4) + self.assertCountEqual(flags, self._initial_flags) + + def test_add_osd_flag(self): + flags = self._put_flags([ + 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', + 'pause', 'pglog_hardlimit' + ]) + self.assertCountEqual(flags, [ + 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', + 'pause', 'pglog_hardlimit' + ]) + + # Restore flags + self._put_flags(self._initial_flags) + + def test_get_indiv_flag(self): + initial = self._get('/api/osd/flags/individual') + self.assertStatus(200) + self.assertSchema(initial, JList(JObj({ + 'osd': int, + 'flags': JList(str) + }))) + + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_added = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_added: + if osd['osd'] in [0, 1, 2]: + self.assertIn('noout', osd['flags']) + self.assertIn('noin', osd['flags']) + for osd_initial in initial: + if osd['osd'] == osd_initial['osd']: + self.assertGreater(len(osd['flags']), len(osd_initial['flags'])) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_removed = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_removed: + if osd['osd'] in [0, 1, 2]: + self.assertNotIn('noout', osd['flags']) + self.assertNotIn('noin', osd['flags']) + + def test_add_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout'], [], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], ['noout'], ['noup', 'nodown', 'noin']) + + self._ceph_cmd(['osd', 'unset-group', 'noout', 'osd.{}'.format(svc_id)]) 
+ + def test_add_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = [0, 1, 2] + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + def test_remove_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout'], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], [], ['noup', 'nodown', 'noin', 'noout']) + + def test_remove_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def test_remove_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = [0, 1, 2] + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 
def _check_indiv_flags_osd(self, ids, activated_flags, deactivated_flags):
    """
    Verify the ``state`` of the selected OSDs in ``osd dump``.

    :param ids: OSD ids to inspect. One level of nesting is accepted, so
        ``[0, 1, 2]`` and ``[[0, 1, 2]]`` select the same OSDs; without
        that a nested list matches no OSD and the check passes vacuously.
    :param activated_flags: Flags that must be present in each OSD's state.
    :param deactivated_flags: Flags that must be absent from each OSD's state.
    """
    wanted = set()
    for entry in ids:
        if isinstance(entry, (list, tuple, set)):
            wanted.update(entry)
        else:
            wanted.add(entry)

    osds = json.loads(self._ceph_cmd(['osd', 'dump', '--format=json']))['osds']
    for osd in osds:
        if osd['osd'] not in wanted:
            continue
        for flag in activated_flags:
            self.assertIn(flag, osd['state'])
        for flag in deactivated_flags:
            self.assertNotIn(flag, osd['state'])
def test_perf_counters_not_found(self):
    """
    Request the perf counters of an OSD id that does not exist.

    The id is one above the highest id in the OSD dump, so it stays unused
    whatever order the dump lists the OSDs in. The endpoint must answer 404
    with a ``detail`` of ``'osd.<id>' not found`` and a body that has
    string ``status`` and ``detail`` fields.
    """
    osds = self.ceph_cluster.mon_manager.get_osd_dump()
    unused_id = max(int(osd['osd']) for osd in osds) + 1

    self._get('/api/perf_counters/osd/{}'.format(unused_id))
    self.assertStatus(404)
    schema = JObj(sub_elems={
        'status': str,
        'detail': str,
    }, allow_unknown=True)
    self.assertEqual(self._resp.json()['detail'], "'osd.{}' not found".format(unused_id))
    self.assertSchemaBody(schema)
@contextmanager
def __yield_pool(self, name=None, data=None, deletion_name=None):
    """
    Use either just a name or whole description of a pool to create one.
    This also validates the correct creation and deletion after the pool was used.

    The pool is deleted when the ``with`` block is left, including when the
    block raises, so a failing test body does not leave the pool behind.

    :param name: Name of the pool
    :param data: Describes the pool in full length
    :param deletion_name: Only needed if the pool was renamed
    :return: Yields the data the pool was created with
    """
    data = self._create_pool(name, data)
    try:
        yield data
    finally:
        self._delete_pool(deletion_name or data['pool'])
def _check_pg_num(self, pool_name, pg_num):
    """
    Wait for ``pg_placement_num`` of the pool to reach ``pg_num`` and then
    assert that ``pg_num`` and ``pg_placement_num`` both equal it.

    If both properties have not the same value, the cluster goes into a warning state, which
    will only happen during a pg update on an existing pool. The test that does that is
    currently commented out because our QA systems can't deal with the change. Feel free to test
    it locally.
    """
    def current_placement_num():
        return self._get_pool(pool_name)['pg_placement_num']

    self.wait_until_equal(current_placement_num, expect_val=pg_num, timeout=180)

    details = self._get_pool(pool_name)
    for key in ('pg_num', 'pg_placement_num'):
        actual = details[key]
        message = '{}: {} != {}'.format(key, actual, pg_num)
        self.assertEqual(actual, int(pg_num), message)
self.assertSchema(data, JList(JObj({ + 'name': str, + 'value': str, + 'source': int + }))) + + def test_pool_list(self): + data = self._get("/api/pool") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + self.assertSchemaBody(JList(self.pool_schema)) + for pool in data: + self.assertNotIn('pg_status', pool) + self.assertNotIn('stats', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_list_attrs(self): + data = self._get("/api/pool?attrs=type,flags") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + for pool in data: + self.assertIn('pool_name', pool) + self.assertIn('type', pool) + self.assertIn('flags', pool) + self.assertNotIn('flags_names', pool) + self.assertNotIn('pg_status', pool) + self.assertNotIn('stats', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_list_stats(self): + data = self._get("/api/pool?stats=true") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + self.assertSchemaBody(JList(self.pool_schema)) + for pool in data: + self.assertIn('pool_name', pool) + self.assertIn('type', pool) + self.assertIn('application_metadata', pool) + self.assertIn('flags', pool) + self.assertIn('pg_status', pool) + self.assertSchema(pool['stats'], self.pool_list_stats_schema) + self.assertIn('flags_names', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_get(self): + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + pool = self._get("/api/pool/{}?stats=true&attrs=type,flags,stats" + .format(cluster_pools[0])) + self.assertEqual(pool['pool_name'], cluster_pools[0]) + self.assertIn('type', pool) + self.assertIn('flags', pool) + self.assertNotIn('pg_status', pool) + self.assertSchema(pool['stats'], 
self.pool_list_stats_schema) + self.assertNotIn('flags_names', pool) + self.assertSchema(pool['configuration'], self.pool_rbd_conf_schema) + + def test_pool_create_with_two_applications(self): + self.__yield_pool(None, { + 'pool': 'dashboard_pool1', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd', 'sth'], + }) + + def test_pool_create_with_ecp_and_rule(self): + self._ceph_cmd(['osd', 'crush', 'rule', 'create-erasure', 'ecrule']) + self._ceph_cmd( + ['osd', 'erasure-code-profile', 'set', 'ecprofile', 'crush-failure-domain=osd']) + self.__yield_pool(None, { + 'pool': 'dashboard_pool2', + 'pg_num': '32', + 'pool_type': 'erasure', + 'application_metadata': ['rbd'], + 'erasure_code_profile': 'ecprofile', + 'crush_rule': 'ecrule', + }) + self._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile']) + + def test_pool_create_with_compression(self): + pool = { + 'pool': 'dashboard_pool3', + 'pg_num': '32', + 'pool_type': 'replicated', + 'compression_algorithm': 'zstd', + 'compression_mode': 'aggressive', + 'compression_max_blob_size': '10000000', + 'compression_required_ratio': '0.8', + 'application_metadata': ['rbd'], + 'configuration': { + 'rbd_qos_bps_limit': 2048, + 'rbd_qos_iops_limit': None, + }, + } + with self.__yield_pool(None, pool): + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '2048', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + new_pool = self._get_pool(pool['pool']) + for conf in expected_configuration: + self.assertIn(conf, new_pool['configuration']) + + def test_pool_create_with_quotas(self): + pools = [ + { + 'pool_data': { + 'pool': 'dashboard_pool_quota1', + 'pg_num': '32', + 'pool_type': 'replicated', + }, + 'pool_quotas_to_check': { + 'quota_max_objects': 0, + 'quota_max_bytes': 0, + } + }, + { + 'pool_data': { + 'pool': 'dashboard_pool_quota2', + 'pg_num': '32', + 'pool_type': 'replicated', + 'quota_max_objects': 1024, + 'quota_max_bytes': 
1000, + }, + 'pool_quotas_to_check': { + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + } + } + ] + + for pool in pools: + pool_name = pool['pool_data']['pool'] + with self.__yield_pool(pool_name, pool['pool_data']): + self._validate_pool_properties(pool['pool_quotas_to_check'], + self._get_pool(pool_name)) + + def test_pool_update_name(self): + name = 'pool_update' + updated_name = 'pool_updated_name' + with self.__yield_pool(name, None, updated_name): + props = {'pool': updated_name} + self._task_put('/api/pool/{}'.format(name), props) + time.sleep(5) + self.assertStatus(200) + self._validate_pool_properties(props, self._get_pool(updated_name)) + + def test_pool_update_metadata(self): + pool_name = 'pool_update_metadata' + with self.__yield_pool(pool_name): + props = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/{}'.format(pool_name), props) + self._validate_pool_properties(props, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + def test_pool_update_configuration(self): + pool_name = 'pool_update_configuration' + with self.__yield_pool(pool_name): + configuration = { + 'rbd_qos_bps_limit': 1024, + 'rbd_qos_iops_limit': None, + } + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '1024', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': 
'0', + }] + self._task_put('/api/pool/' + pool_name, {'configuration': configuration}) + time.sleep(5) + pool_config = self._get_pool(pool_name)['configuration'] + for conf in expected_configuration: + self.assertIn(conf, pool_config) + + def test_pool_update_compression(self): + pool_name = 'pool_update_compression' + with self.__yield_pool(pool_name): + properties = { + 'compression_algorithm': 'zstd', + 'compression_mode': 'aggressive', + 'compression_max_blob_size': '10000000', + 'compression_required_ratio': '0.8', + } + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_update_unset_compression(self): + pool_name = 'pool_update_unset_compression' + with self.__yield_pool(pool_name): + self._task_put('/api/pool/' + pool_name, {'compression_mode': 'unset'}) + time.sleep(5) + self._validate_pool_properties({ + 'compression_algorithm': None, + 'compression_mode': None, + 'compression_max_blob_size': None, + 'compression_required_ratio': None, + }, self._get_pool(pool_name)) + + def test_pool_update_quotas(self): + pool_name = 'pool_update_quotas' + with self.__yield_pool(pool_name): + properties = { + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + } + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_create_fail(self): + data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'} + self._task_post('/api/pool/', data) + self.assertStatus(400) + self.assertJsonBody({ + 'component': 'pool', + 'code': "2", + 'detail': "[errno -2] specified rule dnf doesn't exist" + }) + + def test_pool_info(self): + self._get("/ui-api/pool/info") + self.assertSchemaBody(JObj({ + 'pool_names': JList(str), + 'compression_algorithms': JList(str), + 'compression_modes': JList(str), + 'is_all_bluestore': bool, + 
'bluestore_compression_algorithm': str, + 'osd_count': int, + 'crush_rules_replicated': JList(JObj({}, allow_unknown=True)), + 'crush_rules_erasure': JList(JObj({}, allow_unknown=True)), + 'pg_autoscale_default_mode': str, + 'pg_autoscale_modes': JList(str), + 'erasure_code_profiles': JList(JObj({}, allow_unknown=True)), + 'used_rules': JObj({}, allow_unknown=True), + 'used_profiles': JObj({}, allow_unknown=True), + 'nodes': JList(JObj({}, allow_unknown=True)), + })) diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py new file mode 100644 index 000000000..997d10f2a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rbd.py @@ -0,0 +1,908 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +import time + +from .helper import DashboardTestCase, JLeaf, JList, JObj + + +class RbdTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager', 'block-manager', 'cluster-manager'] + LIST_VERSION = '2.0' + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self._get('/api/block/image?offset=0&limit=-1&search=&sort=+name', + version=RbdTest.LIST_VERSION) + self.assertStatus(403) + self.get_image('pool', None, 'image') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self.create_image('pool', None, 'name', 0) + self.assertStatus(403) + self.create_snapshot('pool', None, 'image', 'snapshot') + self.assertStatus(403) + self.copy_image('src_pool', None, 'src_image', 'dest_pool', None, 'dest_image') + self.assertStatus(403) + self.clone_image('parent_pool', None, 'parent_image', 'parent_snap', 'pool', None, 'name') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'delete']}]) + def test_update_access_permissions(self): + self.edit_image('pool', 
None, 'image') + self.assertStatus(403) + self.update_snapshot('pool', None, 'image', 'snapshot', None, None) + self.assertStatus(403) + self.rollback_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(403) + self.flatten_image('pool', None, 'image') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self.remove_image('pool', None, 'image') + self.assertStatus(403) + self.remove_snapshot('pool', None, 'image', 'snapshot') + self.assertStatus(403) + + @classmethod + def create_namespace(cls, pool, namespace): + data = {'namespace': namespace} + return cls._post('/api/block/pool/{}/namespace'.format(pool), data) + + @classmethod + def remove_namespace(cls, pool, namespace): + return cls._delete('/api/block/pool/{}/namespace/{}'.format(pool, namespace)) + + @classmethod + def create_image(cls, pool, namespace, name, size, **kwargs): + data = {'name': name, 'pool_name': pool, 'namespace': namespace, 'size': size} + data.update(kwargs) + return cls._task_post('/api/block/image', data) + + @classmethod + def get_image(cls, pool, namespace, name): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._get('/api/block/image/{}%2F{}{}'.format(pool, namespace, name)) + + @classmethod + def clone_image(cls, parent_pool, parent_namespace, parent_image, parent_snap, pool, namespace, + name, **kwargs): + # pylint: disable=too-many-arguments + data = {'child_image_name': name, 'child_namespace': namespace, 'child_pool_name': pool} + data.update(kwargs) + parent_namespace = '{}%2F'.format(parent_namespace) if parent_namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap/{}/clone' + .format(parent_pool, parent_namespace, parent_image, parent_snap), + data) + + @classmethod + def copy_image(cls, src_pool, src_namespace, src_image, dest_pool, dest_namespace, dest_image, + **kwargs): + # pylint: 
disable=too-many-arguments + data = {'dest_image_name': dest_image, + 'dest_pool_name': dest_pool, + 'dest_namespace': dest_namespace} + data.update(kwargs) + src_namespace = '{}%2F'.format(src_namespace) if src_namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/copy' + .format(src_pool, src_namespace, src_image), data) + + @classmethod + def remove_image(cls, pool, namespace, image): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_delete('/api/block/image/{}%2F{}{}'.format(pool, namespace, image)) + + # pylint: disable=too-many-arguments + @classmethod + def edit_image(cls, pool, namespace, image, name=None, size=None, features=None, **kwargs): + kwargs.update({'name': name, 'size': size, 'features': features}) + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_put('/api/block/image/{}%2F{}{}'.format(pool, namespace, image), kwargs) + + @classmethod + def flatten_image(cls, pool, namespace, image): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/flatten'.format(pool, namespace, image)) + + @classmethod + def create_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap'.format(pool, namespace, image), + {'snapshot_name': snapshot}) + + @classmethod + def remove_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_delete('/api/block/image/{}%2F{}{}/snap/{}'.format(pool, namespace, image, + snapshot)) + + @classmethod + def update_snapshot(cls, pool, namespace, image, snapshot, new_name, is_protected): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_put('/api/block/image/{}%2F{}{}/snap/{}'.format(pool, namespace, image, + snapshot), + {'new_snap_name': new_name, 'is_protected': is_protected}) + + 
@classmethod + def rollback_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap/{}/rollback'.format(pool, + namespace, + image, + snapshot)) + + @classmethod + def setUpClass(cls): + super(RbdTest, cls).setUpClass() + cls.create_pool('rbd', 2**3, 'replicated') + cls.create_pool('rbd_iscsi', 2**3, 'replicated') + + cls.create_image('rbd', None, 'img1', 2**30) + cls.create_image('rbd', None, 'img2', 2*2**30) + cls.create_image('rbd_iscsi', None, 'img1', 2**30) + cls.create_image('rbd_iscsi', None, 'img2', 2*2**30) + + osd_metadata = cls.ceph_cluster.mon_manager.get_osd_metadata() + cls.bluestore_support = True + for osd in osd_metadata: + if osd['osd_objectstore'] != 'bluestore': + cls.bluestore_support = False + break + + @classmethod + def tearDownClass(cls): + super(RbdTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it']) + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_iscsi', 'rbd_iscsi', + '--yes-i-really-really-mean-it']) + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_data', 'rbd_data', + '--yes-i-really-really-mean-it']) + + def create_image_in_trash(self, pool, name, delay=0): + self.create_image(pool, None, name, 10240) + img = self._get('/api/block/image/{}%2F{}'.format(pool, name)) + + self._task_post("/api/block/image/{}%2F{}/move_trash".format(pool, name), + {'delay': delay}) + self.assertStatus([200, 201]) + return img['id'] + + @classmethod + def remove_trash(cls, pool, image_id, force=False): + return cls._task_delete('/api/block/image/trash/{}%2F{}/?force={}'.format( + pool, image_id, force)) + + @classmethod + def restore_trash(cls, pool, namespace, image_id, new_image_name): + data = {'new_image_name': new_image_name} + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/trash/{}%2F{}{}/restore'.format(pool, + namespace, + 
image_id), data) + + @classmethod + def purge_trash(cls, pool): + return cls._task_post('/api/block/image/trash/purge?pool_name={}'.format(pool)) + + @classmethod + def get_trash(cls, pool, image_id): + trash = cls._get('/api/block/image/trash/?pool_name={}'.format(pool)) + if isinstance(trash, list): + for trash_pool in trash: + for image in trash_pool['value']: + if image['id'] == image_id: + return image + + return None + + def _validate_image(self, img, **kwargs): + """ + Example of an RBD image json: + + { + "size": 1073741824, + "obj_size": 4194304, + "mirror_mode": "journal", + "num_objs": 256, + "order": 22, + "block_name_prefix": "rbd_data.10ae2ae8944a", + "name": "img1", + "pool_name": "rbd", + "features": 61, + "features_name": ["deep-flatten", "exclusive-lock", "fast-diff", "layering", + "object-map"] + } + """ + schema = JObj(sub_elems={ + 'size': JLeaf(int), + 'obj_size': JLeaf(int), + 'num_objs': JLeaf(int), + 'order': JLeaf(int), + 'block_name_prefix': JLeaf(str), + 'name': JLeaf(str), + 'id': JLeaf(str), + 'unique_id': JLeaf(str), + 'image_format': JLeaf(int), + 'pool_name': JLeaf(str), + 'namespace': JLeaf(str, none=True), + 'features': JLeaf(int), + 'features_name': JList(JLeaf(str)), + 'stripe_count': JLeaf(int, none=True), + 'stripe_unit': JLeaf(int, none=True), + 'parent': JObj(sub_elems={'pool_name': JLeaf(str), + 'pool_namespace': JLeaf(str, none=True), + 'image_name': JLeaf(str), + 'snap_name': JLeaf(str)}, none=True), + 'data_pool': JLeaf(str, none=True), + 'snapshots': JList(JLeaf(dict)), + 'timestamp': JLeaf(str, none=True), + 'disk_usage': JLeaf(int, none=True), + 'total_disk_usage': JLeaf(int, none=True), + 'configuration': JList(JObj(sub_elems={ + 'name': JLeaf(str), + 'source': JLeaf(int), + 'value': JLeaf(str), + })), + 'mirror_mode': JLeaf(str), + }) + self.assertSchema(img, schema) + + for k, v in kwargs.items(): + if isinstance(v, list): + self.assertSetEqual(set(img[k]), set(v)) + else: + self.assertEqual(img[k], v) + + def 
_validate_snapshot(self, snap, **kwargs): + self.assertIn('id', snap) + self.assertIn('name', snap) + self.assertIn('is_protected', snap) + self.assertIn('timestamp', snap) + self.assertIn('size', snap) + self.assertIn('children', snap) + + for k, v in kwargs.items(): + if isinstance(v, list): + self.assertSetEqual(set(snap[k]), set(v)) + else: + self.assertEqual(snap[k], v) + + def _validate_snapshot_list(self, snap_list, snap_name=None, **kwargs): + found = False + for snap in snap_list: + self.assertIn('name', snap) + if snap_name and snap['name'] == snap_name: + found = True + self._validate_snapshot(snap, **kwargs) + break + if snap_name and not found: + self.fail("Snapshot {} not found".format(snap_name)) + + def test_list(self): + data = self._get('/api/block/image?offset=0&limit=-1&search=&sort=+name', + version=RbdTest.LIST_VERSION) + self.assertStatus(200) + self.assertEqual(len(data), 2) + + for pool_view in data: + self.assertIsNotNone(pool_view['value']) + self.assertIn('pool_name', pool_view) + self.assertIn(pool_view['pool_name'], ['rbd', 'rbd_iscsi']) + image_list = pool_view['value'] + self.assertEqual(len(image_list), 2) + + for img in image_list: + self.assertIn('name', img) + self.assertIn('pool_name', img) + self.assertIn(img['pool_name'], ['rbd', 'rbd_iscsi']) + if img['name'] == 'img1': + self._validate_image(img, size=1073741824, + num_objs=256, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', + 'layering', + 'object-map']) + elif img['name'] == 'img2': + self._validate_image(img, size=2147483648, + num_objs=512, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', + 'layering', + 'object-map']) + else: + assert False, "Unexcepted image '{}' in result list".format(img['name']) + + def test_create(self): + rbd_name = 'test_rbd' + self.create_image('rbd', None, rbd_name, 10240) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'test_rbd') + 
self.assertStatus(200) + + self._validate_image(img, name=rbd_name, size=10240, + num_objs=1, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', None, rbd_name) + + def test_create_with_configuration(self): + pool = 'rbd' + image_name = 'image_with_config' + size = 10240 + configuration = { + 'rbd_qos_bps_limit': 10240, + 'rbd_qos_bps_burst': 10240 * 2, + } + expected = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': str(10240), + }, { + 'name': 'rbd_qos_bps_burst', + 'source': 2, + 'value': str(10240 * 2), + }] + + self.create_image(pool, None, image_name, size, configuration=configuration) + self.assertStatus(201) + img = self.get_image('rbd', None, image_name) + self.assertStatus(200) + for conf in expected: + self.assertIn(conf, img['configuration']) + + self.remove_image(pool, None, image_name) + + def test_create_rbd_in_data_pool(self): + if not self.bluestore_support: + self.skipTest('requires bluestore cluster') + + self.create_pool('data_pool', 2**4, 'erasure') + + rbd_name = 'test_rbd_in_data_pool' + self.create_image('rbd', None, rbd_name, 10240, data_pool='data_pool') + self.assertStatus(201) + + img = self.get_image('rbd', None, 'test_rbd_in_data_pool') + self.assertStatus(200) + + self._validate_image(img, name=rbd_name, size=10240, + num_objs=1, obj_size=4194304, + data_pool='data_pool', + features_name=['data-pool', 'deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', None, rbd_name) + self.assertStatus(204) + self._ceph_cmd(['osd', 'pool', 'delete', 'data_pool', 'data_pool', + '--yes-i-really-really-mean-it']) + + def test_create_rbd_twice(self): + res = self.create_image('rbd', None, 'test_rbd_twice', 10240) + + res = self.create_image('rbd', None, 'test_rbd_twice', 10240) + self.assertStatus(400) + self.assertEqual(res, {"code": '17', 'status': 400, "component": "rbd", + "detail": "[errno 
17] RBD image already exists (error creating " + "image)", + 'task': {'name': 'rbd/create', + 'metadata': {'pool_name': 'rbd', 'namespace': None, + 'image_name': 'test_rbd_twice'}}}) + self.remove_image('rbd', None, 'test_rbd_twice') + self.assertStatus(204) + + def test_snapshots_and_clone_info(self): + self.create_snapshot('rbd', None, 'img1', 'snap1') + self.create_snapshot('rbd', None, 'img1', 'snap2') + self._rbd_cmd(['snap', 'protect', 'rbd/img1@snap1']) + self._rbd_cmd(['clone', 'rbd/img1@snap1', 'rbd_iscsi/img1_clone']) + + img = self.get_image('rbd', None, 'img1') + self.assertStatus(200) + self._validate_image(img, name='img1', size=1073741824, + num_objs=256, obj_size=4194304, parent=None, + features_name=['deep-flatten', 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + for snap in img['snapshots']: + if snap['name'] == 'snap1': + self._validate_snapshot(snap, is_protected=True) + self.assertEqual(len(snap['children']), 1) + self.assertDictEqual(snap['children'][0], + {'pool_name': 'rbd_iscsi', + 'image_name': 'img1_clone'}) + elif snap['name'] == 'snap2': + self._validate_snapshot(snap, is_protected=False) + + img = self.get_image('rbd_iscsi', None, 'img1_clone') + self.assertStatus(200) + self._validate_image(img, name='img1_clone', size=1073741824, + num_objs=256, obj_size=4194304, + parent={'pool_name': 'rbd', 'pool_namespace': '', + 'image_name': 'img1', 'snap_name': 'snap1'}, + features_name=['deep-flatten', 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + self.remove_image('rbd_iscsi', None, 'img1_clone') + self.assertStatus(204) + + def test_disk_usage(self): + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '50M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap1') + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '20M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap2') + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '10M', 'rbd/img2']) + 
self.create_snapshot('rbd', None, 'img2', 'snap3') + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', 'rbd/img2']) + img = self.get_image('rbd', None, 'img2') + self.assertStatus(200) + self._validate_image(img, name='img2', size=2147483648, + total_disk_usage=268435456, disk_usage=67108864) + + def test_delete_non_existent_image(self): + res = self.remove_image('rbd', None, 'i_dont_exist') + self.assertStatus(404) + self.assertEqual(res, {u'code': 404, "status": 404, "component": None, + "detail": "(404, 'Image not found')", + 'task': {'name': 'rbd/delete', + 'metadata': {'image_spec': 'rbd/i_dont_exist'}}}) + + def test_image_delete(self): + self.create_image('rbd', None, 'delete_me', 2**30) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap1') + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap2') + self.assertStatus(201) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 2) + + self.remove_snapshot('rbd', None, 'delete_me', 'snap1') + self.assertStatus(204) + self.remove_snapshot('rbd', None, 'delete_me', 'snap2') + self.assertStatus(204) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 0) + + self.remove_image('rbd', None, 'delete_me') + self.assertStatus(204) + + def test_image_delete_with_snapshot(self): + self.create_image('rbd', None, 'delete_me', 2**30) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap1') + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap2') + self.assertStatus(201) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 2) + + 
self.remove_image('rbd', None, 'delete_me') + self.assertStatus(204) + + def test_image_rename(self): + self.create_image('rbd', None, 'edit_img', 2**30) + self.assertStatus(201) + self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self.edit_image('rbd', None, 'edit_img', 'new_edit_img') + self.assertStatus(200) + self.get_image('rbd', None, 'edit_img') + self.assertStatus(404) + self.get_image('rbd', None, 'new_edit_img') + self.assertStatus(200) + self.remove_image('rbd', None, 'new_edit_img') + self.assertStatus(204) + + def test_image_resize(self): + self.create_image('rbd', None, 'edit_img', 2**30) + self.assertStatus(201) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, size=2**30) + self.edit_image('rbd', None, 'edit_img', size=2*2**30) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, size=2*2**30) + self.remove_image('rbd', None, 'edit_img') + self.assertStatus(204) + + def test_image_change_features(self): + self.create_image('rbd', None, 'edit_img', 2**30, features=["layering"]) + self.assertStatus(201) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=["layering"]) + self.edit_image('rbd', None, 'edit_img', + features=["fast-diff", "object-map", "exclusive-lock"]) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + self.edit_image('rbd', None, 'edit_img', + features=["journaling", "exclusive-lock"]) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'journaling', 'layering']) + self.remove_image('rbd', None, 'edit_img') + self.assertStatus(204) + + def test_image_change_config(self): + pool 
= 'rbd' + image = 'image_with_config' + initial_conf = { + 'rbd_qos_bps_limit': 10240, + 'rbd_qos_write_iops_limit': None + } + initial_expect = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': '10240', + }, { + 'name': 'rbd_qos_write_iops_limit', + 'source': 0, + 'value': '0', + }] + new_conf = { + 'rbd_qos_bps_limit': 0, + 'rbd_qos_bps_burst': 20480, + 'rbd_qos_write_iops_limit': None + } + new_expect = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': '0', + }, { + 'name': 'rbd_qos_bps_burst', + 'source': 2, + 'value': '20480', + }, { + 'name': 'rbd_qos_write_iops_limit', + 'source': 0, + 'value': '0', + }] + + self.create_image(pool, None, image, 2**30, configuration=initial_conf) + self.assertStatus(201) + img = self.get_image(pool, None, image) + self.assertStatus(200) + for conf in initial_expect: + self.assertIn(conf, img['configuration']) + + self.edit_image(pool, None, image, configuration=new_conf) + img = self.get_image(pool, None, image) + self.assertStatus(200) + for conf in new_expect: + self.assertIn(conf, img['configuration']) + + self.remove_image(pool, None, image) + self.assertStatus(204) + + def test_update_snapshot(self): + self.create_snapshot('rbd', None, 'img1', 'snap5') + self.assertStatus(201) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False) + + self.update_snapshot('rbd', None, 'img1', 'snap5', 'snap6', None) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=False) + + self.update_snapshot('rbd', None, 'img1', 'snap6', None, True) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=True) + + self.update_snapshot('rbd', None, 'img1', 'snap6', 'snap5', False) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + 
self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False) + + self.remove_snapshot('rbd', None, 'img1', 'snap5') + self.assertStatus(204) + + def test_snapshot_rollback(self): + self.create_image('rbd', None, 'rollback_img', 2**30, + features=["layering", "exclusive-lock", "fast-diff", + "object-map"]) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(201) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertEqual(img['disk_usage'], 0) + + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', + 'rbd/rollback_img']) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertGreater(img['disk_usage'], 0) + + self.rollback_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus([201, 200]) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertEqual(img['disk_usage'], 0) + + self.remove_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(204) + self.remove_image('rbd', None, 'rollback_img') + self.assertStatus(204) + + def test_clone(self): + self.create_image('rbd', None, 'cimg', 2**30, features=["layering"]) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'cimg', 'snap1') + self.assertStatus(201) + self.update_snapshot('rbd', None, 'cimg', 'snap1', None, True) + self.assertStatus(200) + self.clone_image('rbd', None, 'cimg', 'snap1', 'rbd', None, 'cimg-clone', + features=["layering", "exclusive-lock", "fast-diff", + "object-map"]) + self.assertStatus([200, 201]) + + img = self.get_image('rbd', None, 'cimg-clone') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map'], + parent={'pool_name': 'rbd', 'pool_namespace': '', + 'image_name': 'cimg', 'snap_name': 'snap1'}) + + res = self.remove_image('rbd', None, 'cimg') + self.assertStatus(400) + self.assertIn('code', 
res) + self.assertEqual(res['code'], '16') + + self.remove_image('rbd', None, 'cimg-clone') + self.assertStatus(204) + self.remove_image('rbd', None, 'cimg') + self.assertStatus(204) + + def test_copy(self): + self.create_image('rbd', None, 'coimg', 2**30, + features=["layering", "exclusive-lock", "fast-diff", + "object-map"]) + self.assertStatus(201) + + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', + 'rbd/coimg']) + + self.copy_image('rbd', None, 'coimg', 'rbd_iscsi', None, 'coimg-copy', + features=["layering", "fast-diff", "exclusive-lock", + "object-map"]) + self.assertStatus([200, 201]) + + img = self.get_image('rbd', None, 'coimg') + self.assertStatus(200) + self._validate_image(img, features_name=['layering', 'exclusive-lock', + 'fast-diff', 'object-map']) + + img_copy = self.get_image('rbd_iscsi', None, 'coimg-copy') + self._validate_image(img_copy, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map'], + disk_usage=img['disk_usage']) + + self.remove_image('rbd', None, 'coimg') + self.assertStatus(204) + self.remove_image('rbd_iscsi', None, 'coimg-copy') + self.assertStatus(204) + + def test_flatten(self): + self.create_snapshot('rbd', None, 'img1', 'snapf') + self.update_snapshot('rbd', None, 'img1', 'snapf', None, True) + self.clone_image('rbd', None, 'img1', 'snapf', 'rbd_iscsi', None, 'img1_snapf_clone') + + img = self.get_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(200) + self.assertIsNotNone(img['parent']) + + self.flatten_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus([200, 201]) + + img = self.get_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(200) + self.assertIsNone(img['parent']) + + self.update_snapshot('rbd', None, 'img1', 'snapf', None, False) + self.remove_snapshot('rbd', None, 'img1', 'snapf') + self.assertStatus(204) + + self.remove_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(204) + + def test_default_features(self): + 
default_features = self._get('/api/block/image/default_features') + self.assertEqual(default_features, [ + 'deep-flatten', 'exclusive-lock', 'fast-diff', 'layering', 'object-map']) + + def test_clone_format_version(self): + config_name = 'rbd_default_clone_format' + + def _get_config_by_name(conf_name): + data = self._get('/api/cluster_conf/{}'.format(conf_name)) + if 'value' in data: + return data['value'] + return None + + # with rbd_default_clone_format = auto + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 1) + self.assertStatus(200) + + # with rbd_default_clone_format = 1 + value = [{'section': "global", 'value': "1"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + value, + timeout=60) + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 1) + self.assertStatus(200) + + # with rbd_default_clone_format = 2 + value = [{'section': "global", 'value': "2"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + value, + timeout=60) + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 2) + self.assertStatus(200) + + value = [] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + None, + timeout=60) + + def test_image_with_namespace(self): + self.create_namespace('rbd', 'ns') + self.create_image('rbd', 'ns', 'test', 10240) + self.assertStatus(201) + + img = self.get_image('rbd', 'ns', 'test') + self.assertStatus(200) + + self._validate_image(img, name='test', size=10240, + pool_name='rbd', namespace='ns', + num_objs=1, obj_size=4194304, + features_name=['deep-flatten', + 
'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', 'ns', 'test') + self.remove_namespace('rbd', 'ns') + + def test_move_image_to_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd') + + self.get_image('rbd', None, 'test_rbd') + self.assertStatus(404) + + time.sleep(1) + + image = self.get_trash('rbd', img_id) + self.assertIsNotNone(image) + + self.remove_trash('rbd', img_id) + + def test_list_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 0) + data = self._get('/api/block/image/trash/?pool_name={}'.format('rbd')) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertIsNotNone(data) + + self.remove_trash('rbd', img_id) + self.assertStatus(204) + + def test_restore_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd') + + self.restore_trash('rbd', None, img_id, 'test_rbd') + + self.get_image('rbd', None, 'test_rbd') + self.assertStatus(200) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + self.remove_image('rbd', None, 'test_rbd') + + def test_remove_expired_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 0) + self.remove_trash('rbd', img_id, False) + self.assertStatus(204) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + def test_remove_not_expired_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 9999) + self.remove_trash('rbd', img_id, False) + self.assertStatus(400) + + time.sleep(1) + + image = self.get_trash('rbd', img_id) + self.assertIsNotNone(image) + + self.remove_trash('rbd', img_id, True) + + def test_remove_not_expired_trash_with_force(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 9999) + self.remove_trash('rbd', img_id, True) + self.assertStatus(204) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + def test_purge_trash(self): + id_expired = self.create_image_in_trash('rbd', 'test_rbd_expired', 0) + 
id_not_expired = self.create_image_in_trash('rbd', 'test_rbd', 9999) + + time.sleep(1) + + self.purge_trash('rbd') + self.assertStatus([200, 201]) + + time.sleep(1) + + trash_not_expired = self.get_trash('rbd', id_not_expired) + self.assertIsNotNone(trash_not_expired) + + self.wait_until_equal(lambda: self.get_trash('rbd', id_expired), None, 60) + + def test_list_namespaces(self): + self.create_namespace('rbd', 'ns') + + namespaces = self._get('/api/block/pool/rbd/namespace') + self.assertStatus(200) + self.assertEqual(len(namespaces), 1) + + self.remove_namespace('rbd', 'ns') diff --git a/qa/tasks/mgr/dashboard/test_rbd_mirroring.py b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py new file mode 100644 index 000000000..b6a86e405 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class RbdMirroringTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager', 'block-manager'] + + @classmethod + def get_pool(cls, pool): + data = cls._get('/api/block/mirroring/pool/{}'.format(pool)) + if isinstance(data, dict): + return data + return {} + + @classmethod + def update_pool(cls, pool, mirror_mode): + data = {'mirror_mode': mirror_mode} + return cls._task_put('/api/block/mirroring/pool/{}'.format(pool), + data) + + @classmethod + def list_peers(cls, pool): + data = cls._get('/api/block/mirroring/pool/{}/peer'.format(pool)) + if isinstance(data, list): + return data + return [] + + @classmethod + def get_peer(cls, pool, peer_uuid): + data = cls._get('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid)) + if isinstance(data, dict): + return data + return {} + + @classmethod + def create_peer(cls, pool, cluster_name, client_id, **kwargs): + data = {'cluster_name': cluster_name, 'client_id': client_id} + data.update(kwargs) + return 
cls._task_post('/api/block/mirroring/pool/{}/peer'.format(pool), + data) + + @classmethod + def update_peer(cls, pool, peer_uuid, **kwargs): + return cls._task_put('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid), + kwargs) + + @classmethod + def delete_peer(cls, pool, peer_uuid): + return cls._task_delete('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid)) + + @classmethod + def setUpClass(cls): + super(RbdMirroringTest, cls).setUpClass() + cls.create_pool('rbd', 2**3, 'replicated') + + @classmethod + def tearDownClass(cls): + super(RbdMirroringTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it']) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self.get_pool('rbd') + self.assertStatus(403) + self.list_peers('rbd') + self.assertStatus(403) + self.get_peer('rbd', '123') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self.create_peer('rbd', 'remote', 'id') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'delete']}]) + def test_update_access_permissions(self): + self.update_peer('rbd', '123') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self.delete_peer('rbd', '123') + self.assertStatus(403) + + def test_mirror_mode(self): + self.update_pool('rbd', 'disabled') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'disabled') + + self.update_pool('rbd', 'image') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'image') + + self.update_pool('rbd', 'pool') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'pool') + + 
self.update_pool('rbd', 'disabled') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'disabled') + + def test_set_invalid_mirror_mode(self): + self.update_pool('rbd', 'invalid') + self.assertStatus(400) + + def test_set_same_mirror_mode(self): + self.update_pool('rbd', 'disabled') + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_peer(self): + self.update_pool('rbd', 'image') + self.assertStatus(200) + + peers = self.list_peers('rbd') + self.assertStatus(200) + self.assertEqual([], peers) + + uuid = self.create_peer('rbd', 'remote', 'admin')['uuid'] + self.assertStatus(201) + + peers = self.list_peers('rbd') + self.assertStatus(200) + self.assertEqual([uuid], peers) + + expected_peer = { + 'uuid': uuid, + 'cluster_name': 'remote', + 'site_name': 'remote', + 'client_id': 'admin', + 'mon_host': '', + 'key': '', + 'direction': 'rx-tx', + 'mirror_uuid': '' + } + peer = self.get_peer('rbd', uuid) + self.assertEqual(expected_peer, peer) + + self.update_peer('rbd', uuid, mon_host='1.2.3.4') + self.assertStatus(200) + + expected_peer['mon_host'] = '1.2.3.4' + peer = self.get_peer('rbd', uuid) + self.assertEqual(expected_peer, peer) + + self.delete_peer('rbd', uuid) + self.assertStatus(204) + + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_disable_mirror_with_peers(self): + self.update_pool('rbd', 'image') + self.assertStatus(200) + + uuid = self.create_peer('rbd', 'remote', 'admin')['uuid'] + self.assertStatus(201) + + self.update_pool('rbd', 'disabled') + self.assertStatus(400) + + self.delete_peer('rbd', uuid) + self.assertStatus(204) + + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_site_name(self): + expected_site_name = {'site_name': 'site-a'} + self._task_put('/api/block/mirroring/site_name', expected_site_name) + self.assertStatus(200) + + site_name = self._get('/api/block/mirroring/site_name') + self.assertStatus(200) + self.assertEqual(expected_site_name, 
site_name) + + def test_bootstrap(self): + self.update_pool('rbd', 'image') + token_data = self._task_post('/api/block/mirroring/pool/rbd/bootstrap/token', {}) + self.assertStatus(200) + + import_data = { + 'token': token_data['token'], + 'direction': 'invalid'} + self._task_post('/api/block/mirroring/pool/rbd/bootstrap/peer', import_data) + self.assertStatus(400) + + # cannot import "yourself" as peer + import_data['direction'] = 'rx' + self._task_post('/api/block/mirroring/pool/rbd/bootstrap/peer', import_data) + self.assertStatus(400) diff --git a/qa/tasks/mgr/dashboard/test_requests.py b/qa/tasks/mgr/dashboard/test_requests.py new file mode 100644 index 000000000..0d7fb75ad --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_requests.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from . import DEFAULT_API_VERSION +from .helper import DashboardTestCase + + +class RequestsTest(DashboardTestCase): + def test_gzip(self): + self._get('/api/summary') + self.assertHeaders({ + 'Content-Encoding': 'gzip', + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION) + }) + + def test_force_no_gzip(self): + self._get('/api/summary', params=dict( + headers={'Accept-Encoding': 'identity'} + )) + self.assertNotIn('Content-Encoding', self._resp.headers) + self.assertHeaders({ + 'Content-Type': 'application/json' + }) + + def test_server(self): + self._get('/api/summary') + self.assertHeaders({ + 'server': 'Ceph-Dashboard', + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION), + 'Content-Security-Policy': "frame-ancestors 'self';", + 'X-Content-Type-Options': 'nosniff', + 'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload' + }) diff --git a/qa/tasks/mgr/dashboard/test_rgw.py b/qa/tasks/mgr/dashboard/test_rgw.py new file mode 100644 index 000000000..53577a87a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rgw.py @@ -0,0 +1,867 @@ +# -*- coding: utf-8 -*- +from
__future__ import absolute_import + +import base64 +import logging +import time +from urllib import parse + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.hashes import SHA1 +from cryptography.hazmat.primitives.twofactor.totp import TOTP + +from .helper import DashboardTestCase, JLeaf, JList, JObj + +logger = logging.getLogger(__name__) + + +class RgwTestCase(DashboardTestCase): + + maxDiff = None + create_test_user = False + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + super(RgwTestCase, cls).setUpClass() + # Create the administrator account. + cls._radosgw_admin_cmd([ + 'user', 'create', '--uid', 'admin', '--display-name', 'admin', + '--system', '--access-key', 'admin', '--secret', 'admin' + ]) + # Update the dashboard configuration. + cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin') + cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin') + # Create a test user? + if cls.create_test_user: + cls._radosgw_admin_cmd([ + 'user', 'create', '--uid', 'teuth-test-user', '--display-name', + 'teuth-test-user' + ]) + cls._radosgw_admin_cmd([ + 'caps', 'add', '--uid', 'teuth-test-user', '--caps', + 'metadata=write' + ]) + cls._radosgw_admin_cmd([ + 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser', + 'teuth-test-subuser', '--access', 'full', '--key-type', 's3', + '--access-key', 'xyz123' + ]) + cls._radosgw_admin_cmd([ + 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser', + 'teuth-test-subuser2', '--access', 'full', '--key-type', + 'swift' + ]) + + @classmethod + def tearDownClass(cls): + # Delete administrator account. 
+ cls._radosgw_admin_cmd(['user', 'rm', '--uid', 'admin']) + if cls.create_test_user: + cls._radosgw_admin_cmd(['user', 'rm', '--uid=teuth-test-user', '--purge-data']) + super(RgwTestCase, cls).tearDownClass() + + def get_rgw_user(self, uid, stats=True): + return self._get('/api/rgw/user/{}?stats={}'.format(uid, stats)) + + +class RgwApiCredentialsTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + def test_invalid_credentials(self): + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'invalid') + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'invalid') + resp = self._get('/api/rgw/user') + self.assertStatus(404) + self.assertIn('detail', resp) + self.assertIn('component', resp) + self.assertIn('Error connecting to Object Gateway', resp['detail']) + self.assertEqual(resp['component'], 'rgw') + + def test_success(self): + # Set the default credentials. + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin') + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin') + data = self._get('/ui-api/rgw/status') + self.assertStatus(200) + self.assertIn('available', data) + self.assertIn('message', data) + self.assertTrue(data['available']) + + +class RgwSiteTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + def test_get_placement_targets(self): + data = self._get('/api/rgw/site?query=placement-targets') + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'zonegroup': str, + 'placement_targets': JList(JObj({ + 'name': str, + 'data_pool': str + })) + })) + + def test_get_realms(self): + data = self._get('/api/rgw/site?query=realms') + self.assertStatus(200) + self.assertSchema(data, JList(str)) + + +class RgwBucketTest(RgwTestCase): + + _mfa_token_serial = '1' + _mfa_token_seed = '23456723' + _mfa_token_time_step = 2 + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwBucketTest, cls).setUpClass() + # Create MFA 
TOTP token for test user. + cls._radosgw_admin_cmd([ + 'mfa', 'create', '--uid', 'teuth-test-user', '--totp-serial', cls._mfa_token_serial, + '--totp-seed', cls._mfa_token_seed, '--totp-seed-type', 'base32', + '--totp-seconds', str(cls._mfa_token_time_step), '--totp-window', '1' + ]) + # Create tenanted users. + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user', + '--display-name', 'tenanted teuth-test-user' + ]) + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx2', '--uid', 'teuth-test-user2', + '--display-name', 'tenanted teuth-test-user 2' + ]) + + @classmethod + def tearDownClass(cls): + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user', '--purge-data']) + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx2', '--uid=teuth-test-user2', '--purge-data']) + super(RgwBucketTest, cls).tearDownClass() + + def _get_mfa_token_pin(self): + totp_key = base64.b32decode(self._mfa_token_seed) + totp = TOTP(totp_key, 6, SHA1(), self._mfa_token_time_step, backend=default_backend(), + enforce_key_length=False) + time_value = int(time.time()) + return totp.generate(time_value) + + def test_all(self): + # Create a new bucket. + self._post( + '/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'admin', + 'zonegroup': 'default', + 'placement_target': 'default-placement' + }) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'bucket_info': JObj(sub_elems={ + 'bucket': JObj(allow_unknown=True, sub_elems={ + 'name': JLeaf(str), + 'bucket_id': JLeaf(str), + 'tenant': JLeaf(str) + }), + 'quota': JObj(sub_elems={}, allow_unknown=True), + 'creation_time': JLeaf(str) + }, allow_unknown=True) + }, allow_unknown=True)) + data = data['bucket_info']['bucket'] + self.assertEqual(data['name'], 'teuth-test-bucket') + self.assertEqual(data['tenant'], '') + + # List all buckets. 
+ data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertIn('teuth-test-bucket', data) + + # List all buckets with stats. + data = self._get('/api/rgw/bucket?stats=true', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertSchema(data[0], JObj(sub_elems={ + 'bid': JLeaf(str), + 'bucket': JLeaf(str), + 'bucket_quota': JObj(sub_elems={}, allow_unknown=True), + 'id': JLeaf(str), + 'owner': JLeaf(str), + 'usage': JObj(sub_elems={}, allow_unknown=True), + 'tenant': JLeaf(str), + }, allow_unknown=True)) + + # List all buckets names without stats. + data = self._get('/api/rgw/bucket?stats=false', version='1.1') + self.assertStatus(200) + self.assertEqual(data, ['teuth-test-bucket']) + + # Get the bucket. + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + 'id': JLeaf(str), + 'bid': JLeaf(str), + 'tenant': JLeaf(str), + 'bucket': JLeaf(str), + 'bucket_quota': JObj(sub_elems={}, allow_unknown=True), + 'owner': JLeaf(str), + 'mfa_delete': JLeaf(str), + 'usage': JObj(sub_elems={}, allow_unknown=True), + 'versioning': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(data['bucket'], 'teuth-test-bucket') + self.assertEqual(data['owner'], 'admin') + self.assertEqual(data['placement_rule'], 'default-placement') + self.assertEqual(data['versioning'], 'Suspended') + + # Update bucket: change owner, enable versioning. 
+ self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Enabled' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + 'owner': JLeaf(str), + 'bid': JLeaf(str), + 'tenant': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(data['owner'], 'teuth-test-user') + self.assertEqual(data['versioning'], 'Enabled') + + # Update bucket: enable MFA Delete. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Enabled', + 'mfa_delete': 'Enabled', + 'mfa_token_serial': self._mfa_token_serial, + 'mfa_token_pin': self._get_mfa_token_pin() + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertEqual(data['versioning'], 'Enabled') + self.assertEqual(data['mfa_delete'], 'Enabled') + + # Update bucket: disable versioning & MFA Delete. + time.sleep(self._mfa_token_time_step * 3) # Required to get new TOTP pin. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Suspended', + 'mfa_delete': 'Disabled', + 'mfa_token_serial': self._mfa_token_serial, + 'mfa_token_pin': self._get_mfa_token_pin() + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertEqual(data['versioning'], 'Suspended') + self.assertEqual(data['mfa_delete'], 'Disabled') + + # Delete the bucket. + self._delete('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(204) + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 0) + + def test_crud_w_tenant(self): + # Create a new bucket. The tenant of the user is used when + # the bucket is created. 
+ self._post( + '/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'testx$teuth-test-user', + 'zonegroup': 'default', + 'placement_target': 'default-placement' + }) + self.assertStatus(201) + # It's not possible to validate the result because there + # IS NO result object returned by the RGW Admin OPS API + # when a tenanted bucket is created. + data = self.jsonBody() + self.assertIsNone(data) + + # List all buckets. + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertIn('testx/teuth-test-bucket', data) + + def _verify_tenant_bucket(bucket, tenant, uid): + full_bucket_name = '{}/{}'.format(tenant, bucket) + _data = self._get('/api/rgw/bucket/{}'.format( + parse.quote_plus(full_bucket_name))) + self.assertStatus(200) + self.assertSchema(_data, JObj(sub_elems={ + 'owner': JLeaf(str), + 'bucket': JLeaf(str), + 'tenant': JLeaf(str), + 'bid': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(_data['owner'], '{}${}'.format(tenant, uid)) + self.assertEqual(_data['bucket'], bucket) + self.assertEqual(_data['tenant'], tenant) + self.assertEqual(_data['bid'], full_bucket_name) + return _data + + # Get the bucket. + data = _verify_tenant_bucket('teuth-test-bucket', 'testx', 'teuth-test-user') + self.assertEqual(data['placement_rule'], 'default-placement') + self.assertEqual(data['versioning'], 'Suspended') + + # Update bucket: different user with different tenant, enable versioning. 
+ self._put( + '/api/rgw/bucket/{}'.format( + parse.quote_plus('testx/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'testx2$teuth-test-user2', + 'versioning_state': 'Enabled' + }) + data = _verify_tenant_bucket('teuth-test-bucket', 'testx2', 'teuth-test-user2') + self.assertEqual(data['versioning'], 'Enabled') + + # Change owner to a non-tenanted user + self._put( + '/api/rgw/bucket/{}'.format( + parse.quote_plus('testx2/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'admin' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertIn('owner', data) + self.assertEqual(data['owner'], 'admin') + self.assertEqual(data['tenant'], '') + self.assertEqual(data['bucket'], 'teuth-test-bucket') + self.assertEqual(data['bid'], 'teuth-test-bucket') + self.assertEqual(data['versioning'], 'Enabled') + + # Change owner back to tenanted user, suspend versioning. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'testx$teuth-test-user', + 'versioning_state': 'Suspended' + }) + self.assertStatus(200) + data = _verify_tenant_bucket('teuth-test-bucket', 'testx', 'teuth-test-user') + self.assertEqual(data['versioning'], 'Suspended') + + # Delete the bucket. 
+ self._delete('/api/rgw/bucket/{}'.format( + parse.quote_plus('testx/teuth-test-bucket'))) + self.assertStatus(204) + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 0) + + def test_crud_w_locking(self): + # Create + self._post('/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'teuth-test-user', + 'zonegroup': 'default', + 'placement_target': 'default-placement', + 'lock_enabled': 'true', + 'lock_mode': 'GOVERNANCE', + 'lock_retention_period_days': '0', + 'lock_retention_period_years': '1' + }) + self.assertStatus(201) + # Read + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema( + data, + JObj(sub_elems={ + 'lock_enabled': JLeaf(bool), + 'lock_mode': JLeaf(str), + 'lock_retention_period_days': JLeaf(int), + 'lock_retention_period_years': JLeaf(int) + }, + allow_unknown=True)) + self.assertTrue(data['lock_enabled']) + self.assertEqual(data['lock_mode'], 'GOVERNANCE') + self.assertEqual(data['lock_retention_period_days'], 0) + self.assertEqual(data['lock_retention_period_years'], 1) + # Update + self._put('/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'lock_mode': 'COMPLIANCE', + 'lock_retention_period_days': '15', + 'lock_retention_period_years': '0' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertTrue(data['lock_enabled']) + self.assertEqual(data['lock_mode'], 'COMPLIANCE') + self.assertEqual(data['lock_retention_period_days'], 15) + self.assertEqual(data['lock_retention_period_years'], 0) + self.assertStatus(200) + + # Update: Disabling bucket versioning should fail if object locking enabled + self._put('/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Suspended' + }) + self.assertStatus(409) + + # Delete + 
self._delete('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(204) + + +class RgwDaemonTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @DashboardTestCase.RunAs('test', 'test', [{ + 'rgw': ['create', 'update', 'delete'] + }]) + def test_read_access_permissions(self): + self._get('/api/rgw/daemon') + self.assertStatus(403) + self._get('/api/rgw/daemon/id') + self.assertStatus(403) + + def test_list(self): + data = self._get('/api/rgw/daemon') + self.assertStatus(200) + self.assertEqual(len(data), 1) + data = data[0] + self.assertIn('id', data) + self.assertIn('version', data) + self.assertIn('server_hostname', data) + self.assertIn('zonegroup_name', data) + self.assertIn('zone_name', data) + + def test_get(self): + data = self._get('/api/rgw/daemon') + self.assertStatus(200) + + data = self._get('/api/rgw/daemon/{}'.format(data[0]['id'])) + self.assertStatus(200) + self.assertIn('rgw_metadata', data) + self.assertIn('rgw_id', data) + self.assertIn('rgw_status', data) + self.assertTrue(data['rgw_metadata']) + + def test_status(self): + data = self._get('/ui-api/rgw/status') + self.assertStatus(200) + self.assertIn('available', data) + self.assertIn('message', data) + self.assertTrue(data['available']) + + +class RgwUserTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + super(RgwUserTest, cls).setUpClass() + + def _assert_user_data(self, data): + self.assertSchema(data, JObj(sub_elems={ + 'caps': JList(JObj(sub_elems={}, allow_unknown=True)), + 'display_name': JLeaf(str), + 'email': JLeaf(str), + 'keys': JList(JObj(sub_elems={}, allow_unknown=True)), + 'max_buckets': JLeaf(int), + 'subusers': JList(JLeaf(str)), + 'suspended': JLeaf(int), + 'swift_keys': JList(JObj(sub_elems={}, allow_unknown=True)), + 'tenant': JLeaf(str), + 'user_id': JLeaf(str), + 'uid': JLeaf(str) + }, allow_unknown=True)) + self.assertGreaterEqual(len(data['keys']), 1) + + def test_get(self): + data = self.get_rgw_user('admin') + 
self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'admin') + self.assertTrue(data['stats']) + self.assertIsInstance(data['stats'], dict) + # Test without stats. + data = self.get_rgw_user('admin', False) + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'admin') + + def test_list(self): + data = self._get('/api/rgw/user') + self.assertStatus(200) + self.assertGreaterEqual(len(data), 1) + self.assertIn('admin', data) + + def test_get_emails(self): + data = self._get('/api/rgw/user/get_emails') + self.assertStatus(200) + self.assertSchema(data, JList(str)) + + def test_create_get_update_delete(self): + # Create a new user. + self._post('/api/rgw/user', params={ + 'uid': 'teuth-test-user', + 'display_name': 'display name' + }) + self.assertStatus(201) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['display_name'], 'display name') + + # Get the user. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['tenant'], '') + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['uid'], 'teuth-test-user') + + # Update the user. + self._put( + '/api/rgw/user/teuth-test-user', + params={'display_name': 'new name'}) + self.assertStatus(200) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['display_name'], 'new name') + + # Delete the user. 
+ self._delete('/api/rgw/user/teuth-test-user') + self.assertStatus(204) + self.get_rgw_user('teuth-test-user') + self.assertStatus(500) + resp = self.jsonBody() + self.assertIn('detail', resp) + self.assertIn('failed request with status code 404', resp['detail']) + self.assertIn('"Code":"NoSuchUser"', resp['detail']) + self.assertIn('"HostId"', resp['detail']) + self.assertIn('"RequestId"', resp['detail']) + + def test_create_get_update_delete_w_tenant(self): + # Create a new user. + self._post( + '/api/rgw/user', + params={ + 'uid': 'test01$teuth-test-user', + 'display_name': 'display name' + }) + self.assertStatus(201) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['display_name'], 'display name') + + # Get the user. + data = self.get_rgw_user('test01$teuth-test-user') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['tenant'], 'test01') + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['uid'], 'test01$teuth-test-user') + + # Update the user. + self._put( + '/api/rgw/user/test01$teuth-test-user', + params={'display_name': 'new name'}) + self.assertStatus(200) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['display_name'], 'new name') + + # Delete the user. 
+ self._delete('/api/rgw/user/test01$teuth-test-user') + self.assertStatus(204) + self.get_rgw_user('test01$teuth-test-user') + self.assertStatus(500) + resp = self.jsonBody() + self.assertIn('detail', resp) + self.assertIn('failed request with status code 404', resp['detail']) + self.assertIn('"Code":"NoSuchUser"', resp['detail']) + self.assertIn('"HostId"', resp['detail']) + self.assertIn('"RequestId"', resp['detail']) + + +class RgwUserCapabilityTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserCapabilityTest, cls).setUpClass() + + def test_set(self): + self._post( + '/api/rgw/user/teuth-test-user/capability', + params={ + 'type': 'usage', + 'perm': 'read' + }) + self.assertStatus(201) + data = self.jsonBody() + self.assertEqual(len(data), 1) + data = data[0] + self.assertEqual(data['type'], 'usage') + self.assertEqual(data['perm'], 'read') + + # Get the user data to validate the capabilities. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self.assertGreaterEqual(len(data['caps']), 1) + self.assertEqual(data['caps'][0]['type'], 'usage') + self.assertEqual(data['caps'][0]['perm'], 'read') + + def test_delete(self): + self._delete( + '/api/rgw/user/teuth-test-user/capability', + params={ + 'type': 'metadata', + 'perm': 'write' + }) + self.assertStatus(204) + + # Get the user data to validate the capabilities. 
+ data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self.assertEqual(len(data['caps']), 0) + + +class RgwUserKeyTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserKeyTest, cls).setUpClass() + + def test_create_s3(self): + self._post( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 's3', + 'generate_key': 'false', + 'access_key': 'abc987', + 'secret_key': 'aaabbbccc' + }) + data = self.jsonBody() + self.assertStatus(201) + self.assertGreaterEqual(len(data), 3) + key = self.find_object_in_list('access_key', 'abc987', data) + self.assertIsInstance(key, object) + self.assertEqual(key['secret_key'], 'aaabbbccc') + + def test_create_swift(self): + self._post( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 'swift', + 'subuser': 'teuth-test-subuser', + 'generate_key': 'false', + 'secret_key': 'xxxyyyzzz' + }) + data = self.jsonBody() + self.assertStatus(201) + self.assertGreaterEqual(len(data), 2) + key = self.find_object_in_list('secret_key', 'xxxyyyzzz', data) + self.assertIsInstance(key, object) + + def test_delete_s3(self): + self._delete( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 's3', + 'access_key': 'xyz123' + }) + self.assertStatus(204) + + def test_delete_swift(self): + self._delete( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 'swift', + 'subuser': 'teuth-test-user:teuth-test-subuser2' + }) + self.assertStatus(204) + + +class RgwUserQuotaTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserQuotaTest, cls).setUpClass() + + def _assert_quota(self, data): + self.assertIn('user_quota', data) + self.assertIn('max_objects', data['user_quota']) + self.assertIn('enabled', data['user_quota']) + self.assertIn('max_size_kb', data['user_quota']) + self.assertIn('max_size', data['user_quota']) + 
self.assertIn('bucket_quota', data) + self.assertIn('max_objects', data['bucket_quota']) + self.assertIn('enabled', data['bucket_quota']) + self.assertIn('max_size_kb', data['bucket_quota']) + self.assertIn('max_size', data['bucket_quota']) + + def test_get_quota(self): + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + + def test_set_user_quota(self): + self._put( + '/api/rgw/user/teuth-test-user/quota', + params={ + 'quota_type': 'user', + 'enabled': 'true', + 'max_size_kb': 2048, + 'max_objects': 101 + }) + self.assertStatus(200) + + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + self.assertEqual(data['user_quota']['max_objects'], 101) + self.assertTrue(data['user_quota']['enabled']) + self.assertEqual(data['user_quota']['max_size_kb'], 2048) + + def test_set_bucket_quota(self): + self._put( + '/api/rgw/user/teuth-test-user/quota', + params={ + 'quota_type': 'bucket', + 'enabled': 'false', + 'max_size_kb': 4096, + 'max_objects': 2000 + }) + self.assertStatus(200) + + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + self.assertEqual(data['bucket_quota']['max_objects'], 2000) + self.assertFalse(data['bucket_quota']['enabled']) + self.assertEqual(data['bucket_quota']['max_size_kb'], 4096) + + +class RgwUserSubuserTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserSubuserTest, cls).setUpClass() + + def test_create_swift(self): + self._post( + '/api/rgw/user/teuth-test-user/subuser', + params={ + 'subuser': 'tux', + 'access': 'readwrite', + 'key_type': 'swift' + }) + self.assertStatus(201) + data = self.jsonBody() + subuser = self.find_object_in_list('id', 'teuth-test-user:tux', data) + self.assertIsInstance(subuser, object) + self.assertEqual(subuser['permissions'], 'read-write') + + # Get the user 
data to validate the keys. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list('user', 'teuth-test-user:tux', + data['swift_keys']) + self.assertIsInstance(key, object) + + def test_create_s3(self): + self._post( + '/api/rgw/user/teuth-test-user/subuser', + params={ + 'subuser': 'hugo', + 'access': 'write', + 'generate_secret': 'false', + 'access_key': 'yyy', + 'secret_key': 'xxx' + }) + self.assertStatus(201) + data = self.jsonBody() + subuser = self.find_object_in_list('id', 'teuth-test-user:hugo', data) + self.assertIsInstance(subuser, object) + self.assertEqual(subuser['permissions'], 'write') + + # Get the user data to validate the keys. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list('user', 'teuth-test-user:hugo', + data['keys']) + self.assertIsInstance(key, object) + self.assertEqual(key['secret_key'], 'xxx') + + def test_delete_w_purge(self): + self._delete( + '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser2') + self.assertStatus(204) + + # Get the user data to check that the keys don't exist anymore. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list( + 'user', 'teuth-test-user:teuth-test-subuser2', data['swift_keys']) + self.assertIsNone(key) + + def test_delete_wo_purge(self): + self._delete( + '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser', + params={'purge_keys': 'false'}) + self.assertStatus(204) + + # Get the user data to check whether they keys still exist. 
+ data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list( + 'user', 'teuth-test-user:teuth-test-subuser', data['keys']) + self.assertIsInstance(key, object) diff --git a/qa/tasks/mgr/dashboard/test_role.py b/qa/tasks/mgr/dashboard/test_role.py new file mode 100644 index 000000000..dbfaea9e4 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_role.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class RoleTest(DashboardTestCase): + @classmethod + def _create_role(cls, name=None, description=None, scopes_permissions=None): + data = {} + if name: + data['name'] = name + if description: + data['description'] = description + if scopes_permissions: + data['scopes_permissions'] = scopes_permissions + cls._post('/api/role', data) + + def test_crud_role(self): + self._create_role(name='role1', + description='Description 1', + scopes_permissions={'osd': ['read']}) + self.assertStatus(201) + self.assertJsonBody({ + 'name': 'role1', + 'description': 'Description 1', + 'scopes_permissions': {'osd': ['read']}, + 'system': False + }) + + self._get('/api/role/role1') + self.assertStatus(200) + self.assertJsonBody({ + 'name': 'role1', + 'description': 'Description 1', + 'scopes_permissions': {'osd': ['read']}, + 'system': False + }) + + self._put('/api/role/role1', { + 'description': 'Description 2', + 'scopes_permissions': {'osd': ['read', 'update']}, + }) + self.assertStatus(200) + self.assertJsonBody({ + 'name': 'role1', + 'description': 'Description 2', + 'scopes_permissions': {'osd': ['read', 'update']}, + 'system': False + }) + + self._delete('/api/role/role1') + self.assertStatus(204) + + def test_list_roles(self): + roles = self._get('/api/role') + self.assertStatus(200) + + self.assertGreaterEqual(len(roles), 1) + for role in roles: + self.assertIn('name', role) + self.assertIn('description', role) + self.assertIn('scopes_permissions', role) + 
self.assertIn('system', role) + + def test_get_role_does_not_exist(self): + self._get('/api/role/role2') + self.assertStatus(404) + + def test_create_role_already_exists(self): + self._create_role(name='read-only', + description='Description 1', + scopes_permissions={'osd': ['read']}) + self.assertStatus(400) + self.assertError(code='role_already_exists', + component='role') + + def test_create_role_no_name(self): + self._create_role(description='Description 1', + scopes_permissions={'osd': ['read']}) + self.assertStatus(400) + self.assertError(code='name_required', + component='role') + + def test_create_role_invalid_scope(self): + self._create_role(name='role1', + description='Description 1', + scopes_permissions={'invalid-scope': ['read']}) + self.assertStatus(400) + self.assertError(code='invalid_scope', + component='role') + + def test_create_role_invalid_permission(self): + self._create_role(name='role1', + description='Description 1', + scopes_permissions={'osd': ['invalid-permission']}) + self.assertStatus(400) + self.assertError(code='invalid_permission', + component='role') + + def test_delete_role_does_not_exist(self): + self._delete('/api/role/role2') + self.assertStatus(404) + + def test_delete_system_role(self): + self._delete('/api/role/read-only') + self.assertStatus(400) + self.assertError(code='cannot_delete_system_role', + component='role') + + def test_delete_role_associated_with_user(self): + self.create_user("user", "user", ['read-only']) + self._create_role(name='role1', + description='Description 1', + scopes_permissions={'user': ['create', 'read', 'update', 'delete']}) + self.assertStatus(201) + self._put('/api/user/user', {'roles': ['role1']}) + self.assertStatus(200) + + self._delete('/api/role/role1') + self.assertStatus(400) + self.assertError(code='role_is_associated_with_user', + component='role') + + self._put('/api/user/user', {'roles': ['administrator']}) + self.assertStatus(200) + self._delete('/api/role/role1') + 
self.assertStatus(204) + self.delete_user("user") + + def test_update_role_does_not_exist(self): + self._put('/api/role/role2', {}) + self.assertStatus(404) + + def test_update_system_role(self): + self._put('/api/role/read-only', {}) + self.assertStatus(400) + self.assertError(code='cannot_update_system_role', + component='role') + + def test_clone_role(self): + self._post('/api/role/read-only/clone', {'new_name': 'foo'}) + self.assertStatus(201) + self._delete('/api/role/foo') diff --git a/qa/tasks/mgr/dashboard/test_settings.py b/qa/tasks/mgr/dashboard/test_settings.py new file mode 100644 index 000000000..d6ad1e762 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_settings.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JAny, JList, JObj + + +class SettingsTest(DashboardTestCase): + def setUp(self): + super(SettingsTest, self).setUp() + self.settings = self._get('/api/settings') + + def tearDown(self): + self._put( + '/api/settings', + {setting['name']: setting['value'] + for setting in self.settings}) + + def test_list_settings(self): + settings = self._get('/api/settings') + self.assertGreater(len(settings), 10) + self.assertSchema( + settings, + JList( + JObj({ + 'default': JAny(none=False), + 'name': str, + 'type': str, + 'value': JAny(none=False) + }))) + self.assertStatus(200) + + def test_get_setting(self): + setting = self._get('/api/settings/rgw-api-access-key') + self.assertSchema( + setting, + JObj({ + 'default': JAny(none=False), + 'name': str, + 'type': str, + 'value': JAny(none=False) + })) + self.assertStatus(200) + + def test_set_setting(self): + self._put('/api/settings/rgw-api-access-key', {'value': 'foo'}) + self.assertStatus(200) + + value = self._get('/api/settings/rgw-api-access-key')['value'] + self.assertEqual('foo', value) + + def test_bulk_set(self): + self._put('/api/settings', { + 'RGW_API_ACCESS_KEY': 'dummy-key', + 'RGW_API_SECRET_KEY': 'dummy-secret', + 
}) + self.assertStatus(200) + + access_key = self._get('/api/settings/rgw-api-access-key')['value'] + self.assertStatus(200) + self.assertEqual('dummy-key', access_key) + + secret_key = self._get('/api/settings/rgw-api-secret-key')['value'] + self.assertStatus(200) + self.assertEqual('dummy-secret', secret_key) diff --git a/qa/tasks/mgr/dashboard/test_summary.py b/qa/tasks/mgr/dashboard/test_summary.py new file mode 100644 index 000000000..a31f89146 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_summary.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class SummaryTest(DashboardTestCase): + CEPHFS = True + + def test_summary(self): + data = self._get("/api/summary") + self.assertStatus(200) + + self.assertIn('health_status', data) + self.assertIn('mgr_id', data) + self.assertIn('have_mon_connection', data) + self.assertIn('rbd_mirroring', data) + self.assertIn('executing_tasks', data) + self.assertIn('finished_tasks', data) + self.assertIn('version', data) + self.assertIsNotNone(data['health_status']) + self.assertIsNotNone(data['mgr_id']) + self.assertIsNotNone(data['have_mon_connection']) + self.assertEqual(data['rbd_mirroring'], {'errors': 0, 'warnings': 0}) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_summary_permissions(self): + data = self._get("/api/summary") + self.assertStatus(200) + + self.assertIn('health_status', data) + self.assertIn('mgr_id', data) + self.assertIn('have_mon_connection', data) + self.assertNotIn('rbd_mirroring', data) + self.assertIn('executing_tasks', data) + self.assertIn('finished_tasks', data) + self.assertIn('version', data) + self.assertIsNotNone(data['health_status']) + self.assertIsNotNone(data['mgr_id']) + self.assertIsNotNone(data['have_mon_connection']) diff --git a/qa/tasks/mgr/dashboard/test_telemetry.py b/qa/tasks/mgr/dashboard/test_telemetry.py new file mode 100644 index 000000000..65c62c748 --- /dev/null +++ 
b/qa/tasks/mgr/dashboard/test_telemetry.py @@ -0,0 +1,98 @@ +from .helper import DashboardTestCase, JObj + + +class TelemetryTest(DashboardTestCase): + + pre_enabled_status = True + + @classmethod + def setUpClass(cls): + super(TelemetryTest, cls).setUpClass() + data = cls._get('/api/mgr/module/telemetry') + cls.pre_enabled_status = data['enabled'] + + # identify ourselves so we can filter these reports out on the server side + cls._put( + '/api/settings', + { + 'mgr/telemetry/channel_ident': True, + 'mgr/telemetry/organization': 'ceph-qa', + } + ) + + @classmethod + def tearDownClass(cls): + if cls.pre_enabled_status: + cls._enable_module() + else: + cls._disable_module() + super(TelemetryTest, cls).tearDownClass() + + def test_disable_module(self): + self._enable_module() + self._check_telemetry_enabled(True) + self._disable_module() + self._check_telemetry_enabled(False) + + def test_enable_module_correct_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': 'sharing-1-0' + }) + self.assertStatus(200) + self._check_telemetry_enabled(True) + + def test_enable_module_empty_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': '' + }) + self.assertStatus(400) + self.assertError(code='telemetry_enable_license_missing') + self._check_telemetry_enabled(False) + + def test_enable_module_invalid_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': 'invalid-license' + }) + self.assertStatus(400) + self.assertError(code='telemetry_enable_license_missing') + self._check_telemetry_enabled(False) + + def test_get_report(self): + self._enable_module() + data = self._get('/api/telemetry/report') + self.assertStatus(200) + schema = JObj({ + 'report': JObj({}, allow_unknown=True), + 
'device_report': JObj({}, allow_unknown=True) + }) + self.assertSchema(data, schema) + + @classmethod + def _enable_module(cls): + cls._put('/api/telemetry', { + 'enable': True, + 'license_name': 'sharing-1-0' + }) + + @classmethod + def _disable_module(cls): + cls._put('/api/telemetry', { + 'enable': False + }) + + def _check_telemetry_enabled(self, enabled): + data = self._get('/api/mgr/module/telemetry') + self.assertStatus(200) + self.assertEqual(data['enabled'], enabled) diff --git a/qa/tasks/mgr/dashboard/test_user.py b/qa/tasks/mgr/dashboard/test_user.py new file mode 100644 index 000000000..3a6464f5a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_user.py @@ -0,0 +1,565 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +import time +from datetime import datetime, timedelta + +from .helper import DashboardTestCase + + +class UserTest(DashboardTestCase): + @classmethod + def setUpClass(cls): + super(UserTest, cls).setUpClass() + cls._ceph_cmd(['dashboard', 'set-pwd-policy-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-length-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-oldpwd-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-username-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-exclusion-list-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-complexity-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-sequential-chars-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-repetitive-chars-enabled', 'true']) + + @classmethod + def tearDownClass(cls): + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-username-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-exclusion-list-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-complexity-enabled', 'false']) + cls._ceph_cmd(['dashboard', 
'set-pwd-policy-check-sequential-chars-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-repetitive-chars-enabled', 'false']) + super(UserTest, cls).tearDownClass() + + @classmethod + def _create_user(cls, username=None, password=None, name=None, email=None, roles=None, + enabled=True, pwd_expiration_date=None, pwd_update_required=False): + data = {} + if username: + data['username'] = username + if password: + data['password'] = password + if name: + data['name'] = name + if email: + data['email'] = email + if roles: + data['roles'] = roles + if pwd_expiration_date: + data['pwdExpirationDate'] = pwd_expiration_date + data['pwdUpdateRequired'] = pwd_update_required + data['enabled'] = enabled + cls._post("/api/user", data) + + @classmethod + def _reset_login_to_admin(cls, username=None): + cls.logout() + if username: + cls.delete_user(username) + cls.login('admin', 'admin') + + def test_crud_user(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator']) + self.assertStatus(201) + user = self.jsonBody() + + self._get('/api/user/user1') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My Name', + 'email': 'my@email.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._put('/api/user/user1', { + 'name': 'My New Name', + 'email': 'mynew@email.com', + 'roles': ['block-manager'], + }) + self.assertStatus(200) + user = self.jsonBody() + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My New Name', + 'email': 'mynew@email.com', + 'roles': ['block-manager'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._delete('/api/user/user1') + self.assertStatus(204) + + def test_crd_disabled_user(self): + self._create_user(username='klara', + 
password='mypassword10#', + name='Klara Musterfrau', + email='klara@musterfrau.com', + roles=['administrator'], + enabled=False) + self.assertStatus(201) + user = self.jsonBody() + + # Restart dashboard module. + self._unload_module('dashboard') + self._load_module('dashboard') + time.sleep(10) + + self._get('/api/user/klara') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'klara', + 'name': 'Klara Musterfrau', + 'email': 'klara@musterfrau.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': False, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._delete('/api/user/klara') + self.assertStatus(204) + + def test_list_users(self): + self._get('/api/user') + self.assertStatus(200) + user = self.jsonBody() + self.assertEqual(len(user), 1) + user = user[0] + self.assertJsonBody([{ + 'username': 'admin', + 'name': None, + 'email': None, + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }]) + + def test_create_user_already_exists(self): + self._create_user(username='admin', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='username_already_exists', + component='user') + + def test_create_user_invalid_role(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['invalid-role']) + self.assertStatus(400) + self.assertError(code='role_does_not_exist', + component='user') + + def test_create_user_invalid_chars_in_name(self): + self._create_user(username='userö', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='ceph_type_not_valid', + component='user') + + def test_delete_user_does_not_exist(self): + self._delete('/api/user/user2') + 
self.assertStatus(404) + + @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}]) + def test_delete_current_user(self): + self._delete('/api/user/test') + self.assertStatus(400) + self.assertError(code='cannot_delete_current_user', + component='user') + + @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}]) + def test_disable_current_user(self): + self._put('/api/user/test', {'enabled': False}) + self.assertStatus(400) + self.assertError(code='cannot_disable_current_user', + component='user') + + def test_update_user_does_not_exist(self): + self._put('/api/user/user2', {'name': 'My New Name'}) + self.assertStatus(404) + + def test_update_user_invalid_role(self): + self._put('/api/user/admin', {'roles': ['invalid-role']}) + self.assertStatus(400) + self.assertError(code='role_does_not_exist', + component='user') + + def test_change_password_from_other_user(self): + self._post('/api/user/test2/change_password', { + 'old_password': 'abc', + 'new_password': 'xyz' + }) + self.assertStatus(400) + self.assertError(code='invalid_user_context', component='user') + + def test_change_password_old_not_match(self): + self._post('/api/user/admin/change_password', { + 'old_password': 'foo', + 'new_password': 'bar' + }) + self.assertStatus(400) + self.assertError(code='invalid_old_password', component='user') + + def test_change_password_as_old_password(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypassword10#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not be the same as the previous one.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_username(self): + self.create_user('test1', 'mypassword10#', ['read-only'], 
force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypasstest1@#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain username.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_forbidden_words(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypassOSD01' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain the keyword "OSD".') + self._reset_login_to_admin('test1') + + def test_change_password_contains_sequential_characters(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypass123456!@$' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain sequential characters.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_repetetive_characters(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'aaaaA1@!#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain repetitive characters.') + self._reset_login_to_admin('test1') + + @DashboardTestCase.RunAs('test1', 'mypassword10#', ['read-only'], False) + def test_change_password(self): + self._post('/api/user/test1/change_password', { + 'old_password': 
'mypassword10#', + 'new_password': 'newpassword01#' + }) + self.assertStatus(200) + self.logout() + self._post('/api/auth', {'username': 'test1', 'password': 'mypassword10#'}) + self.assertStatus(400) + self.assertError(code='invalid_credentials', component='auth') + + def test_create_user_password_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + 'test1'], + 'mypassword10#', + return_exit_code=True) + self.assertEqual(exitcode, 0) + self.delete_user('test1') + + @DashboardTestCase.RunAs('test2', 'foo_bar_10#', force_password=False, login=False) + def test_change_user_password_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + 'test2'], + 'foo_new-password01#', + return_exit_code=True) + self.assertEqual(exitcode, 0) + + def test_create_user_password_force_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + '--force-password', 'test11'], + 'bar', + return_exit_code=True) + self.assertEqual(exitcode, 0) + self.delete_user('test11') + + @DashboardTestCase.RunAs('test22', 'foo_bar_10#', force_password=False, login=False) + def test_change_user_password_force_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + '--force-password', 'test22'], + 'bar', + return_exit_code=True) + self.assertEqual(exitcode, 0) + + def test_create_user_password_cli_fail(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + 'test3'], + 'foo', + return_exit_code=True) + self.assertNotEqual(exitcode, 0) + + @DashboardTestCase.RunAs('test4', 'x1z_tst+_10#', force_password=False, login=False) + def test_change_user_password_cli_fail(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + 'test4'], + 'bar', + return_exit_code=True) + self.assertNotEqual(exitcode, 0) + + def test_create_user_with_pwd_expiration_date(self): + future_date = datetime.utcnow() + timedelta(days=10) + future_date = 
int(time.mktime(future_date.timetuple())) + + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_expiration_date=future_date) + self.assertStatus(201) + user = self.jsonBody() + + self._get('/api/user/user1') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My Name', + 'email': 'my@email.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': future_date, + 'pwdUpdateRequired': False + }) + self._delete('/api/user/user1') + + def test_create_with_pwd_expiration_date_not_valid(self): + past_date = datetime.utcnow() - timedelta(days=10) + past_date = int(time.mktime(past_date.timetuple())) + + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_expiration_date=past_date) + self.assertStatus(400) + self.assertError(code='pwd_past_expiration_date', component='user') + + def test_create_with_default_expiration_date(self): + future_date_1 = datetime.utcnow() + timedelta(days=9) + future_date_1 = int(time.mktime(future_date_1.timetuple())) + future_date_2 = datetime.utcnow() + timedelta(days=11) + future_date_2 = int(time.mktime(future_date_2.timetuple())) + + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '10']) + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator']) + self.assertStatus(201) + + user = self._get('/api/user/user1') + self.assertStatus(200) + self.assertIsNotNone(user['pwdExpirationDate']) + self.assertGreater(user['pwdExpirationDate'], future_date_1) + self.assertLess(user['pwdExpirationDate'], future_date_2) + + self._delete('/api/user/user1') + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '0']) + + def test_pwd_expiration_date_update(self): + self._ceph_cmd(['dashboard', 
'set-user-pwd-expiration-span', '10']) + self.create_user('user1', 'mypassword10#', ['administrator']) + + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + + # Let's wait 1 s to ensure pwd expiration date is not the same + time.sleep(1) + + self.login('user1', 'mypassword10#') + self._post('/api/user/user1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + self.assertStatus(200) + + # Compare password expiration dates. + self._reset_login_to_admin() + user_1_pwd_changed = self._get('/api/user/user1') + self.assertStatus(200) + self.assertLess(user_1['pwdExpirationDate'], user_1_pwd_changed['pwdExpirationDate']) + + # Cleanup + self.delete_user('user1') + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '0']) + + def test_pwd_update_required(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_update_required=True) + self.assertStatus(201) + + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + self.assertEqual(user_1['pwdUpdateRequired'], True) + + self.login('user1', 'mypassword10#') + self.assertStatus(201) + + self._get('/api/osd') + self.assertStatus(403) + self._reset_login_to_admin('user1') + + def test_pwd_update_required_change_pwd(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_update_required=True) + self.assertStatus(201) + + self.login('user1', 'mypassword10#') + self._post('/api/user/user1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + + self.login('user1', 'newpassword01#') + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + self.assertEqual(user_1['pwdUpdateRequired'], False) + self._get('/api/osd') + self.assertStatus(200) + self._reset_login_to_admin('user1') + + def test_validate_password_weak(self): + 
self._post('/api/user/validate_password', { + 'password': 'mypassword1' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 11, + 'valuation': 'Weak' + }) + + def test_validate_password_ok(self): + self._post('/api/user/validate_password', { + 'password': 'mypassword1!@' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 17, + 'valuation': 'OK' + }) + + def test_validate_password_strong(self): + self._post('/api/user/validate_password', { + 'password': 'testpassword0047!@' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 22, + 'valuation': 'Strong' + }) + + def test_validate_password_very_strong(self): + self._post('/api/user/validate_password', { + 'password': 'testpassword#!$!@$' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 30, + 'valuation': 'Very strong' + }) + + def test_validate_password_fail(self): + self._post('/api/user/validate_password', { + 'password': 'foo' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password is too weak.' + }) + + def test_validate_password_fail_name(self): + self._post('/api/user/validate_password', { + 'password': 'x1zhugo_10', + 'username': 'hugo' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password must not contain username.' + }) + + def test_validate_password_fail_oldpwd(self): + self._post('/api/user/validate_password', { + 'password': 'x1zt-st10', + 'old_password': 'x1zt-st10' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password must not be the same as the previous one.' 
def set_module_conf(self, module, key, val):
    """
    Set the ``mgr/<module>/<key>`` option to ``val`` in the ``mgr``
    config section, via ``config set`` on the mon manager.
    """
    option = f"mgr/{module}/{key}"
    self.mon_manager.raw_cluster_cmd("config", "set", "mgr", option, val)
@classmethod
def setUpClass(cls):
    """
    Raise SkipTest when the cluster has fewer mgr daemons than
    ``MGRS_REQUIRED``; otherwise run ``setup_mgrs()``.
    """
    # The test runner should have populated this
    assert cls.mgr_cluster is not None

    available = len(cls.mgr_cluster.mgr_ids)
    required = cls.MGRS_REQUIRED
    if available >= required:
        cls.setup_mgrs()
        return

    raise SkipTest(
        "Only have {0} manager daemons, {1} are required".format(
            available, required))
"mgr", "module", "ls"))['enabled_modules'] + if module_name in loaded: + # The enable command is idempotent, but our wait for a restart + # isn't, so let's return now if it's already loaded + return + + initial_mgr_map = cls.mgr_cluster.get_mgr_map() + + # check if the the module is configured as an always on module + mgr_daemons = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "metadata")) + + for daemon in mgr_daemons: + if daemon["name"] == initial_mgr_map["active_name"]: + ceph_version = daemon["ceph_release"] + always_on = initial_mgr_map["always_on_modules"].get(ceph_version, []) + if module_name in always_on: + return + + log.debug("Loading Mgr module %s ...", module_name) + initial_gid = initial_mgr_map['active_gid'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "enable", module_name, "--force") + + # Wait for the module to load + def has_restarted(): + mgr_map = cls.mgr_cluster.get_mgr_map() + done = mgr_map['active_gid'] != initial_gid and mgr_map['available'] + if done: + log.debug("Restarted after module load (new active {0}/{1})".format( + mgr_map['active_name'], mgr_map['active_gid'])) + return done + cls.wait_until_true(has_restarted, timeout=30) + + + @classmethod + def _get_uri(cls, service_name): + # Little dict hack so that I can assign into this from + # the get_or_none function + mgr_map = {'x': None} + + def _get_or_none(): + mgr_map['x'] = cls.mgr_cluster.get_mgr_map() + result = mgr_map['x']['services'].get(service_name, None) + return result + + cls.wait_until_true(lambda: _get_or_none() is not None, 30) + + uri = mgr_map['x']['services'][service_name] + + log.debug("Found {0} at {1} (daemon {2}/{3})".format( + service_name, uri, mgr_map['x']['active_name'], + mgr_map['x']['active_gid'])) + + return uri + + @classmethod + def _assign_ports(cls, module_name, config_name, min_port=7789): + """ + To avoid the need to run lots of hosts in teuthology tests to + get different URLs per mgr, we will hand out 
class TestCache(MgrTestCase):
    """
    Exercise the mgr TTL cache through the ``cli_api`` module.

    Every test runs with ``mgr_ttl_cache_expire_seconds`` set to
    ``self.ttl``; tearDown sets the option back to 0.
    """

    def setUp(self):
        super(TestCache, self).setUp()
        self.setup_mgrs()
        self._load_module("cli_api")
        self.ttl = 10
        self.enable_cache(self.ttl)

    def tearDown(self):
        self.disable_cache()

    def get_hit_miss_ratio(self):
        """
        Return ``(cache_hit, cache_miss)`` as ints, read from the active
        mgr's ``perf dump``.

        Despite the name these are the two raw counters, not a ratio.
        """
        perf_dump_command = f"daemon mgr.{self.mgr_cluster.get_active_id()} perf dump"
        perf_dump = json.loads(self.cluster_cmd(perf_dump_command))
        mgr_counters = perf_dump["mgr"]
        return int(mgr_counters["cache_hit"]), int(mgr_counters["cache_miss"])

    def enable_cache(self, ttl):
        """Set ``mgr_ttl_cache_expire_seconds`` to ``ttl``."""
        set_ttl = f"config set mgr mgr_ttl_cache_expire_seconds {ttl}"
        self.cluster_cmd(set_ttl)

    def disable_cache(self):
        """Set ``mgr_ttl_cache_expire_seconds`` to 0."""
        set_ttl = "config set mgr mgr_ttl_cache_expire_seconds 0"
        self.cluster_cmd(set_ttl)

    def test_init_cache(self):
        """That the TTL configured in setUp is the one the cluster reports."""
        get_ttl = "config get mgr mgr_ttl_cache_expire_seconds"
        res = self.cluster_cmd(get_ttl)
        # assertEqual, not the assertEquals alias: the alias was removed
        # in Python 3.12.
        self.assertEqual(int(res), self.ttl)

    def test_health_not_cached(self):
        """That fetching ``health`` moves neither the hit nor the miss counter."""
        get_health = "mgr api get health"

        h_start, m_start = self.get_hit_miss_ratio()
        self.cluster_cmd(get_health)
        h, m = self.get_hit_miss_ratio()

        self.assertEqual(h, h_start)
        self.assertEqual(m, m_start)

    def test_osdmap(self):
        """That ``osd_map`` fetched a second time still has the expected fields."""
        get_osdmap = "mgr api get osd_map"

        # store in cache
        self.cluster_cmd(get_osdmap)
        # get from cache
        res = self.cluster_cmd(get_osdmap)
        osd_map = json.loads(res)
        self.assertIn("osds", osd_map)
        self.assertGreater(len(osd_map["osds"]), 0)
        self.assertIn("epoch", osd_map)

    def test_hit_miss_ratio(self):
        """That a first ``osd_map`` fetch counts as a miss and the next as a hit."""
        get_osdmap = "mgr api get osd_map"

        hit_start, miss_start = self.get_hit_miss_ratio()

        def wait_miss():
            # Keep fetching until exactly one new miss has been recorded.
            self.cluster_cmd(get_osdmap)
            _, m = self.get_hit_miss_ratio()
            return m == miss_start + 1

        # Miss, add osd_map to cache
        self.wait_until_true(wait_miss, self.ttl + 5)
        h, m = self.get_hit_miss_ratio()
        self.assertEqual(h, hit_start)
        self.assertEqual(m, miss_start + 1)

        # Hit, get osd_map from cache
        self.cluster_cmd(get_osdmap)
        h, m = self.get_hit_miss_ratio()
        self.assertEqual(h, hit_start + 1)
        self.assertEqual(m, miss_start + 1)
def test_info(self):
    """
    That ``crash info`` returns JSON carrying ``crash_id`` and
    ``timestamp`` for every crash posted in setUp.
    """
    mon_manager = self.mgr_cluster.mon_manager
    for crash in self.crashes.values():
        # Pass values as logging arguments so formatting is left to the
        # logging machinery rather than done eagerly with '%'.
        log.warning('test_info: crash %s', crash)
        retstr = mon_manager.raw_cluster_cmd(
            'crash', 'ls'
        )
        log.warning('ls output: %s', retstr)
        retstr = mon_manager.raw_cluster_cmd(
            'crash', 'info', crash['crash_id'],
        )
        log.warning('crash info output: %s', retstr)
        crashinfo = json.loads(retstr)
        self.assertIn('crash_id', crashinfo)
        self.assertIn('timestamp', crashinfo)
def test_standby(self):
    """
    That after a failover the former active mgr, now a standby,
    answers dashboard requests with a 303 redirect to the new active.
    """
    standby_modules = self.mgr_cluster.mon_manager.raw_cluster_cmd(
        "config", "get", "mgr", "mgr_standby_modules").strip()
    if standby_modules == "false":
        log.info("Skipping test_standby since mgr_standby_modules=false")
        # Report a skip; a bare return would be counted as a pass.
        self.skipTest("mgr_standby_modules=false")

    original_active_id = self.mgr_cluster.get_active_id()
    original_uri = self._get_uri("dashboard")
    log.info("Originally running manager '{}' at {}".format(
        original_active_id, original_uri))

    # Force a failover and wait until the previously active manager
    # is listed as standby.
    self.mgr_cluster.mgr_fail(original_active_id)
    self.wait_until_true(
        lambda: original_active_id in self.mgr_cluster.get_standby_ids(),
        timeout=30)

    failed_active_id = self.mgr_cluster.get_active_id()
    failed_over_uri = self._get_uri("dashboard")
    log.info("After failover running manager '{}' at {}".format(
        failed_active_id, failed_over_uri))

    self.assertNotEqual(original_uri, failed_over_uri)

    # Wait until web server of the standby node is settled.
    self.wait_until_webserver_available(original_uri)

    # The original active daemon should have come back up as a standby
    # and be doing redirects to the new active daemon.
    r = requests.get(original_uri, allow_redirects=False, verify=False)
    self.assertEqual(r.status_code, 303)
    self.assertEqual(r.headers['Location'], failed_over_uri)

    # Ensure that every URL redirects to the active daemon.
    r = requests.get("{}/runtime.js".format(original_uri.strip('/')),
                     allow_redirects=False,
                     verify=False)
    self.assertEqual(r.status_code, 303)
    self.assertEqual(r.headers['Location'], failed_over_uri)
+ self.mgr_cluster.mgr_fail(original_active_id) + self.wait_until_true( + lambda: original_active_id in self.mgr_cluster.get_standby_ids(), + timeout=30) + + failed_active_id = self.mgr_cluster.get_active_id() + failed_over_uri = self._get_uri("dashboard") + log.info("After failover running manager '{}' at {}".format( + failed_active_id, failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + + # Redirection should be disabled now, instead a 500 must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 500) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "503") + + # The customized HTTP status code (503) must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 503) + + def test_urls(self): + base_uri = self._get_uri("dashboard") + + # This is a very simple smoke test to check that the dashboard can + # give us a 200 response to requests. We're not testing that + # the content is correct or even renders! 
def test_tls(self):
    """
    That the dashboard refuses TLSv1 and TLSv1.1 handshakes and serves
    a 200 when the client negotiates with PROTOCOL_TLS.
    """
    class CustomHTTPAdapter(HTTPAdapter):
        # Pins the ssl_version handed to the underlying pool manager.
        def __init__(self, ssl_version):
            self.ssl_version = ssl_version
            super().__init__()

        def init_poolmanager(self, *args, **kwargs):
            kwargs['ssl_version'] = self.ssl_version
            return super().init_poolmanager(*args, **kwargs)

    def fetch(ssl_version):
        # Fresh session per attempt so no connection is reused.
        session = requests.Session()
        session.mount(uri, CustomHTTPAdapter(ssl_version))
        return session.get(uri, allow_redirects=False, verify=False)

    uri = self._get_uri("dashboard")

    # TLSv1
    with self.assertRaises(requests.exceptions.SSLError):
        fetch(ssl.PROTOCOL_TLSv1)

    # TLSv1.1
    with self.assertRaises(requests.exceptions.SSLError):
        fetch(ssl.PROTOCOL_TLSv1_1)

    response = fetch(ssl.PROTOCOL_TLS)
    self.assertEqual(response.status_code, 200)
def test_timeout_nostandby(self):
    """
    That when an active mgr stop responding, and no standby is
    available, the active mgr is removed from the map anyway.
    """
    cluster = self.mgr_cluster

    # Query which mgr is active
    original_active = cluster.get_active_id()

    # Take every standby out of the picture first
    for standby in cluster.get_standby_ids():
        cluster.mgr_stop(standby)
        cluster.mgr_fail(standby)

    self.assertListEqual(cluster.get_standby_ids(), [])
    self.assertEqual(cluster.get_active_id(), original_active)

    grace = int(cluster.get_config("mon_mgr_beacon_grace"))
    log.info("Should time out in about {0} seconds".format(grace))

    cluster.mgr_stop(original_active)

    # Now wait for the mon to notice the mgr is gone and remove it
    # from the map.
    self.wait_until_equal(
        lambda: self.mgr_cluster.get_active_id(),
        "",
        timeout=grace * 2
    )

    self.assertListEqual(cluster.get_standby_ids(), [])
    self.assertEqual(cluster.get_active_id(), "")
+ :return: + """ + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + self.mgr_cluster.mgr_fail(original_active) + + # A standby should take over + self.wait_until_true( + lambda: self.mgr_cluster.get_active_id() in original_standbys, + timeout=60 + ) + + # The one we failed should come back as a standby (he isn't + # really dead) + self.wait_until_true( + lambda: original_active in self.mgr_cluster.get_standby_ids(), + timeout=10 + ) + + # Both daemons should have fully populated metadata + # (regression test for http://tracker.ceph.com/issues/21260) + meta = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "metadata")) + id_to_meta = dict([(i['name'], i) for i in meta]) + for i in [original_active] + original_standbys: + self.assertIn(i, id_to_meta) + self.assertIn('ceph_version', id_to_meta[i]) + + # We should be able to fail back over again: the exercises + # our re-initialization of the python runtime within + # a single process lifetime. + + # Get rid of any bystander standbys so that the original_active + # will be selected as next active. 
def test_standby_timeout(self):
    """
    That when a standby daemon stops sending beacons, it is
    removed from the list of standbys
    :return:
    """
    original_active = self.mgr_cluster.get_active_id()
    standbys_before = self.mgr_cluster.get_standby_ids()

    # Stop the first standby and expect every other one to remain listed
    victim = standbys_before[0]
    self.mgr_cluster.mgr_stop(victim)
    expect_standbys = set(standbys_before) - {victim}

    def victim_dropped():
        return set(self.mgr_cluster.get_standby_ids()) == expect_standbys

    self.wait_until_true(victim_dropped, timeout=60)
    self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
def _wait_for_health_history_checks(self, *args):
    """
    Wait for a set of health checks to appear in the health history.

    Polls the insights report every 0.25 s and fails the test (via
    assertGreater) once 15 s have passed without all of ``args`` being
    present.
    """
    # Monotonic clock: the deadline must not move if the wall clock is
    # adjusted while we poll.
    deadline = time.monotonic() + 15
    while True:
        history = self._insights()["health"]["history"]["checks"]
        if all(check in history for check in args):
            return
        self.assertGreater(deadline, time.monotonic())
        time.sleep(0.25)
+ now = datetime.datetime.utcnow() + now = datetime.datetime( + year = now.year, + month = now.month, + day = now.day, + hour = now.hour, + minute = 30) + + check_names = set() + for hours in [-18, -11, -5, -1, 0]: + # change the insight module's perception of "now" ... + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "insights_set_now_offset", str(hours)) + + # ... to simulate health check arrivals in the past + unique_check_name = "insights_health_check_{}".format(hours) + health_check = { + unique_check_name: { + "severity": "warning", + "summary": "summary", + "detail": ["detail"] + } + } + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "set", + json.dumps(health_check)) + + check_names.add(unique_check_name) + + # and also set the same health check to test deduplication + dupe_check_name = "insights_health_check" + health_check = { + dupe_check_name: { + "severity": "warning", + "summary": "summary", + "detail": ["detail"] + } + } + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "set", + json.dumps(health_check)) + + check_names.add(dupe_check_name) + + # wait for the health check to show up in the history report + self._wait_for_health_history_checks(unique_check_name, dupe_check_name) + + # clear out the current health checks before moving on + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "clear") + self._wait_for_curr_health_cleared(unique_check_name) + + report = self._insights() + for check in check_names: + self.assertIn(check, report["health"]["history"]["checks"]) + + # restart the manager + active_id = self.mgr_cluster.get_active_id() + self.mgr_cluster.mgr_restart(active_id) + + # pruning really removes history + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "insights", "prune-health", "0") + report = self._insights() + self.assertFalse(report["health"]["history"]["checks"]) + + def 
def test_crash_history(self):
    """
    That a posted crash shows up in the report's crash summary without
    adding anything to the report's errors.
    """
    def crashes_and_errors():
        report = self._insights()
        return report["crashes"], report["errors"]

    self._clear_crashes()
    crashes, errors = crashes_and_errors()
    self.assertFalse(crashes["summary"])
    self.assertFalse(errors)

    # crashes show up in the report
    self._add_crash(1)
    crashes, errors = crashes_and_errors()
    self.assertTrue(crashes["summary"])
    self.assertFalse(errors)
    log.warning("{}".format(json.dumps(crashes, indent=2)))

    self._clear_crashes()
def test_diskprediction_local(self):
    """
    Run the diskprediction_local self-test, skipping when the mgr
    reports python 3.8 or newer.
    """
    self._load_module("selftest")
    python_version = self.mgr_cluster.mon_manager.raw_cluster_cmd(
        "mgr", "self-test", "python-version")
    version_tuple = tuple(map(int, python_version.split('.')))
    too_new = version_tuple >= (3, 8)
    if too_new:
        # https://tracker.ceph.com/issues/45147
        self.skipTest(f'python {python_version} not compatible with '
                      'diskprediction_local')
    self._selftest_plugin("diskprediction_local")
def test_selftest_command_spam(self):
    """
    That the mgr stays responsive (dashboard over HTTP and a native
    command) while the selftest module's ``command_spam`` background
    workload runs, and that no mgr daemon changes role meanwhile.
    """
    # Use the selftest module to stress the mgr daemon
    self._load_module("selftest")

    # Use the dashboard to test that the mgr is still able to do its job
    self._assign_ports("dashboard", "ssl_server_port")
    self._load_module("dashboard")
    self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
                                                 "create-self-signed-cert")

    original_active = self.mgr_cluster.get_active_id()
    original_standbys = self.mgr_cluster.get_standby_ids()

    self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                 "background", "start",
                                                 "command_spam")

    dashboard_uri = self._get_uri("dashboard")

    delay = 10
    periods = 10
    for _ in range(periods):
        t1 = time.time()
        # Check that an HTTP module remains responsive
        r = requests.get(dashboard_uri, verify=False)
        self.assertEqual(r.status_code, 200)

        # Check that a native non-module command remains responsive
        self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")

        # The two checks above may take longer than `delay` on a loaded
        # mgr; time.sleep() raises ValueError on a negative value, so
        # clamp the remaining wait at zero.
        time.sleep(max(0, delay - (time.time() - t1)))

    self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
                                                 "background", "stop")

    # Check that all mgr daemons are still running
    self.assertEqual(original_active, self.mgr_cluster.get_active_id())
    self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids())
+ self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "background", "start", "throw_exception") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run" + ) + self.assertEqual(exc_raised.exception.exitstatus, errno.EIO) + + # A health alert should be raised for a module that has thrown + # an exception from its serve() method + self.wait_for_health( + "Module 'selftest' has failed: Synthetic exception in serve", + timeout=30) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + + self.wait_for_health_clear(timeout=30) + + def test_module_remote(self): + """ + Use the selftest module to exercise inter-module communication + """ + self._load_module("selftest") + # The "self-test remote" operation just happens to call into + # influx. + self._load_module("influx") + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "remote") + + def test_selftest_cluster_log(self): + """ + Use the selftest module to test the cluster/audit log interface. 
+ """ + priority_map = { + "info": "INF", + "security": "SEC", + "warning": "WRN", + "error": "ERR" + } + self._load_module("selftest") + for priority in priority_map.keys(): + message = "foo bar {}".format(priority) + log_message = "[{}] {}".format(priority_map[priority], message) + # Check for cluster/audit logs: + # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info + # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security + # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning + # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error + with self.assert_cluster_log(log_message): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "cluster", + priority, message) + with self.assert_cluster_log(log_message, watch_channel="audit"): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "audit", + priority, message) + + def test_selftest_cluster_log_unknown_channel(self): + """ + Use the selftest module to test the cluster/audit log interface. 
+ """ + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "xyz", + "ERR", "The channel does not exist") + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) diff --git a/qa/tasks/mgr/test_orchestrator_cli.py b/qa/tasks/mgr/test_orchestrator_cli.py new file mode 100644 index 000000000..3fccef9a6 --- /dev/null +++ b/qa/tasks/mgr/test_orchestrator_cli.py @@ -0,0 +1,250 @@ +import errno +import json +import logging + + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestOrchestratorCli(MgrTestCase): + MGRS_REQUIRED = 1 + + def _cmd(self, module, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args) + + def _orch_cmd(self, *args): + return self._cmd("orch", *args) + + def _progress_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args) + + def _orch_cmd_result(self, *args, **kwargs): + """ + raw_cluster_cmd doesn't support kwargs. 
+ """ + return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("orch", *args, **kwargs) + + def _test_orchestrator_cmd_result(self, *args, **kwargs): + return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("test_orchestrator", *args, **kwargs) + + def setUp(self): + super(TestOrchestratorCli, self).setUp() + + self._load_module("orchestrator") + self._load_module("test_orchestrator") + self._orch_cmd("set", "backend", "test_orchestrator") + + def test_status(self): + ret = self._orch_cmd("status") + self.assertIn("test_orchestrator", ret) + + def test_device_ls(self): + ret = self._orch_cmd("device", "ls") + self.assertIn("localhost", ret) + + def test_device_ls_refresh(self): + ret = self._orch_cmd("device", "ls", "--refresh") + self.assertIn("localhost", ret) + + def test_device_ls_hoshs(self): + ret = self._orch_cmd("device", "ls", "localhost", "host1") + self.assertIn("localhost", ret) + + + def test_device_ls_json(self): + ret = self._orch_cmd("device", "ls", "--format", "json") + self.assertIn("localhost", ret) + self.assertIsInstance(json.loads(ret), list) + + def test_ps(self): + ret = self._orch_cmd("ps") + self.assertIn("mgr", ret) + + def test_ps_json(self): + ret = self._orch_cmd("ps", "--format", "json") + self.assertIsInstance(json.loads(ret), list) + self.assertIn("mgr", ret) + + + def test_service_action(self): + self._orch_cmd("restart", "mds.cephfs") + self._orch_cmd("stop", "mds.cephfs") + self._orch_cmd("start", "mds.cephfs") + + def test_service_instance_action(self): + self._orch_cmd("daemon", "restart", "mds.a") + self._orch_cmd("daemon", "stop", "mds.a") + self._orch_cmd("daemon", "start", "mds.a") + + def test_osd_create(self): + drive_group = """ +service_type: osd +service_id: any.sda +placement: + host_pattern: '*' +data_devices: + all: True +""" + res = self._orch_cmd_result("apply", "osd", "-i", "-", + stdin=drive_group) + self.assertEqual(res, 0) + + def test_blink_device_light(self): + def _ls_lights(what): + return 
json.loads(self._cmd("device", "ls-lights"))[what] + + metadata = json.loads(self._cmd("osd", "metadata")) + dev_name_ids = [osd["device_ids"] for osd in metadata] + _, dev_id = [d.split('=') for d in dev_name_ids if len(d.split('=')) == 2][0] + + for t in ["ident", "fault"]: + self.assertNotIn(dev_id, _ls_lights(t)) + self._cmd("device", "light", "on", dev_id, t) + self.assertIn(dev_id, _ls_lights(t)) + + health = { + 'ident': 'DEVICE_IDENT_ON', + 'fault': 'DEVICE_FAULT_ON', + }[t] + self.wait_for_health(health, 30) + + self._cmd("device", "light", "off", dev_id, t) + self.assertNotIn(dev_id, _ls_lights(t)) + + self.wait_for_health_clear(30) + + def test_mds_add(self): + self._orch_cmd('daemon', 'add', 'mds', 'fsname') + + def test_rgw_add(self): + self._orch_cmd('daemon', 'add', 'rgw', 'realm', 'zone') + + def test_nfs_add(self): + self._orch_cmd('daemon', 'add', "nfs", "service_name") + + def test_osd_rm(self): + self._orch_cmd('daemon', "rm", "osd.0", '--force') + + def test_mds_rm(self): + self._orch_cmd("daemon", "rm", "mds.fsname") + + def test_rgw_rm(self): + self._orch_cmd("daemon", "rm", "rgw.myrealm.myzone") + + def test_nfs_rm(self): + self._orch_cmd("daemon", "rm", "nfs.service_name") + + def test_host_ls(self): + out = self._orch_cmd("host", "ls", "--format=json") + hosts = json.loads(out) + self.assertEqual(len(hosts), 1) + self.assertEqual(hosts[0]["hostname"], "localhost") + + def test_host_add(self): + self._orch_cmd("host", "add", "hostname") + + def test_host_rm(self): + self._orch_cmd("host", "rm", "hostname") + + def test_mon_update(self): + self._orch_cmd("apply", "mon", "3 host1:1.2.3.0/24 host2:1.2.3.0/24 host3:10.0.0.0/8") + self._orch_cmd("apply", "mon", "3 host1:1.2.3.4 host2:1.2.3.4 host3:10.0.0.1") + + def test_mgr_update(self): + self._orch_cmd("apply", "mgr", "3") + + def test_nfs_update(self): + self._orch_cmd("apply", "nfs", "service_name", "2") + + def test_error(self): + ret = self._orch_cmd_result("host", "add", 
"raise_validation_error") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_error") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_bug") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_not_implemented") + self.assertEqual(ret, errno.ENOENT) + ret = self._orch_cmd_result("host", "add", "raise_no_orchestrator") + self.assertEqual(ret, errno.ENOENT) + ret = self._orch_cmd_result("host", "add", "raise_import_error") + self.assertEqual(ret, errno.ENOENT) + + def test_load_data(self): + data = { + 'inventory': [ + { + 'name': 'host0', + 'devices': [ + { + 'type': 'hdd', + 'id': '/dev/sda', + 'size': 1024**4 * 4, + 'rotates': True + } + ] + }, + { + 'name': 'host1', + 'devices': [ + { + 'type': 'hdd', + 'id': '/dev/sda', + 'size': 1024**4 * 4, + 'rotates': True + } + ] + } + ], + 'daemons': [ + { + 'hostname': 'host0', + 'daemon_type': 'mon', + 'daemon_id': 'a' + }, + { + 'hostname': 'host1', + 'daemon_type': 'osd', + 'daemon_id': '1' + } + ] + } + + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json.dumps(data)) + self.assertEqual(ret, 0) + out = self._orch_cmd('device', 'ls', '--format=json') + inventory = data['inventory'] + inventory_result = json.loads(out) + self.assertEqual(len(inventory), len(inventory_result)) + + out = self._orch_cmd('device', 'ls', 'host0', '--format=json') + inventory_result = json.loads(out) + self.assertEqual(len(inventory_result), 1) + self.assertEqual(inventory_result[0]['name'], 'host0') + + out = self._orch_cmd('ps', '--format=json') + daemons = data['daemons'] + daemons_result = json.loads(out) + self.assertEqual(len(daemons), len(daemons_result)) + + out = self._orch_cmd('ps', 'host0', '--format=json') + daemons_result = json.loads(out) + self.assertEqual(len(daemons_result), 1) + self.assertEqual(daemons_result[0]['hostname'], 'host0') + + # test invalid input file: invalid json + 
json_str = '{ "inventory: ' + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json_str) + self.assertEqual(ret, errno.EINVAL) + + # test invalid input file: missing key + json_str = '{ "inventory": [{"devices": []}] }' + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json_str) + self.assertEqual(ret, errno.EINVAL) + + # load empty data for other tests + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin='{}') + self.assertEqual(ret, 0) diff --git a/qa/tasks/mgr/test_progress.py b/qa/tasks/mgr/test_progress.py new file mode 100644 index 000000000..082653f62 --- /dev/null +++ b/qa/tasks/mgr/test_progress.py @@ -0,0 +1,380 @@ + +import json +import logging +import time +from .mgr_test_case import MgrTestCase +from contextlib import contextmanager + +log = logging.getLogger(__name__) + + +class TestProgress(MgrTestCase): + POOL = "progress_data" + + # How long we expect to wait at most between taking an OSD out + # and seeing the progress event pop up. + EVENT_CREATION_PERIOD = 60 + + WRITE_PERIOD = 30 + + # Generous period for OSD recovery, should be same order of magnitude + # to how long it took to write the data to begin with + RECOVERY_PERIOD = WRITE_PERIOD * 4 + + def _get_progress(self): + out = self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "json") + return json.loads(out) + + def _all_events(self): + """ + To avoid racing on completion, we almost always want to look + for events in the total list of active and complete, so + munge them into a single list. 
+ """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['events'] + p['completed'] + + def _events_in_progress(self): + """ + this function returns all events that are in progress + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['events'] + + def _completed_events(self): + """ + This function returns all events that are completed + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['completed'] + + def is_osd_marked_out(self, ev): + return ev['message'].endswith('marked out') + + def is_osd_marked_in(self, ev): + return ev['message'].endswith('marked in') + + def _get_osd_in_out_events(self, marked='both'): + """ + Return the event that deals with OSDs being + marked in, out or both + """ + + marked_in_events = [] + marked_out_events = [] + + events_in_progress = self._events_in_progress() + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_events.append(ev) + elif self.is_osd_marked_in(ev): + marked_in_events.append(ev) + + if marked == 'both': + return [marked_in_events] + [marked_out_events] + elif marked == 'in': + return marked_in_events + else: + return marked_out_events + + def _osd_in_out_events_count(self, marked='both'): + """ + Count the number of on going recovery events that deals with + OSDs being marked in, out or both. 
+ """ + events_in_progress = self._events_in_progress() + marked_in_count = 0 + marked_out_count = 0 + + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + + def _setup_pool(self, size=None): + self.mgr_cluster.mon_manager.create_pool(self.POOL) + if size is not None: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', self.POOL, 'size', str(size)) + + def _osd_in_out_completed_events_count(self, marked='both'): + """ + Count the number of completed recovery events that deals with + OSDs being marked in, out, or both. + """ + + completed_events = self._completed_events() + marked_in_count = 0 + marked_out_count = 0 + + for ev in completed_events: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + + def _write_some_data(self, t): + """ + To adapt to test systems of varying performance, we write + data for a defined time period, rather than to a defined + capacity. This will hopefully result in a similar timescale + for PG recovery after an OSD failure. 
+ """ + + args = [ + "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"] + + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _osd_count(self): + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + return len(osd_map['osds']) + + @contextmanager + def recovery_backfill_disabled(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'set', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'set', 'norecover') + yield + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'norecover') + + def setUp(self): + super(TestProgress, self).setUp() + # Ensure we have at least four OSDs + if self._osd_count() < 4: + self.skipTest("Not enough OSDS!") + + # Remove any filesystems so that we can remove their pools + if self.mds_cluster: + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.mds_cluster.delete_all_filesystems() + + # Remove all other pools + for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']: + self.mgr_cluster.mon_manager.remove_pool(pool['pool_name']) + + self._load_module("progress") + self.mgr_cluster.mon_manager.raw_cluster_cmd('progress', 'clear') + + def _simulate_failure(self, osd_ids=None): + """ + Common lead-in to several tests: get some data in the cluster, + then mark an OSD out to trigger the start of a progress event. + + Return the JSON representation of the failure event. 
+ """ + + if osd_ids is None: + osd_ids = [0] + + self._setup_pool() + self._write_some_data(self.WRITE_PERIOD) + with self.recovery_backfill_disabled(): + for osd_id in osd_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', str(osd_id)) + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + + ev = self._get_osd_in_out_events('out')[0] + log.info(json.dumps(ev, indent=1)) + self.assertIn("Rebalancing after osd.0 marked out", ev['message']) + return ev + + def _simulate_back_in(self, osd_ids, initial_event): + for osd_id in osd_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd_id)) + + # First Event should complete promptly + self.wait_until_true(lambda: self._is_complete(initial_event['id']), + timeout=self.RECOVERY_PERIOD) + + with self.recovery_backfill_disabled(): + + try: + # Wait for progress event marked in to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + except RuntimeError as ex: + if not "Timed out after" in str(ex): + raise ex + + log.info("There was no PGs affected by osd being marked in") + return None + + new_event = self._get_osd_in_out_events('in')[0] + return new_event + + def _no_events_anywhere(self): + """ + Whether there are any live or completed events + """ + p = self._get_progress() + total_events = len(p['events']) + len(p['completed']) + return total_events == 0 + + def _is_quiet(self): + """ + Whether any progress events are live. 
+ """ + return len(self._get_progress()['events']) == 0 + + def _is_complete(self, ev_id): + progress = self._get_progress() + live_ids = [ev['id'] for ev in progress['events']] + complete_ids = [ev['id'] for ev in progress['completed']] + if ev_id in complete_ids: + assert ev_id not in live_ids + return True + else: + assert ev_id in live_ids + return False + + def _is_inprogress_or_complete(self, ev_id): + for ev in self._events_in_progress(): + if ev['id'] == ev_id: + return ev['progress'] > 0 + # check if the event completed + return self._is_complete(ev_id) + + def tearDown(self): + if self.POOL in self.mgr_cluster.mon_manager.pools: + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'norecover') + + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + for osd in osd_map['osds']: + if osd['weight'] == 0.0: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd['osd'])) + + super(TestProgress, self).tearDown() + + def test_osd_healthy_recovery(self): + """ + The simple recovery case: an OSD goes down, its PGs get a new + placement, and we wait for the PG to get healthy in its new + locations. + """ + ev = self._simulate_failure() + + # Wait for progress event to ultimately reach completion + self.wait_until_true(lambda: self._is_complete(ev['id']), + timeout=self.RECOVERY_PERIOD) + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_pool_removal(self): + """ + That a pool removed during OSD recovery causes the + progress event to be correctly marked complete once there + is no more data to move. 
+ """ + ev = self._simulate_failure() + + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + # Event should complete promptly + self.wait_until_true(lambda: self._is_complete(ev['id']), + timeout=self.RECOVERY_PERIOD) + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_osd_came_back(self): + """ + When a recovery is underway, but then the out OSD + comes back in, such that recovery is no longer necessary. + It should create another event for when osd is marked in + and cancel the one that is still ongoing. + """ + ev1 = self._simulate_failure() + + ev2 = self._simulate_back_in([0], ev1) + + if ev2 is not None: + # Wait for progress event to ultimately complete + self.wait_until_true(lambda: self._is_complete(ev2['id']), + timeout=self.RECOVERY_PERIOD) + + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_turn_off_module(self): + """ + When the the module is turned off, there should not + be any on going events or completed events. + Also module should not accept any kind of Remote Event + coming in from other module, however, once it is turned + back, on creating an event should be working as it is. 
+ """ + + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off") + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + self.assertTrue(self._no_events_anywhere()) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on") + + self._write_some_data(self.WRITE_PERIOD) + + with self.recovery_backfill_disabled(): + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + + ev1 = self._get_osd_in_out_events('out')[0] + + log.info(json.dumps(ev1, indent=1)) + + self.wait_until_true(lambda: self._is_complete(ev1['id']), + check_fn=lambda: self._is_inprogress_or_complete(ev1['id']), + timeout=self.RECOVERY_PERIOD) + self.assertTrue(self._is_quiet()) diff --git a/qa/tasks/mgr/test_prometheus.py b/qa/tasks/mgr/test_prometheus.py new file mode 100644 index 000000000..376556ab3 --- /dev/null +++ b/qa/tasks/mgr/test_prometheus.py @@ -0,0 +1,79 @@ +import json +import logging +import requests + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestPrometheus(MgrTestCase): + MGRS_REQUIRED = 3 + + def setUp(self): + super(TestPrometheus, self).setUp() + self.setup_mgrs() + + def test_file_sd_command(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + result = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "prometheus", "file_sd_config")) + mgr_map = self.mgr_cluster.get_mgr_map() + self.assertEqual(len(result[0]['targets']), 
len(mgr_map['standbys']) + 1) + + + + def test_standby(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + original_active = self.mgr_cluster.get_active_id() + + original_uri = self._get_uri("prometheus") + log.info("Originally running at {0}".format(original_uri)) + + self.mgr_cluster.mgr_fail(original_active) + + failed_over_uri = self._get_uri("prometheus") + log.info("After failover running at {0}".format(failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # The original active daemon should have come back up as a standby + # and serve some html under "/" and an empty answer under /metrics + r = requests.get(original_uri, allow_redirects=False) + self.assertEqual(r.status_code, 200) + r = requests.get(original_uri + "metrics", allow_redirects=False) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.headers["content-type"], "text/plain;charset=utf-8") + self.assertEqual(r.headers["server"], "Ceph-Prometheus") + + def test_urls(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + base_uri = self._get_uri("prometheus") + + # This is a very simple smoke test to check that the module can + # give us a 200 response to requests. We're not testing that + # the content is correct or even renders! + + urls = [ + "/", + "/metrics" + ] + + failures = [] + + for url in urls: + r = requests.get(base_uri + url, allow_redirects=False) + if r.status_code != 200: + failures.append(url) + + log.info("{0}: {1} ({2} bytes)".format( + url, r.status_code, len(r.content) + )) + + self.assertListEqual(failures, []) diff --git a/qa/tasks/mon_clock_skew_check.py b/qa/tasks/mon_clock_skew_check.py new file mode 100644 index 000000000..59d4169d1 --- /dev/null +++ b/qa/tasks/mon_clock_skew_check.py @@ -0,0 +1,73 @@ +""" +Handle clock skews in monitors. 
+""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +class ClockSkewCheck: + """ + Check if there are any clock skews among the monitors in the + quorum. + + This task accepts the following options: + + interval amount of seconds to wait before check. (default: 30.0) + expect-skew 'true' or 'false', to indicate whether to expect a skew during + the run or not. If 'true', the test will fail if no skew is + found, and succeed if a skew is indeed found; if 'false', it's + the other way around. (default: false) + + - mon_clock_skew_check: + expect-skew: true + """ + + def __init__(self, ctx, manager, config, logger): + self.ctx = ctx + self.manager = manager + + self.stopping = False + self.logger = logger + self.config = config + + if self.config is None: + self.config = dict() + + +def task(ctx, config): + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_clock_skew_check task only accepts a dict for configuration' + interval = float(config.get('interval', 30.0)) + expect_skew = config.get('expect-skew', False) + + log.info('Beginning mon_clock_skew_check...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + quorum_size = len(teuthology.get_mon_names(ctx)) + manager.wait_for_mon_quorum_size(quorum_size) + + # wait a bit + log.info('sleeping for {s} seconds'.format( + s=interval)) + time.sleep(interval) + + health = manager.get_mon_health(True) + log.info('got health %s' % health) + if expect_skew: + if 'MON_CLOCK_SKEW' not in health['checks']: + raise RuntimeError('expected MON_CLOCK_SKEW but got none') + else: + if 'MON_CLOCK_SKEW' in health['checks']: + raise RuntimeError('got MON_CLOCK_SKEW but expected none') + diff --git a/qa/tasks/mon_recovery.py b/qa/tasks/mon_recovery.py new file 
mode 100644 index 000000000..fa7aa1a8d --- /dev/null +++ b/qa/tasks/mon_recovery.py @@ -0,0 +1,80 @@ +""" +Monitor recovery +""" +import logging +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test monitor recovery. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)] + log.info("mon ids = %s" % mons) + + manager.wait_for_mon_quorum_size(len(mons)) + + log.info('verifying all monitors are in the quorum') + for m in mons: + s = manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + log.info('restarting each monitor in turn') + for m in mons: + # stop a monitor + manager.kill_mon(m) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + # restart + manager.revive_mon(m) + manager.wait_for_mon_quorum_size(len(mons)) + + # in forward and reverse order, + rmons = mons + rmons.reverse() + for mons in mons, rmons: + log.info('stopping all monitors') + for m in mons: + manager.kill_mon(m) + + log.info('forming a minimal quorum for %s, then adding monitors' % mons) + qnum = (len(mons) // 2) + 1 + num = 0 + for m in mons: + manager.revive_mon(m) + num += 1 + if num >= qnum: + manager.wait_for_mon_quorum_size(num) + + # on both leader and non-leader ranks... 
+ for rank in [0, 1]: + # take one out + log.info('removing mon %s' % mons[rank]) + manager.kill_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + log.info('causing some monitor log activity') + m = 30 + for n in range(1, m): + manager.raw_cluster_cmd('log', '%d of %d' % (n, m)) + + log.info('adding mon %s back in' % mons[rank]) + manager.revive_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py new file mode 100644 index 000000000..4224acf03 --- /dev/null +++ b/qa/tasks/mon_thrash.py @@ -0,0 +1,386 @@ +""" +Monitor thrash +""" +import logging +import contextlib +import random +import time +import gevent +import json +import math +from teuthology import misc as teuthology +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +def _get_mons(ctx): + """ + Get monitor names from the context value. + """ + mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)] + return mons + +class MonitorThrasher(Thrasher): + """ + How it works:: + + - pick a monitor + - kill it + - wait for quorum to be formed + - sleep for 'revive_delay' seconds + - revive monitor + - wait for quorum to be formed + - sleep for 'thrash_delay' seconds + + Options:: + + seed Seed to use on the RNG to reproduce a previous + behaviour (default: None; i.e., not set) + revive_delay Number of seconds to wait before reviving + the monitor (default: 10) + thrash_delay Number of seconds to wait in-between + test iterations (default: 0) + store_thrash Thrash monitor store before killing the monitor being thrashed (default: False) + store_thrash_probability Probability of thrashing a monitor's store + (default: 50) + thrash_many Thrash multiple monitors instead of just one. If + 'maintain_quorum' is set to False, then we will + thrash up to as many monitors as there are + available. 
(default: False) + maintain_quorum Always maintain quorum, taking care on how many + monitors we kill during the thrashing. If we + happen to only have one or two monitors configured, + if this option is set to True, then we won't run + this task as we cannot guarantee maintenance of + quorum. Setting it to false however would allow the + task to run with as many as just one single monitor. + (default: True) + freeze_mon_probability: how often to freeze the mon instead of killing it, + in % (default: 0) + freeze_mon_duration: how many seconds to freeze the mon (default: 15) + scrub Scrub after each iteration (default: True) + check_mds_failover Check if mds failover happened (default: False) + + Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also + be set to True. + + For example:: + + tasks: + - ceph: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + store_thrash: true + store_thrash_probability: 40 + seed: 31337 + maintain_quorum: true + thrash_many: true + check_mds_failover: True + - ceph-fuse: + - workunit: + clients: + all: + - mon/workloadgen.sh + """ + def __init__(self, ctx, manager, config, name, logger): + super(MonitorThrasher, self).__init__() + + self.ctx = ctx + self.manager = manager + self.manager.wait_for_clean() + + self.stopping = False + self.logger = logger + self.config = config + self.name = name + + if self.config is None: + self.config = dict() + + """ Test reproducibility """ + self.random_seed = self.config.get('seed', None) + + if self.random_seed is None: + self.random_seed = int(time.time()) + + self.rng = random.Random() + self.rng.seed(int(self.random_seed)) + + """ Monitor thrashing """ + self.revive_delay = float(self.config.get('revive_delay', 10.0)) + self.thrash_delay = float(self.config.get('thrash_delay', 0.0)) + + self.thrash_many = self.config.get('thrash_many', False) + self.maintain_quorum = self.config.get('maintain_quorum', True) + + self.scrub = self.config.get('scrub', True) + + 
def should_thrash_store(self):
    """
    Decide whether the store of the monitor being thrashed gets thrashed.

    Returns False without drawing a random number when store thrashing is
    disabled. Otherwise a value drawn with randrange(0, 101) is compared
    against store_thrash_probability.
    """
    if self.store_thrash:
        roll = self.rng.randrange(0, 101)
        return roll < self.store_thrash_probability
    return False
def revive_mon(self, mon):
    """
    Revive the monitor specified.

    :param mon: id of the monitor to revive; it is logged as 'mon.<id>'
                and handed unchanged to the manager's revive_mon()
    """
    # Only the revive is announced here. An extra 'killing mon.<id>'
    # message used to be logged first, which made the thrasher log claim
    # a kill that never happened at this point.
    self.log('reviving mon.{id}'.format(id=mon))
    self.manager.revive_mon(mon)
+ """ + #status before mon thrashing + if self.mds_failover: + oldstatus = self.mds_cluster.status() + + self.log('start thrashing') + self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ + 'thrash many: {tm}, maintain quorum: {mq} '\ + 'store thrash: {st}, probability: {stp} '\ + 'freeze mon: prob {fp} duration {fd}'.format( + s=self.random_seed,r=self.revive_delay,t=self.thrash_delay, + tm=self.thrash_many, mq=self.maintain_quorum, + st=self.store_thrash,stp=self.store_thrash_probability, + fp=self.freeze_mon_probability,fd=self.freeze_mon_duration, + )) + + while not self.stopping: + mons = _get_mons(self.ctx) + self.manager.wait_for_mon_quorum_size(len(mons)) + self.log('making sure all monitors are in the quorum') + for m in mons: + s = self.manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + kill_up_to = self.rng.randrange(1, self.max_killable()+1) + mons_to_kill = self.rng.sample(mons, kill_up_to) + self.log('monitors to thrash: {m}'.format(m=mons_to_kill)) + + mons_to_freeze = [] + for mon in mons: + if mon in mons_to_kill: + continue + if self.should_freeze_mon(): + mons_to_freeze.append(mon) + self.log('monitors to freeze: {m}'.format(m=mons_to_freeze)) + + for mon in mons_to_kill: + self.log('thrashing mon.{m}'.format(m=mon)) + + """ we only thrash stores if we are maintaining quorum """ + if self.should_thrash_store() and self.maintain_quorum: + self.thrash_store(mon) + + self.kill_mon(mon) + + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + if self.maintain_quorum: + self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill)) + for m in mons: + if m in mons_to_kill: + continue + s = self.manager.get_mon_status(m) + assert s['state'] == 'leader' or 
s['state'] == 'peon' + assert len(s['quorum']) == len(mons)-len(mons_to_kill) + + self.log('waiting for {delay} secs before reviving monitors'.format( + delay=self.revive_delay)) + time.sleep(self.revive_delay) + + for mon in mons_to_kill: + self.revive_mon(mon) + # do more freezes + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + self.manager.wait_for_mon_quorum_size(len(mons)) + for m in mons: + s = self.manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + if self.scrub: + self.log('triggering scrub') + try: + self.manager.raw_cluster_cmd('mon', 'scrub') + except Exception as e: + log.warning("Ignoring exception while triggering scrub: %s", e) + + if self.thrash_delay > 0.0: + self.log('waiting for {delay} secs before continuing thrashing'.format( + delay=self.thrash_delay)) + time.sleep(self.thrash_delay) + + #status after thrashing + if self.mds_failover: + status = self.mds_cluster.status() + assert not oldstatus.hadfailover(status), \ + 'MDS Failover' + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the monitor by thrashing them while another task/workunit + is running. + + Please refer to MonitorThrasher class for further information on the + available options. 
@contextlib.contextmanager
def task(ctx, config):
    """
    Run multibench

    Spawns ``segments`` greenlets; each one repeatedly runs the radosbench
    task (against a pool named ``<segment>-<iteration>``) until ``time``
    seconds have elapsed since that greenlet started.

    The config should be as follows:

    multibench:
        time: <seconds to run total (default: 600)>
        segments: <number of concurrent benches (default: 3)>
        radosbench: <config for radosbench (default: empty)>

    example:

    tasks:
    - ceph:
    - multibench:
        clients: [client.0]
        time: 360
    - interactive:

    On exit from the context every spawned greenlet is joined with get(),
    so an exception raised inside a segment is re-raised here.
    """
    log.info('Beginning multibench...')
    assert isinstance(config, dict), \
        "please list clients to run on"

    def run_one(num):
        """Run test spawn from gevent"""
        start = time.time()
        # Loop-invariant: how long this segment keeps starting new benches.
        duration = int(config.get('time', 600))
        if not config.get('radosbench'):
            benchcontext = {}
        else:
            # shallow copy so the 'pool' key set below does not leak into
            # the shared task config
            benchcontext = copy.copy(config.get('radosbench'))
        iterations = 0
        while time.time() - start < duration:
            log.info("Starting iteration %s of segment %s"%(iterations, num))
            benchcontext['pool'] = str(num) + "-" + str(iterations)
            with radosbench.task(ctx, benchcontext):
                # An explicit zero-length sleep. The bare time.sleep() used
                # before is a TypeError with the standard library function
                # and only works when time.sleep has been swapped for an
                # implementation that defaults to 0 seconds
                # (presumably gevent's, via monkey patching - confirm).
                time.sleep(0)
            iterations += 1

    segment_count = int(config.get('segments', 3))
    log.info("Starting %s threads"%(str(config.get('segments', 3)),))
    segments = [gevent.spawn(run_one, i) for i in range(0, segment_count)]

    try:
        yield
    finally:
        for segment in segments:
            segment.get()
def variable_delay(remote, host, interface, delay_range=[]):
    """
    Vary delay between two values.

    delay_range[0] and delay_range[1] are passed, in that order, as the two
    netem delay arguments (the first has to be lower than the second).
    If the 'tc qdisc show' output for the interface contains no 'refcnt',
    the priority qdisc, the netem qdisc and the destination-ip filter are
    created; otherwise only the existing delay is replaced.
    """
    add_netem, replace_netem, add_filter = cmd_prefix(interface)

    dst_ip = socket.gethostbyname(host.hostname)

    # the lower bound comes first, then the upper bound
    low = delay_range[0]
    high = delay_range[1]

    qdisc_state = remote.sh(show_tc(interface))
    if 'refcnt' in qdisc_state.strip():
        # the device is already set up, so only change the delay
        log.info('Setting varying delay')
        replace_netem.extend(['%s' % low, '%s' % high])
        remote.run(args=replace_netem)
        return

    # nothing configured yet: create the priority queue first
    remote.run(args=set_priority(interface))

    # then attach the netem qdisc carrying the variable delay
    log.info('Setting varying delay')
    add_netem.extend(['%s' % low, '%s' % high])
    remote.run(args=add_netem)

    # and finally restrict it to traffic for the destination ip
    log.info('Delay set on %s' % remote)
    add_filter.extend(['%s' % dst_ip, 'flowid', '2:1'])
    remote.run(args=add_filter)
present""" + + log.info('Delete tc') + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') != -1: + remote.run(args=del_tc(interface)) + + +class Toggle: + + stop_event = gevent.event.Event() + + def __init__(self, ctx, remote, host, interface, interval): + self.ctx = ctx + self.remote = remote + self.host = host + self.interval = interval + self.interface = interface + self.ip = socket.gethostbyname(self.host.hostname) + + def packet_drop(self): + + """ Drop packets to the remote ip specified""" + + _, _, set_ip = cmd_prefix(self.interface) + + tc = self.remote.sh(show_tc(self.interface)) + if tc.strip().find('refcnt') == -1: + self.remote.run(args=set_priority(self.interface)) + # packet drop to specific ip + log.info('Drop all packets to %s' % self.host) + set_ip.extend(['%s' % self.ip, 'action', 'drop']) + self.remote.run(args=set_ip) + + def link_toggle(self): + + """ + For toggling packet drop and recovery in regular interval. + If interval is 5s, link is up for 5s and link is down for 5s + """ + + while not self.stop_event.is_set(): + self.stop_event.wait(timeout=self.interval) + # simulate link down + try: + self.packet_drop() + log.info('link down') + except SSHException: + log.debug('Failed to run command') + + self.stop_event.wait(timeout=self.interval) + # if qdisc exist,delete it. 
+ try: + delete_dev(self.remote, self.interface) + log.info('link up') + except SSHException: + log.debug('Failed to run command') + + def begin(self, gname): + self.thread = gevent.spawn(self.link_toggle) + self.ctx.netem.names[gname] = self.thread + + def end(self, gname): + self.stop_event.set() + log.info('gname is {}'.format(self.ctx.netem.names[gname])) + self.ctx.netem.names[gname].get() + + def cleanup(self): + """ + Invoked during unwinding if the test fails or exits before executing task 'link_recover' + """ + log.info('Clean up') + self.stop_event.set() + self.thread.get() + + +@contextlib.contextmanager +def task(ctx, config): + + """ + - netem: + clients: [c1.rgw.0] + iface: eno1 + dst_client: [c2.rgw.1] + delay: 10ms + + - netem: + clients: [c1.rgw.0] + iface: eno1 + dst_client: [c2.rgw.1] + delay_range: [10ms, 20ms] # (min, max) + + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + gname: t1 + dst_client: [c2.rgw.1] + link_toggle_interval: 10 # no unit mentioned. By default takes seconds. 
+ + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + link_recover: [t1, t2] + + + """ + + log.info('config %s' % config) + + assert isinstance(config, dict), \ + "please list clients to run on" + if not hasattr(ctx, 'netem'): + ctx.netem = argparse.Namespace() + ctx.netem.names = {} + + if config.get('dst_client') is not None: + dst = config.get('dst_client') + (host,) = ctx.cluster.only(dst).remotes.keys() + + for role in config.get('clients', None): + (remote,) = ctx.cluster.only(role).remotes.keys() + ctx.netem.remote = remote + if config.get('delay', False): + static_delay(remote, host, config.get('iface'), config.get('delay')) + if config.get('delay_range', False): + variable_delay(remote, host, config.get('iface'), config.get('delay_range')) + if config.get('link_toggle_interval', False): + log.info('Toggling link for %s' % config.get('link_toggle_interval')) + global toggle + toggle = Toggle(ctx, remote, host, config.get('iface'), config.get('link_toggle_interval')) + toggle.begin(config.get('gname')) + if config.get('link_recover', False): + log.info('Recovering link') + for gname in config.get('link_recover'): + toggle.end(gname) + log.info('sleeping') + time.sleep(config.get('link_toggle_interval')) + delete_dev(ctx.netem.remote, config.get('iface')) + del ctx.netem.names[gname] + + try: + yield + finally: + if ctx.netem.names: + toggle.cleanup() + for role in config.get('clients'): + (remote,) = ctx.cluster.only(role).remotes.keys() + delete_dev(remote, config.get('iface')) + diff --git a/qa/tasks/netsplit.py b/qa/tasks/netsplit.py new file mode 100644 index 000000000..b6614dc50 --- /dev/null +++ b/qa/tasks/netsplit.py @@ -0,0 +1,73 @@ +""" +Functions to netsplit test machines. + +At present, you must specify monitors to disconnect, and it +drops those IP pairs. This means OSDs etc on the hosts which use +the same IP will also be blocked! If you are using multiple IPs on the +same host within the cluster, daemons on those other IPs will get +through. 
def get_ip_and_ports(ctx, daemon):
    """
    Extract the IPv4 address and port suffixes of a monitor.

    The address string is looked up in ctx.ceph['ceph'].mons[daemon] and
    scanned for dotted-quad addresses with an optional trailing port part.

    :param ctx: context providing ctx.ceph['ceph'].mons
    :param daemon: monitor role name; must start with 'mon.'
    :returns: (plain_ip, port_list); each port entry is the text that
              followed the address in the match, leading ':' included
    :raises AssertionError: if daemon is not a mon role, no address is
              found, or the matches do not all share one IP
    """
    assert daemon.startswith('mon.')
    addr = ctx.ceph['ceph'].mons['{a}'.format(a=daemon)]
    # Raw strings: same patterns as before, minus the invalid '\.' string
    # escapes that newer Python versions warn about.
    ips = re.findall(r"[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+[:[0-9]*]*", addr)
    assert len(ips) > 0
    # .group() always returns a string here, so no None check is needed
    plain_ip = re.match(r"[0-9\.]*", ips[0]).group()
    port_list = []
    for ip in ips:
        ip_str, port_str = re.match(r"([0-9\.]*)([:[0-9]*]*)", ip).groups()
        assert ip_str == plain_ip
        if len(port_str) > 0:
            port_list.append(port_str)
    return (plain_ip, port_list)


def _update_drop_rules(ctx, config, iptables_action):
    """
    Apply an iptables INPUT/DROP rule change between two monitor hosts.

    iptables_action is the iptables flag to use: '-A' appends the rule,
    '-D' deletes it. Each host gets the rule for the other host's IP.
    """
    assert len(config) == 2 # we can only disconnect pairs right now
    # and we can only disconnect mons right now
    assert config[0].startswith('mon.')
    assert config[1].startswith('mon.')
    (ip1, _) = get_ip_and_ports(ctx, config[0])
    (ip2, _) = get_ip_and_ports(ctx, config[1])

    # .keys() instead of the Python-2-only .iterkeys()
    (host1,) = ctx.cluster.only(config[0]).remotes.keys()
    (host2,) = ctx.cluster.only(config[1]).remotes.keys()
    assert host1 is not None
    assert host2 is not None

    host1.run(
        args = ["sudo", "iptables", iptables_action, "INPUT", "-p", "tcp", "-s",
                ip2, "-j", "DROP"]
    )
    host2.run(
        args = ["sudo", "iptables", iptables_action, "INPUT", "-p", "tcp", "-s",
                ip1, "-j", "DROP"]
    )


def disconnect(ctx, config):
    """
    Drop TCP traffic between the hosts of two monitors.

    :param config: sequence of exactly two 'mon.*' role names
    """
    _update_drop_rules(ctx, config, "-A")


def reconnect(ctx, config):
    """
    Remove the DROP rules that disconnect() appends for the same pair.

    :param config: sequence of exactly two 'mon.*' role names
    """
    _update_drop_rules(ctx, config, "-D")
@contextlib.contextmanager
def task(ctx, config):
    """
    Expose every scratch device of every remote through nvme_loop.

    For each remote: load nvme_loop, create the nvmet host and port
    entries, create one subsystem per scratch device and connect to it,
    then poll 'nvme list' until as many '/dev/...' rows whose third column
    is 'Linux' show up as there were scratch devices. /scratch_devs is
    rewritten with those device names; its previous content is saved.

    On exit each subsystem is disconnected (failures are ignored) and the
    saved /scratch_devs content is written back.
    """
    log.info('Setting up nvme_loop on scratch devices...')
    host = 'hostnqn'
    port = '1'
    devs_by_remote = {}
    old_scratch_by_remote = {}
    for remote in ctx.cluster.remotes.keys():
        devs = teuthology.get_scratch_devices(remote)
        devs_by_remote[remote] = devs
        base = '/sys/kernel/config/nvmet'
        remote.run(
            args=[
                'sudo', 'modprobe', 'nvme_loop',
                run.Raw('&&'),
                'sudo', 'mkdir', '-p', f'{base}/hosts/{host}',
                run.Raw('&&'),
                'sudo', 'mkdir', '-p', f'{base}/ports/{port}',
                run.Raw('&&'),
                'echo', 'loop', run.Raw('|'),
                'sudo', 'tee', f'{base}/ports/{port}/addr_trtype',
            ]
        )
        for dev in devs:
            # subsystem name is the last path component of the device
            short = dev.split('/')[-1]
            log.info(f'Connecting nvme_loop {remote.shortname}:{dev}...')
            remote.run(
                args=[
                    'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}',
                    run.Raw('&&'),
                    'echo', '1', run.Raw('|'),
                    'sudo', 'tee', f'{base}/subsystems/{short}/attr_allow_any_host',
                    run.Raw('&&'),
                    'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}/namespaces/1',
                    run.Raw('&&'),
                    'echo', '-n', dev, run.Raw('|'),
                    'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/device_path',
                    run.Raw('&&'),
                    'echo', '1', run.Raw('|'),
                    'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/enable',
                    run.Raw('&&'),
                    'sudo', 'ln', '-s', f'{base}/subsystems/{short}',
                    f'{base}/ports/{port}/subsystems/{short}',
                    run.Raw('&&'),
                    'sudo', 'nvme', 'connect', '-t', 'loop', '-n', short, '-q', host,
                ]
            )

        # identify nvme_loops devices
        old_scratch_by_remote[remote] = remote.read_file('/scratch_devs')

        with contextutil.safe_while(sleep=1, tries=15) as proceed:
            while proceed():
                p = remote.run(args=['sudo', 'nvme', 'list'], stdout=StringIO())
                new_devs = []
                for line in p.stdout.getvalue().splitlines():
                    fields = line.split()
                    if len(fields) < 3:
                        # blank or short line: not a device row. Unpacking
                        # it unconditionally would raise ValueError.
                        continue
                    dev, vendor = fields[0], fields[2]
                    if dev.startswith('/dev/') and vendor == 'Linux':
                        new_devs.append(dev)
                log.info(f'new_devs {new_devs}')
                assert len(new_devs) <= len(devs)
                if len(new_devs) == len(devs):
                    break

        remote.write_file(
            path='/scratch_devs',
            data='\n'.join(new_devs) + '\n',
            sudo=True
        )

    try:
        yield

    finally:
        for remote, devs in devs_by_remote.items():
            for dev in devs:
                short = dev.split('/')[-1]
                log.info(f'Disconnecting nvme_loop {remote.shortname}:{dev}...')
                remote.run(
                    args=[
                        'sudo', 'nvme', 'disconnect', '-n', short
                    ],
                    # best effort: a failed disconnect must not abort cleanup
                    check_status=False,
                )
            remote.write_file(
                path='/scratch_devs',
                data=old_scratch_by_remote[remote],
                sudo=True
            )
@contextlib.contextmanager
def task(ctx, config):
    """
    Run omapbench

    The config should be as follows::

        omapbench:
            clients: [client list]
            threads: <threads at once>
            objects: <number of objects to write>
            entries: <number of entries per object map>
            keysize: <number of characters per object map key>
            valsize: <number of characters per object map val>
            increment: <interval to show in histogram (in ms)>
            omaptype: <how the omaps should be generated>

    example::

        tasks:
        - ceph:
        - omapbench:
            clients: [client.0]
            threads: 30
            objects: 1000
            entries: 10
            keysize: 10
            valsize: 100
            increment: 100
            omaptype: uniform
        - interactive:

    One omapbench process is started per client role without waiting for
    it; all of them are waited for when the context exits.
    """
    log.info('Beginning omapbench...')
    assert isinstance(config, dict), \
        "please list clients to run on"
    omapbench = {}
    testdir = teuthology.get_testdir(ctx)
    # Log the increment that is actually passed to omapbench. This replaces
    # a leftover print() that reported a different default (-1).
    log.debug('omapbench increment: %s', config.get('increment', 10))
    PREFIX = 'client.'
    for role in config.get('clients', ['client.0']):
        assert isinstance(role, str)
        assert role.startswith(PREFIX)
        id_ = role[len(PREFIX):]
        (remote,) = ctx.cluster.only(role).remotes.keys()
        # Only the coverage path is a template. Formatting the whole joined
        # command would misinterpret any '{' or '}' in a config value.
        command = " ".join([
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'omapbench',
            '--name', id_,
            '-t', str(config.get('threads', 30)),
            '-o', str(config.get('objects', 1000)),
            '--entries', str(config.get('entries',10)),
            '--keysize', str(config.get('keysize',10)),
            '--valsize', str(config.get('valsize',1000)),
            '--inc', str(config.get('increment',10)),
            '--omaptype', str(config.get('omaptype','uniform')),
        ])
        proc = remote.run(
            args=["/bin/sh", "-c", command],
            logger=log.getChild('omapbench.{id}'.format(id=id_)),
            stdin=run.PIPE,
            wait=False
        )
        omapbench[id_] = proc

    try:
        yield
    finally:
        log.info('joining omapbench')
        run.wait(omapbench.values())
chained certificates must not share a common name + cn: teuthology + + # private key type for -newkey, defaults to rsa:2048 + key-type: rsa:4096 + + # install the certificate as trusted on these clients: + install: [client.0, client.1] + + + To create a certificate signed by a ca certificate: + + - openssl_keys: + root: (self-signed certificate as above) + ... + + cert-for-client1: + client: client.1 + + # use another ssl certificate (by 'name') as the certificate authority + ca: root # --CAkey=root.key -CA=root.crt + + # embed the private key in the certificate file + embed-key: true + """ + + def __init__(self, ctx, config): + super(OpenSSLKeys, self).__init__(ctx, config) + self.certs = [] + self.installed = [] + + def setup(self): + # global dictionary allows other tasks to look up certificate paths + if not hasattr(self.ctx, 'ssl_certificates'): + self.ctx.ssl_certificates = {} + + # use testdir/ca as a working directory + self.cadir = '/'.join((misc.get_testdir(self.ctx), 'ca')) + # make sure self-signed certs get added first, they don't have 'ca' field + configs = sorted(self.config.items(), key=lambda x: 'ca' in x[1]) + for name, config in configs: + # names must be unique to avoid clobbering each others files + if name in self.ctx.ssl_certificates: + raise ConfigError('ssl: duplicate certificate name {}'.format(name)) + + # create the key and certificate + cert = self.create_cert(name, config) + + self.ctx.ssl_certificates[name] = cert + self.certs.append(cert) + + # install as trusted on the requested clients + for client in config.get('install', []): + installed = self.install_cert(cert, client) + self.installed.append(installed) + + def teardown(self): + """ + Clean up any created/installed certificate files. + """ + for cert in self.certs: + self.remove_cert(cert) + + for installed in self.installed: + self.uninstall_cert(installed) + + def create_cert(self, name, config): + """ + Create a certificate with the given configuration. 
+ """ + cert = argparse.Namespace() + cert.name = name + cert.key_type = config.get('key-type', 'rsa:2048') + + cert.client = config.get('client', None) + if not cert.client: + raise ConfigError('ssl: missing required field "client"') + + (cert.remote,) = self.ctx.cluster.only(cert.client).remotes.keys() + + cert.remote.run(args=['mkdir', '-p', self.cadir]) + + cert.key = f'{self.cadir}/{cert.name}.key' + cert.certificate = f'{self.cadir}/{cert.name}.crt' + + san_ext = [] + add_san_default = False + cn = config.get('cn', '') + if cn == '': + cn = cert.remote.hostname + add_san_default = True + if config.get('add-san', add_san_default): + ext = f'{self.cadir}/{cert.name}.ext' + san_ext = ['-extfile', ext] + + # provide the common name in -subj to avoid the openssl command prompts + subject = f'/CN={cn}' + + # if a ca certificate is provided, use it to sign the new certificate + ca = config.get('ca', None) + if ca: + # the ca certificate must have been created by a prior ssl task + ca_cert = self.ctx.ssl_certificates.get(ca, None) + if not ca_cert: + raise ConfigError(f'ssl: ca {ca} not found for certificate {cert.name}') + + csr = f'{self.cadir}/{cert.name}.csr' + srl = f'{self.cadir}/{ca_cert.name}.srl' + remove_files = ['rm', csr, srl] + + # these commands are run on the ca certificate's client because + # they need access to its private key and cert + + # generate a private key and signing request + ca_cert.remote.run(args=['openssl', 'req', '-nodes', + '-newkey', cert.key_type, '-keyout', cert.key, + '-out', csr, '-subj', subject]) + + if san_ext: + remove_files.append(ext) + ca_cert.remote.write_file(path=ext, + data='subjectAltName = DNS:{},IP:{}'.format( + cn, + config.get('ip', cert.remote.ip_address))) + + # create the signed certificate + ca_cert.remote.run(args=['openssl', 'x509', '-req', '-in', csr, + '-CA', ca_cert.certificate, '-CAkey', ca_cert.key, '-CAcreateserial', + '-out', cert.certificate, '-days', '365', '-sha256'] + san_ext) + + 
ca_cert.remote.run(args=remove_files) # clean up the signing request and serial + + # verify the new certificate against its ca cert + ca_cert.remote.run(args=['openssl', 'verify', + '-CAfile', ca_cert.certificate, cert.certificate]) + + if cert.remote != ca_cert.remote: + # copy to remote client + self.remote_copy_file(ca_cert.remote, cert.certificate, cert.remote, cert.certificate) + self.remote_copy_file(ca_cert.remote, cert.key, cert.remote, cert.key) + # clean up the local copies + ca_cert.remote.run(args=['rm', cert.certificate, cert.key]) + # verify the remote certificate (requires ca to be in its trusted ca certificate store) + cert.remote.run(args=['openssl', 'verify', cert.certificate]) + else: + # otherwise, generate a private key and use it to self-sign a new certificate + cert.remote.run(args=['openssl', 'req', '-x509', '-nodes', + '-newkey', cert.key_type, '-keyout', cert.key, + '-days', '365', '-out', cert.certificate, '-subj', subject]) + + if config.get('embed-key', False): + # append the private key to the certificate file + cert.remote.run(args=['cat', cert.key, run.Raw('>>'), cert.certificate]) + + return cert + + def remove_cert(self, cert): + """ + Delete all of the files associated with the given certificate. + """ + # remove the private key and certificate + cert.remote.run(args=['rm', '-f', cert.certificate, cert.key]) + + # remove ca subdirectory if it's empty + cert.remote.run(args=['rmdir', '--ignore-fail-on-non-empty', self.cadir]) + + def install_cert(self, cert, client): + """ + Install as a trusted ca certificate on the given client. 
def uninstall_cert(self, installed):
    """
    Remove a certificate from the trusted certificate store.

    ``installed`` is the record returned by ``install_cert``: the file at
    ``installed.path`` is deleted as root on ``installed.remote``, then
    ``installed.command`` is run again on that remote.
    """
    remote = installed.remote
    delete_installed = ['sudo', 'rm', installed.path]
    remote.run(args=delete_installed)
    remote.run(args=installed.command)
def rados_start(ctx, remote, cmd):
    """
    Start a rados command on a remote without waiting for it to finish.

    The command is wrapped in ``adjust-ulimits`` and ``ceph-coverage``
    (writing into the test directory's ``archive/coverage``), and ``cmd``
    is appended after ``rados``. Returns the process object handed back
    by ``remote.run(..., wait=False)``.
    """
    log.info("rados %s" % ' '.join(cmd))
    coverage_dir = '{tdir}/archive/coverage'.format(
        tdir=teuthology.get_testdir(ctx))
    full_command = ['adjust-ulimits', 'ceph-coverage', coverage_dir, 'rados']
    full_command.extend(cmd)
    return remote.run(args=full_command, wait=False)
def _count_matching(lines, needle):
    """
    Return how many entries of ``lines`` contain the substring ``needle``.
    """
    # filter() returns an iterator on Python 3, so len(filter(...)) raises
    # TypeError; count the matches directly instead.
    return sum(1 for line in lines if needle in line)


def _watch_cluster_log(mon, manager, sleep_time, injectargs=None):
    """
    Capture the output of ``ceph -w`` on ``mon`` for ``sleep_time`` seconds.

    If ``injectargs`` is given it is sent to osd.0 right after the watcher
    has been started, so the watcher sees the resulting messages.
    Returns the captured output split into lines.
    """
    proc = mon.run(
        args=[
            'sudo',
            'daemon-helper',
            'kill',
            'ceph', '-w'
        ],
        stdin=run.PIPE,
        stdout=StringIO(),
        wait=False,
    )

    if injectargs is not None:
        manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', injectargs)

    time.sleep(sleep_time)
    proc.stdin.close()  # causes daemon-helper send SIGKILL to ceph -w
    proc.wait()

    return proc.stdout.getvalue().split('\n')


def task(ctx, config):
    """
    Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
    configuration settings

    In order for test to pass must use log-ignorelist as follows

        tasks:
            - chef:
            - install:
            - ceph:
                log-ignorelist: ['OSD near full', 'OSD full dropping all updates']
            - osd_failsafe_enospc:

    The test marks every osd except osd.0 out, creates a size-1 pool
    "foo", and then walks osd.0 through the near-full / full states by
    injecting tiny failsafe ratios, checking the cluster log messages and
    the result of client writes at each step. At the end the pool is
    removed and the other osds are marked in again.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'osd_failsafe_enospc task only accepts a dict for configuration'

    # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
    sleep_time = 50

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'

    # cluster log messages the test looks for
    near_full_msg = '[WRN] OSD near full'
    full_msg = '[ERR] OSD full dropping all updates'

    manager = ctx.managers['ceph']

    # create 1 pg pool with 1 rep which can only be on osd.0
    osds = manager.get_osd_dump()
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_out_osd(osd['osd'])

    log.info('creating pool foo')
    manager.create_pool("foo")
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')

    # State NONE -> NEAR
    log.info('1. Verify warning messages when exceeding nearfull_ratio')

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    lines = _watch_cluster_log(mon, manager, sleep_time,
                               '--osd_failsafe_nearfull_ratio .00001')

    count = _count_matching(lines, near_full_msg)
    assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    # State NEAR -> FULL
    log.info('2. Verify error messages when exceeding full_ratio')

    lines = _watch_cluster_log(mon, manager, sleep_time,
                               '--osd_failsafe_full_ratio .00001')

    count = _count_matching(lines, full_msg)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    log.info('3. Verify write failure when exceeding full_ratio')

    # Write data should fail
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
    assert ret != 0, 'Expected write failure but it succeeded with exit status 0'

    # Put back default
    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    # State FULL -> NEAR
    log.info('4. Verify write success when NOT exceeding full_ratio')

    # Write should succeed
    ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
    assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret

    log.info('5. Verify warning messages again when exceeding nearfull_ratio')

    lines = _watch_cluster_log(mon, manager, sleep_time)

    count = _count_matching(lines, near_full_msg)
    assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
    time.sleep(10)

    # State NONE -> FULL
    log.info('6. Verify error messages again when exceeding full_ratio')

    lines = _watch_cluster_log(mon, manager, sleep_time,
                               '--osd_failsafe_full_ratio .00001')

    count = _count_matching(lines, near_full_msg)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count

    # State FULL -> NONE
    log.info('7. Verify no messages settings back to default')

    manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
    time.sleep(10)

    lines = _watch_cluster_log(mon, manager, sleep_time)

    count = _count_matching(lines, near_full_msg)
    assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
    count = _count_matching(lines, full_msg)
    assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count

    log.info('Test Passed')

    # Bring all OSDs back in
    manager.remove_pool("foo")
    for osd in osds:
        if osd['osd'] != 0:
            manager.mark_in_osd(osd['osd'])
def pg_num_in_all_states(pgs, *states):
    """
    Count the values of ``pgs`` that contain every one of ``states``.

    Containment is tested with ``in`` against each value. With no
    ``states`` given, every value counts.
    """
    matching = 0
    for pg_state in pgs.values():
        if all(wanted in pg_state for wanted in states):
            matching += 1
    return matching


def pg_num_in_any_state(pgs, *states):
    """
    Count the values of ``pgs`` that contain at least one of ``states``.

    Containment is tested with ``in`` against each value. With no
    ``states`` given, nothing counts.
    """
    matching = 0
    for pg_state in pgs.values():
        if any(wanted in pg_state for wanted in states):
            matching += 1
    return matching
def test_create_from_peer(ctx, config):
    """
    Check that pg creation is held back at the pg-per-osd limit and
    resumes once the pg count drops again.

    How it works::
    1. create pool.a with ``pg_num`` pgs and require all of them to be
       active+clean.
    2. create pool.b with ``pg_num`` pgs. If some of its pgs end up
       unknown/creating instead of active+clean, remove pool.b and create
       it again, until all pgs of both pools are active+clean.
    3. mark one osd from the acting set of a randomly chosen pg out (the
       last one if ``from_primary`` is set, otherwise the first) and
       verify that some pgs are activating, stale or peering.
    4. remove pool.b and wait for the cluster to become clean.
    """
    pg_num = config.get('pg_num', 1)
    from_primary = config.get('from_primary', True)
    manager = ctx.managers['ceph']

    log.info('1. creating pool.a')
    pool_a = manager.create_pool_with_unique_name(pg_num)
    states = manager.wait_till_pg_convergence(300)
    healthy = pg_num_in_all_states(states, 'active', 'clean')
    assert healthy == pg_num

    log.info('2. creating pool.b')
    expected_total = pg_num * 2
    while True:
        pool_b = manager.create_pool_with_unique_name(pg_num)
        states = manager.wait_till_pg_convergence(300)
        healthy = pg_num_in_all_states(states, 'active', 'clean')
        assert healthy >= pg_num
        waiting = pg_num_in_any_state(states, 'unknown', 'creating')
        assert waiting == pg_num * 2 - healthy
        if healthy == expected_total:
            break
        # pool.b did not fully come up; drop it and try again
        manager.remove_pool(pool_b)

    log.info('3. mark an osd out')
    chosen_pg = random.choice(manager.get_pg_stats())
    victim = chosen_pg['acting'][-1 if from_primary else 0]
    manager.mark_out_osd(victim)
    states = manager.wait_till_pg_convergence(300)
    stuck = pg_num_in_any_state(states, 'activating', 'stale', 'peering')
    assert stuck > 0

    log.info('4. removing pool.b')
    manager.remove_pool(pool_b)
    manager.wait_for_clean(30)

    # cleanup
    manager.remove_pool(pool_a)
logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + # test some osdmap flags + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.raw_cluster_cmd('osd', 'unset', 'noin') + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'noup') + manager.raw_cluster_cmd('osd', 'unset', 'nodown') + + # write some new data + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # trigger a divergent target: + # blackhole + restart osd.1 (shorter log) + manager.blackhole_kill_osd(1) + # kill osd.2 (longer log... we'll make it divergent below) + manager.kill_osd(2) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # cluster must repeer + manager.flush_pg_stats([0, 1]) + manager.wait_for_active_or_down() + + # write some more (make sure osd.2 really is divergent) + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) + p.wait() + + # revive divergent osd + manager.revive_osd(2) + + while len(manager.get_osd_status()['up']) < 3: + log.info('waiting a bit...') + time.sleep(2) + log.info('3 are up!') + + # cluster must recover + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + +def test_incomplete_pgs(ctx, config): + """ + Test handling of incomplete pgs. Requires 4 osds. 
+ """ + testdir = teuthology.get_testdir(ctx) + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 4 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 4: + time.sleep(10) + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + log.info('Testing incomplete pgs...') + + for i in range(4): + manager.set_config( + i, + osd_recovery_delay_start=1000) + + # move data off of osd.0, osd.1 + manager.raw_cluster_cmd('osd', 'out', '0', '1') + manager.flush_pg_stats([0, 1, 2, 3], [0, 1]) + manager.wait_for_clean() + + # lots of objects in rbd (no pg log, will backfill) + p = rados_start(testdir, mon, + ['-p', 'rbd', 'bench', '20', 'write', '-b', '1', + '--no-cleanup']) + p.wait() + + # few objects in rbd pool (with pg log, normal recovery) + for f in range(1, 20): + p = rados_start(testdir, mon, ['-p', 'rbd', 'put', + 'foo.%d' % f, '/etc/passwd']) + p.wait() + + # move it back + manager.raw_cluster_cmd('osd', 'in', '0', '1') + manager.raw_cluster_cmd('osd', 'out', '2', '3') + time.sleep(10) + manager.flush_pg_stats([0, 1, 2, 3], [2, 3]) + time.sleep(10) + manager.wait_for_active() + + assert not manager.is_clean() + assert not manager.is_recovered() + + # kill 2 + 3 + log.info('stopping 2,3') + manager.kill_osd(2) + manager.kill_osd(3) + log.info('...') + manager.raw_cluster_cmd('osd', 'down', '2', '3') + manager.flush_pg_stats([0, 1]) + manager.wait_for_active_or_down() + + assert manager.get_num_down() > 0 + + # revive 2 + 3 + manager.revive_osd(2) + manager.revive_osd(3) + while len(manager.get_osd_status()['up']) < 4: + log.info('waiting a bit...') + time.sleep(2) 
def task(ctx, config):
    """
    Test peering.

    Waits for three osds to be up and the cluster to be clean, takes
    osd.2 down and lets the cluster recover, then takes osd.1 down and
    revives osd.2. Every pg whose state reports 'down' must then show,
    in its ``pg query`` output, that peering is blocked by osd.1. At
    least one such pg is required. Finally osd.1 is revived and the
    cluster must become clean again.
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'peer task only accepts a dict for configuration'
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'),
    )

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()

    for osd_id in (0, 1, 2):
        manager.set_config(osd_id, osd_recovery_delay_start=120)

    # take one osd down
    manager.kill_osd(2)
    manager.mark_down_osd(2)

    # kludge to make sure they get a map
    rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])

    manager.flush_pg_stats([0, 1])
    manager.wait_for_recovery()

    # kill another and revive 2, so that some pgs can't peer.
    manager.kill_osd(1)
    manager.mark_down_osd(1)
    manager.revive_osd(2)
    manager.wait_till_osd_is_up(2)

    manager.flush_pg_stats([0, 2])

    manager.wait_for_active_or_down()

    manager.flush_pg_stats([0, 2])

    # look for down pgs
    down_pg_count = 0
    for pg in manager.get_pg_stats():
        raw_query = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
        log.debug("out string %s", raw_query)
        query = json.loads(raw_query)
        log.info("pg is %s, query json is %s", pg, query)

        if not pg['state'].count('down'):
            continue

        down_pg_count += 1
        # verify that it is blocked on osd.1
        recovery = query['recovery_state']
        assert len(recovery) >= 2
        assert recovery[0]['name'] == 'Started/Primary/Peering/Down'
        peering = recovery[1]
        assert peering['name'] == 'Started/Primary/Peering'
        assert peering['blocked']
        assert peering['down_osds_we_would_probe'] == [1]
        assert len(peering['peering_blocked_by']) == 1
        assert peering['peering_blocked_by'][0]['osd'] == 1

    assert down_pg_count > 0

    # bring it all back
    manager.revive_osd(1)
    manager.wait_till_osd_is_up(1)
    manager.flush_pg_stats([0, 1, 2])
    manager.wait_for_clean()
def do_run(ctx, config):
    """
    Run one trial of the peering speed test.

    Marks osd.0 in, writes ``config.num_pgs`` one-byte objects to the
    test pool and records how long that took, then waits for the cluster
    to become clean and records that time as well. If ``config.max_time``
    is non-zero the first duration must be below it. osd.0 is marked out
    again before returning.

    Returns a dict with 'time_to_active' and 'time_to_clean', both in
    seconds measured from the start of the trial.
    """
    started = time.time()

    # mark in osd
    manager = ctx.managers['ceph']
    manager.mark_in_osd(0)

    log.info("writing out objects")
    object_count = config.num_pgs  # write 1 object per pg or so
    write_threads = config.num_pgs  # lots of concurrency
    manager.rados_write_objects(
        POOLNAME,
        object_count,
        1,
        config.creation_time_limit,
        write_threads,
        cleanup=True)
    time_to_active = time.time() - started

    log.info("peering done, waiting on recovery")
    manager.wait_for_clean()

    log.info("recovery done")
    time_to_clean = time.time() - started

    if config.max_time:
        assert time_to_active < config.max_time

    manager.mark_out_osd(0)
    manager.wait_for_clean()

    return {
        'time_to_active': time_to_active,
        'time_to_clean': time_to_clean,
    }
<client> + pool_prefix: foo + num_pools: 5 + num_images: 10 + num_snaps: 3 + image_size: 10737418240 + """ + if config is None: + config = {} + client = config.get("client", "client.0") + pool_prefix = config.get("pool_prefix", "foo") + num_pools = config.get("num_pools", 2) + num_images = config.get("num_images", 20) + num_snaps = config.get("num_snaps", 4) + image_size = config.get("image_size", 100) + write_size = config.get("write_size", 1024*1024) + write_threads = config.get("write_threads", 10) + write_total_per_snap = config.get("write_total_per_snap", 1024*1024*30) + + (remote,) = ctx.cluster.only(client).remotes.keys() + + for poolid in range(num_pools): + poolname = "%s-%s" % (pool_prefix, str(poolid)) + log.info("Creating pool %s" % (poolname,)) + ctx.managers['ceph'].create_pool(poolname) + for imageid in range(num_images): + imagename = "rbd-%s" % (str(imageid),) + log.info("Creating imagename %s" % (imagename,)) + remote.run( + args = [ + "rbd", + "create", + imagename, + "--image-format", "1", + "--size", str(image_size), + "--pool", str(poolname)]) + def bench_run(): + remote.run( + args = [ + "rbd", + "bench-write", + imagename, + "--pool", poolname, + "--io-size", str(write_size), + "--io-threads", str(write_threads), + "--io-total", str(write_total_per_snap), + "--io-pattern", "rand"]) + log.info("imagename %s first bench" % (imagename,)) + bench_run() + for snapid in range(num_snaps): + snapname = "snap-%s" % (str(snapid),) + log.info("imagename %s creating snap %s" % (imagename, snapname)) + remote.run( + args = [ + "rbd", "snap", "create", + "--pool", poolname, + "--snap", snapname, + imagename + ]) + bench_run() + + try: + yield + finally: + log.info('done') diff --git a/qa/tasks/pykmip.py b/qa/tasks/pykmip.py new file mode 100644 index 000000000..45a5af689 --- /dev/null +++ b/qa/tasks/pykmip.py @@ -0,0 +1,465 @@ +""" +Deploy and configure PyKMIP for Teuthology +""" +import argparse +import contextlib +import logging +import time +import 
def run_in_pykmip_dir(ctx, client, args, **kwargs):
    """
    Run a command from inside the pykmip checkout directory.

    ``client`` is either a ``Remote`` object, used directly, or a role
    name that is resolved to its single remote through ``ctx.cluster``.
    ``args`` is appended to ``cd <pykmip dir> &&``. Any extra keyword
    arguments are passed on to ``remote.run``, whose result is returned.
    """
    if isinstance(client, Remote):
        remote = client
    else:
        (remote,) = ctx.cluster.only(client).remotes.keys()

    command = ['cd', get_pykmip_dir(ctx), run.Raw('&&'), ] + args
    return remote.run(args=command, **kwargs)
@contextlib.contextmanager
def install_packages(ctx, config):
    """
    Download the packaged dependencies of PyKMIP.
    Remove install packages upon exit.

    For every client in ``config`` the bindep description is copied to
    the remote, ``bindep`` is installed into the pykmip virtualenv and
    run against it, and each package it prints is installed. The same
    packages are removed again when the context exits.

    The context passed in should be identical to the context
    passed in to the main task.
    """
    assert isinstance(config, dict)
    log.info('Installing system dependenies for PyKMIP...')

    packages = {}
    for client in config:
        (remote,) = ctx.cluster.only(client).remotes.keys()
        # use bindep to read which dependencies we need from temp/bindep.txt
        fd, local_temp_path = tempfile.mkstemp(suffix='.txt',
                                               prefix='bindep-')
        try:
            try:
                os.write(fd, _bindep_txt.encode())
            finally:
                os.close(fd)
            # mkstemp() is only used here to pick a unique path for the
            # remote copy; delete the local placeholder it creates so it
            # does not accumulate on the machine running the test.
            fd, remote_temp_path = tempfile.mkstemp(suffix='.txt',
                                                    prefix='bindep-')
            os.close(fd)
            os.remove(remote_temp_path)
            remote.put_file(local_temp_path, remote_temp_path)
        finally:
            # the local copy is not needed once uploaded (or on failure)
            os.remove(local_temp_path)
        try:
            run_in_pykmip_venv(ctx, remote, ['pip', 'install', 'bindep'])
            r = run_in_pykmip_venv(ctx, remote,
                    ['bindep', '--brief', '--file', remote_temp_path],
                    stdout=BytesIO(),
                    check_status=False) # returns 1 on success?
        finally:
            # do not leave the uploaded bindep file behind on the remote
            remote.run(args=['rm', '-f', remote_temp_path])
        packages[client] = r.stdout.getvalue().decode().splitlines()
        for dep in packages[client]:
            install_package(dep, remote)
    try:
        yield
    finally:
        log.info('Removing system dependencies of PyKMIP...')

        for client in config:
            (remote,) = ctx.cluster.only(client).remotes.keys()
            for dep in packages[client]:
                remove_package(dep, remote)
def create_pykmip_conf(ctx, cclient, cconfig):
    """
    Render pykmip.conf for the given client and upload it to its remote.

    The certificate names given in ``cconfig`` ('servercert',
    'clientcert', 'clientca') are looked up in ``ctx.ssl_certificates``
    and replaced by the file paths stored on those entries. The endpoint
    (ip address, port, hostname) comes from ``ctx.pykmip.endpoints``.
    The rendered file is written to ``<pykmip dir>/pykmip.conf`` on the
    remote.

    Raises ConfigError if the server certificate, its key or the client
    ca cannot be resolved. The client certificate is optional.
    """
    log.info('#0 cclient={} cconfig={}'.format(pprint.pformat(cclient),pprint.pformat(cconfig)))
    (remote,) = ctx.cluster.only(cclient).remotes.keys()
    pykmip_ipaddr, pykmip_port, pykmip_hostname = ctx.pykmip.endpoints[cclient]
    log.info('#1 ip,p,h {} {} {}'.format(pykmip_ipaddr, pykmip_port, pykmip_hostname))
    clientca = cconfig.get('clientca', None)
    log.info('#2 clientca {}'.format(clientca))
    serverkey = None
    servercert = cconfig.get('servercert', None)
    log.info('#3 servercert {}'.format(servercert))
    servercert = ctx.ssl_certificates.get(servercert)
    log.info('#4 servercert {}'.format(servercert))
    clientkey = None
    clientcert = cconfig.get('clientcert', None)
    log.info('#3 clientcert {}'.format(clientcert))
    clientcert = ctx.ssl_certificates.get(clientcert)
    log.info('#4 clientcert {}'.format(clientcert))
    clientca = ctx.ssl_certificates.get(clientca)
    log.info('#5 clientca {}'.format(clientca))
    # from here on the *cert / *key / clientca names hold file paths
    # instead of certificate records
    if servercert is not None:
        serverkey = servercert.key
        servercert = servercert.certificate
        log.info('#6 serverkey {} servercert {}'.format(serverkey, servercert))
    if clientcert is not None:
        clientkey = clientcert.key
        clientcert = clientcert.certificate
        log.info('#6 clientkey {} clientcert {}'.format(clientkey, clientcert))
    if clientca is not None:
        clientca = clientca.certificate
        log.info('#7 clientca {}'.format(clientca))
    if servercert is None or clientca is None or serverkey is None:
        log.info('#8 clientca {} serverkey {} servercert {}'.format(clientca, serverkey, servercert))
        raise ConfigError('pykmip: Missing/bad servercert or clientca')
    pykmipdir = get_pykmip_dir(ctx)
    kmip_conf = _pykmip_configuration.format(
        ipaddr=pykmip_ipaddr,
        port=pykmip_port,
        confdir=pykmipdir,
        hostname=pykmip_hostname,
        clientca=clientca,
        clientkey=clientkey,
        clientcert=clientcert,
        serverkey=serverkey,
        servercert=servercert
    )
    fd, local_temp_path = tempfile.mkstemp(suffix='.conf',
                                           prefix='pykmip')
    try:
        try:
            os.write(fd, kmip_conf.encode())
        finally:
            os.close(fd)
        remote.put_file(local_temp_path, pykmipdir+'/pykmip.conf')
    finally:
        # remove the local copy even when writing or uploading fails
        os.remove(local_temp_path)
+ """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + copy_policy_json(ctx, cclient, cconfig) + create_pykmip_conf(ctx, cclient, cconfig) + try: + yield + finally: + pass + +def has_ceph_task(tasks): + for task in tasks: + for name, conf in task.items(): + if name == 'ceph': + return True + return False + +@contextlib.contextmanager +def run_pykmip(ctx, config): + assert isinstance(config, dict) + if hasattr(ctx, 'daemons'): + pass + elif has_ceph_task(ctx.config['tasks']): + log.info('Delay start pykmip so ceph can do once-only daemon logic') + try: + yield + finally: + pass + else: + ctx.daemons = DaemonGroup() + log.info('Running pykmip...') + + pykmipdir = get_pykmip_dir(ctx) + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'pykmip.public' + '.' + client_id + + run_cmd = 'cd ' + pykmipdir + ' && ' + \ + '. 
.pykmipenv/bin/activate && ' + \ + 'HOME={}'.format(pykmipdir) + ' && ' + \ + 'exec pykmip-server -f pykmip.conf -l ' + \ + pykmipdir + '/pykmip.log & { read; kill %1; }' + + ctx.daemons.add_daemon( + remote, 'pykmip', client_public_with_id, + cluster=cluster_name, + args=['bash', '-c', run_cmd], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=pykmipdir, + wait=False, + check_status=False, + ) + + # sleep driven synchronization + time.sleep(10) + try: + yield + finally: + log.info('Stopping PyKMIP instance') + ctx.daemons.get_daemon('pykmip', client_public_with_id, + cluster_name).stop() + +make_keys_template = """ +from kmip.pie import client +from kmip import enums +import ssl +import sys +import json +from io import BytesIO + +c = client.ProxyKmipClient(config_file="{replace-with-config-file-path}") + +rl=[] +for kwargs in {replace-with-secrets}: + with c: + key_id = c.create( + enums.CryptographicAlgorithm.AES, + 256, + operation_policy_name='default', + cryptographic_usage_mask=[ + enums.CryptographicUsageMask.ENCRYPT, + enums.CryptographicUsageMask.DECRYPT + ], + **kwargs + ) + c.activate(key_id) + attrs = c.get_attributes(uid=key_id) + r = {} + for a in attrs[1]: + r[str(a.attribute_name)] = str(a.attribute_value) + rl.append(r) +print(json.dumps(rl)) +""" + +@contextlib.contextmanager +def create_secrets(ctx, config): + """ + Create and activate any requested keys in kmip + """ + assert isinstance(config, dict) + + pykmipdir = get_pykmip_dir(ctx) + pykmip_conf_path = pykmipdir + '/pykmip.conf' + my_output = BytesIO() + for (client,cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + secrets=cconf.get('secrets') + if secrets: + secrets_json = json.dumps(cconf['secrets']) + make_keys = make_keys_template \ + .replace("{replace-with-secrets}",secrets_json) \ + .replace("{replace-with-config-file-path}",pykmip_conf_path) + my_output.truncate() + remote.run(args=[run.Raw('. 
cephtest/pykmip/.pykmipenv/bin/activate;' \ + + 'python')], stdin=make_keys, stdout = my_output) + ctx.pykmip.keys[client] = json.loads(my_output.getvalue().decode()) + try: + yield + finally: + pass + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure PyKMIP + + Example of configuration: + + tasks: + - install: + - ceph: + conf: + client: + rgw crypt s3 kms backend: kmip + rgw crypt kmip ca path: /home/ubuntu/cephtest/ca/kmiproot.crt + rgw crypt kmip client cert: /home/ubuntu/cephtest/ca/kmip-client.crt + rgw crypt kmip client key: /home/ubuntu/cephtest/ca/kmip-client.key + rgw crypt kmip kms key template: pykmip-$keyid + - openssl_keys: + kmiproot: + client: client.0 + cn: kmiproot + key-type: rsa:4096 + - openssl_keys: + kmip-server: + client: client.0 + ca: kmiproot + kmip-client: + client: client.0 + ca: kmiproot + cn: rgw-client + - pykmip: + client.0: + force-branch: master + clientca: kmiproot + servercert: kmip-server + clientcert: kmip-client + secrets: + - name: pykmip-key-1 + - name: pykmip-key-2 + - rgw: + client.0: + use-pykmip-role: client.0 + - s3tests: + client.0: + force-branch: master + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task pykmip only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. 
+ for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('pykmip', {})) + + log.debug('PyKMIP config is %s', config) + + if not hasattr(ctx, 'ssl_certificates'): + raise ConfigError('pykmip must run after the openssl_keys task') + + + ctx.pykmip = argparse.Namespace() + ctx.pykmip.endpoints = assign_ports(ctx, config, 5696) + ctx.pykmip.keys = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: install_packages(ctx=ctx, config=config), + lambda: configure_pykmip(ctx=ctx, config=config), + lambda: run_pykmip(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/python.py b/qa/tasks/python.py new file mode 100644 index 000000000..4ddb14f71 --- /dev/null +++ b/qa/tasks/python.py @@ -0,0 +1,45 @@ +import logging +from teuthology import misc as teuthology +from tasks.vip import subst_vip + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Execute some python code. + + tasks: + - python: + host.a: | + import boto3 + c = boto3.resource(...) + + The provided dict is normally indexed by role. You can also include a + 'sudo: false' key to run the code without sudo. + + tasks: + - python: + sudo: false + host.b: | + import boto3 + c = boto3.resource(...) 
+ """ + assert isinstance(config, dict), "task python got invalid config" + + testdir = teuthology.get_testdir(ctx) + + sudo = config.pop('sudo', True) + + for role, code in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running python on role %s host %s', role, remote.name) + log.info(code) + args=[ + 'TESTDIR={tdir}'.format(tdir=testdir), + 'python3', + ] + if sudo: + args = ['sudo'] + args + remote.run(args=args, stdin=subst_vip(ctx, code)) + diff --git a/qa/tasks/qemu.py b/qa/tasks/qemu.py new file mode 100644 index 000000000..d4802d0cd --- /dev/null +++ b/qa/tasks/qemu.py @@ -0,0 +1,684 @@ +""" +Qemu task +""" + +import contextlib +import logging +import os +import yaml +import time + +from tasks import rbd +from tasks.util.workunit import get_refspec_after_overrides +from teuthology import contextutil +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.packaging import install_package, remove_package + +log = logging.getLogger(__name__) + +DEFAULT_NUM_DISKS = 2 +DEFAULT_IMAGE_URL = 'http://download.ceph.com/qa/ubuntu-12.04.qcow2' +DEFAULT_IMAGE_SIZE = 10240 # in megabytes +ENCRYPTION_HEADER_SIZE = 16 # in megabytes +DEFAULT_CPUS = 1 +DEFAULT_MEM = 4096 # in megabytes + +def normalize_disks(config): + # normalize the 'disks' parameter into a list of dictionaries + for client, client_config in config.items(): + clone = client_config.get('clone', False) + image_url = client_config.get('image_url', DEFAULT_IMAGE_URL) + device_type = client_config.get('type', 'filesystem') + encryption_format = client_config.get('encryption_format', 'none') + + disks = client_config.get('disks', DEFAULT_NUM_DISKS) + if not isinstance(disks, list): + disks = [{'image_name': '{client}.{num}'.format(client=client, + num=i)} + for i in range(int(disks))] + client_config['disks'] = disks + + for i, disk in enumerate(disks): + if 'action' not in disk: + 
disk['action'] = 'create' + assert disk['action'] in ['none', 'create', 'clone'], 'invalid disk action' + assert disk['action'] != 'clone' or 'parent_name' in disk, 'parent_name required for clone' + + if 'image_size' not in disk: + disk['image_size'] = DEFAULT_IMAGE_SIZE + disk['image_size'] = int(disk['image_size']) + + if 'image_url' not in disk and i == 0: + disk['image_url'] = image_url + + if 'device_type' not in disk: + disk['device_type'] = device_type + + disk['device_letter'] = chr(ord('a') + i) + + if 'encryption_format' not in disk: + disk['encryption_format'] = encryption_format + assert disk['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + assert disks, 'at least one rbd device must be used' + + if clone: + for disk in disks: + if disk['action'] != 'create': + continue + clone = dict(disk) + clone['action'] = 'clone' + clone['parent_name'] = clone['image_name'] + clone['image_name'] += '-clone' + del disk['device_letter'] + disks.append(clone) + +def create_images(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk.get('action') != 'create' or ( + 'image_url' in disk and + disk['encryption_format'] == 'none'): + continue + image_size = disk['image_size'] + if disk['encryption_format'] != 'none': + image_size += ENCRYPTION_HEADER_SIZE + create_config = { + client: { + 'image_name': disk['image_name'], + 'image_format': 2, + 'image_size': image_size, + 'encryption_format': disk['encryption_format'], + } + } + managers.append( + lambda create_config=create_config: + rbd.create_image(ctx=ctx, config=create_config) + ) + +def create_clones(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk['action'] != 'clone': + continue + + create_config = { + client: { + 'image_name': disk['image_name'], + 'parent_name': disk['parent_name'] + } + } + managers.append( + 
lambda create_config=create_config: + rbd.clone_image(ctx=ctx, config=create_config) + ) + +def create_encrypted_devices(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk['encryption_format'] == 'none' or \ + 'device_letter' not in disk: + continue + + dev_config = {client: disk} + managers.append( + lambda dev_config=dev_config: + rbd.dev_create(ctx=ctx, config=dev_config) + ) + +@contextlib.contextmanager +def create_dirs(ctx, config): + """ + Handle directory creation and cleanup + """ + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'install', '-d', '-m0755', '--', + '{tdir}/qemu'.format(tdir=testdir), + '{tdir}/archive/qemu'.format(tdir=testdir), + ] + ) + try: + yield + finally: + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def install_block_rbd_driver(ctx, config): + """ + Make sure qemu rbd block driver (block-rbd.so) is installed + """ + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + if remote.os.package_type == 'rpm': + block_rbd_pkg = 'qemu-kvm-block-rbd' + else: + block_rbd_pkg = 'qemu-block-extra' + install_package(block_rbd_pkg, remote) + try: + yield + finally: + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remove_package(block_rbd_pkg, remote) + +@contextlib.contextmanager +def generate_iso(ctx, config): + """Execute system commands to generate iso""" + log.info('generating iso...') + testdir = teuthology.get_testdir(ctx) + + # 
use ctx.config instead of config, because config has been + # through teuthology.replace_all_with_clients() + refspec = get_refspec_after_overrides(ctx.config, {}) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + log.info('Pulling tests from %s ref %s', git_url, refspec) + + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + test = client_config['test'] + + (remote,) = ctx.cluster.only(client).remotes.keys() + + clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=client) + remote.run(args=refspec.clone(git_url, clone_dir)) + + src_dir = os.path.dirname(__file__) + userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client) + metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client) + + with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f: + test_setup = ''.join(f.readlines()) + # configuring the commands to setup the nfs mount + mnt_dir = "/export/{client}".format(client=client) + test_setup = test_setup.format( + mnt_dir=mnt_dir + ) + + with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f: + test_teardown = ''.join(f.readlines()) + + user_data = test_setup + + disks = client_config['disks'] + for disk in disks: + if disk['device_type'] != 'filesystem' or \ + 'device_letter' not in disk or \ + 'image_url' in disk: + continue + dev_letter = disk['device_letter'] + user_data += """ +- | + #!/bin/bash + mkdir /mnt/test_{dev_letter} + mkfs -t xfs /dev/vd{dev_letter} + mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter} +""".format(dev_letter=dev_letter) + + user_data += """ +- | + #!/bin/bash + test -d /etc/ceph || mkdir /etc/ceph + cp /mnt/cdrom/ceph.* /etc/ceph/ +""" + + cloud_config_archive = client_config.get('cloud_config_archive', []) + if cloud_config_archive: + user_data += yaml.safe_dump(cloud_config_archive, default_style='|', + default_flow_style=False) + + # this may change later to pass the directories as args to the + # script or 
something. xfstests needs that. + user_data += """ +- | + #!/bin/bash + test -d /mnt/test_b && cd /mnt/test_b + /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success +""" + test_teardown + + user_data = user_data.format( + ceph_branch=ctx.config.get('branch'), + ceph_sha1=ctx.config.get('sha1')) + remote.write_file(userdata_path, user_data) + + with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f: + remote.write_file(metadata_path, f) + + test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client) + + log.info('fetching test %s for %s', test, client) + remote.run( + args=[ + 'cp', '--', os.path.join(clone_dir, test), test_file, + run.Raw('&&'), + 'chmod', '755', test_file, + ], + ) + remote.run( + args=[ + 'genisoimage', '-quiet', '-input-charset', 'utf-8', + '-volid', 'cidata', '-joliet', '-rock', + '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + '-graft-points', + 'user-data={userdata}'.format(userdata=userdata_path), + 'meta-data={metadata}'.format(metadata=metadata_path), + 'ceph.conf=/etc/ceph/ceph.conf', + 'ceph.keyring=/etc/ceph/ceph.keyring', + 'test.sh={file}'.format(file=test_file), + ], + ) + try: + yield + finally: + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-rf', + '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + os.path.join(testdir, 'qemu', 'userdata.' + client), + os.path.join(testdir, 'qemu', 'metadata.' 
+ client), + '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client), + '{tdir}/qemu_clone.{client}'.format(tdir=testdir, client=client), + ], + ) + +@contextlib.contextmanager +def download_image(ctx, config): + """Downland base image, remove image file when done""" + log.info('downloading base image') + testdir = teuthology.get_testdir(ctx) + + client_base_files = {} + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + client_base_files[client] = [] + disks = client_config['disks'] + for disk in disks: + if disk['action'] != 'create' or 'image_url' not in disk: + continue + + base_file = '{tdir}/qemu/base.{name}.qcow2'.format(tdir=testdir, + name=disk['image_name']) + client_base_files[client].append(base_file) + + remote.run( + args=[ + 'wget', '-nv', '-O', base_file, disk['image_url'], + ] + ) + + if disk['encryption_format'] == 'none': + remote.run( + args=[ + 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', + base_file, 'rbd:rbd/{image_name}'.format(image_name=disk['image_name']) + ] + ) + else: + dev_config = {client: {'image_name': disk['image_name'], + 'encryption_format': disk['encryption_format']}} + raw_file = '{tdir}/qemu/base.{name}.raw'.format( + tdir=testdir, name=disk['image_name']) + client_base_files[client].append(raw_file) + remote.run( + args=[ + 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', + base_file, raw_file + ] + ) + with rbd.dev_create(ctx, dev_config): + remote.run( + args=[ + 'dd', 'if={name}'.format(name=raw_file), + 'of={name}'.format(name=dev_config[client]['device_path']), + 'bs=4M', 'conv=fdatasync' + ] + ) + + for disk in disks: + if disk['action'] == 'clone' or \ + disk['encryption_format'] != 'none' or \ + (disk['action'] == 'create' and 'image_url' not in disk): + continue + + remote.run( + args=[ + 'rbd', 'resize', + '--size={image_size}M'.format(image_size=disk['image_size']), + disk['image_name'], run.Raw('||'), 'true' + ] + ) + + try: + yield + finally: 
+ log.debug('cleaning up base image files') + for client, base_files in client_base_files.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for base_file in base_files: + remote.run( + args=[ + 'rm', '-f', base_file, + ], + ) + + +def _setup_nfs_mount(remote, client, service_name, mount_dir): + """ + Sets up an nfs mount on the remote that the guest can use to + store logs. This nfs mount is also used to touch a file + at the end of the test to indicate if the test was successful + or not. + """ + export_dir = "/export/{client}".format(client=client) + log.info("Creating the nfs export directory...") + remote.run(args=[ + 'sudo', 'mkdir', '-p', export_dir, + ]) + log.info("Mounting the test directory...") + remote.run(args=[ + 'sudo', 'mount', '--bind', mount_dir, export_dir, + ]) + log.info("Adding mount to /etc/exports...") + export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format( + dir=export_dir + ) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), + '/etc/exports' + ]) + remote.run(args=[ + 'echo', export, run.Raw("|"), + 'sudo', 'tee', '-a', "/etc/exports", + ]) + log.info("Restarting NFS...") + if remote.os.package_type == "deb": + remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart']) + else: + remote.run(args=['sudo', 'systemctl', 'restart', service_name]) + + +def _teardown_nfs_mount(remote, client, service_name): + """ + Tears down the nfs mount on the remote used for logging and reporting the + status of the tests being ran in the guest. 
+ """ + log.info("Tearing down the nfs mount for {remote}".format(remote=remote)) + export_dir = "/export/{client}".format(client=client) + log.info("Stopping NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'stop' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'stop', service_name + ]) + log.info("Unmounting exported directory...") + remote.run(args=[ + 'sudo', 'umount', export_dir + ]) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), + '/etc/exports' + ]) + log.info("Starting NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'start' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'start', service_name + ]) + + +@contextlib.contextmanager +def run_qemu(ctx, config): + """Setup kvm environment and start qemu""" + procs = [] + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client) + remote.run( + args=[ + 'mkdir', log_dir, run.Raw('&&'), + 'sudo', 'modprobe', 'kvm', + ] + ) + + nfs_service_name = 'nfs' + if remote.os.name in ['rhel', 'centos'] and float(remote.os.version) >= 8: + nfs_service_name = 'nfs-server' + + # make an nfs mount to use for logging and to + # allow to test to tell teuthology the tests outcome + _setup_nfs_mount(remote, client, nfs_service_name, log_dir) + + # Hack to make sure /dev/kvm permissions are set correctly + # See http://tracker.ceph.com/issues/17977 and + # https://bugzilla.redhat.com/show_bug.cgi?id=1333159 + remote.run(args='sudo udevadm control --reload') + remote.run(args='sudo udevadm trigger /dev/kvm') + remote.run(args='ls -l /dev/kvm') + + qemu_cmd = 'qemu-system-x86_64' + if remote.os.package_type == "rpm": + qemu_cmd = 
"/usr/libexec/qemu-kvm" + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + qemu_cmd, '-enable-kvm', '-nographic', '-cpu', 'host', + '-smp', str(client_config.get('cpus', DEFAULT_CPUS)), + '-m', str(client_config.get('memory', DEFAULT_MEM)), + # cd holding metadata for cloud-init + '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + ] + + cachemode = 'none' + ceph_config = ctx.ceph['ceph'].conf.get('global', {}) + ceph_config.update(ctx.ceph['ceph'].conf.get('client', {})) + ceph_config.update(ctx.ceph['ceph'].conf.get(client, {})) + if ceph_config.get('rbd cache', True): + if ceph_config.get('rbd cache max dirty', 1) > 0: + cachemode = 'writeback' + else: + cachemode = 'writethrough' + + disks = client_config['disks'] + for disk in disks: + if 'device_letter' not in disk: + continue + + if disk['encryption_format'] == 'none': + disk_spec = 'rbd:rbd/{img}:id={id}'.format( + img=disk['image_name'], + id=client[len('client.'):] + ) + else: + disk_spec = disk['device_path'] + + args.extend([ + '-drive', + 'file={disk_spec},format=raw,if=virtio,cache={cachemode}'.format( + disk_spec=disk_spec, + cachemode=cachemode, + ), + ]) + time_wait = client_config.get('time_wait', 0) + + log.info('starting qemu...') + procs.append( + remote.run( + args=args, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + ) + + try: + yield + finally: + log.info('waiting for qemu tests to finish...') + run.wait(procs) + + if time_wait > 0: + log.debug('waiting {time_wait} sec for workloads detect finish...'.format( + time_wait=time_wait)); + time.sleep(time_wait) + + log.debug('checking that qemu tests succeeded...') + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + # ensure we have permissions to all the logs + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, + client=client) + remote.run( + args=[ + 'sudo', 'chmod', 'a+rw', 
'-R', log_dir + ] + ) + + # teardown nfs mount + _teardown_nfs_mount(remote, client, nfs_service_name) + # check for test status + remote.run( + args=[ + 'test', '-f', + '{tdir}/archive/qemu/{client}/success'.format( + tdir=testdir, + client=client + ), + ], + ) + log.info("Deleting exported directory...") + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run(args=[ + 'sudo', 'rm', '-r', '/export' + ]) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run a test inside of QEMU on top of rbd. Only one test + is supported per client. + + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + client.1: + test: http://download.ceph.com/qa/test2.sh + + Or use the same settings on all clients: + + tasks: + - ceph: + - qemu: + all: + test: http://download.ceph.com/qa/test.sh + + For tests that want to explicitly describe the RBD images to connect: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + clone: True/False (optionally clone all created disks), + image_url: <URL> (optional default image URL) + type: filesystem / block (optional default device type) + disks: [ + { + action: create / clone / none (optional, defaults to create) + image_name: <image name> (optional) + parent_name: <parent_name> (if action == clone), + type: filesystem / block (optional, defaults to fileystem) + image_url: <URL> (optional), + image_size: <MiB> (optional) + encryption_format: luks1 / luks2 / none (optional, defaults to none) + }, ... 
+ ] + + You can set the amount of CPUs and memory the VM has (default is 1 CPU and + 4096 MB):: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + cpus: 4 + memory: 512 # megabytes + + If you need to configure additional cloud-config options, set cloud_config + to the required data set:: + + tasks: + - ceph + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + cloud_config_archive: + - | + #/bin/bash + touch foo1 + - content: | + test data + type: text/plain + filename: /tmp/data + """ + assert isinstance(config, dict), \ + "task qemu only supports a dictionary for configuration" + + config = teuthology.replace_all_with_clients(ctx.cluster, config) + normalize_disks(config) + + managers = [] + create_images(ctx=ctx, config=config, managers=managers) + managers.extend([ + lambda: create_dirs(ctx=ctx, config=config), + lambda: install_block_rbd_driver(ctx=ctx, config=config), + lambda: generate_iso(ctx=ctx, config=config), + lambda: download_image(ctx=ctx, config=config), + ]) + create_clones(ctx=ctx, config=config, managers=managers) + create_encrypted_devices(ctx=ctx, config=config, managers=managers) + managers.append( + lambda: run_qemu(ctx=ctx, config=config), + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py new file mode 100644 index 000000000..349f46efc --- /dev/null +++ b/qa/tasks/rados.py @@ -0,0 +1,277 @@ +""" +Rados modle-based integration tests +""" +import contextlib +import logging +import gevent +from teuthology import misc as teuthology + + +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run RadosModel-based integration tests. 
+ + The config should be as follows:: + + rados: + clients: [client list] + ops: <number of ops> + objects: <number of objects to use> + max_in_flight: <max number of operations in flight> + object_size: <size of objects in bytes> + min_stride_size: <minimum write stride size in bytes> + max_stride_size: <maximum write stride size in bytes> + op_weights: <dictionary mapping operation type to integer weight> + runs: <number of times to run> - the pool is remade between runs + ec_pool: use an ec pool + erasure_code_profile: profile to use with the erasure coded pool + fast_read: enable ec_pool's fast_read + min_size: set the min_size of created pool + pool_snaps: use pool snapshots instead of selfmanaged snapshots + write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED. + This mean data don't access in the near future. + Let osd backend don't keep data in cache. + + For example:: + + tasks: + - ceph: + - rados: + clients: [client.0] + ops: 1000 + max_seconds: 0 # 0 for no limit + objects: 25 + max_in_flight: 16 + object_size: 4000000 + min_stride_size: 1024 + max_stride_size: 4096 + op_weights: + read: 20 + write: 10 + delete: 2 + snap_create: 3 + rollback: 2 + snap_remove: 0 + ec_pool: create an ec pool, defaults to False + erasure_code_use_overwrites: test overwrites, default false + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + crush-failure-domain: osd + pool_snaps: true + write_fadvise_dontneed: true + runs: 10 + - interactive: + + Optionally, you can provide the pool name to run against: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo + - rados: + clients: [client.0] + pools: [foo] + ... + + Alternatively, you can provide a pool prefix: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo.client.0 + - rados: + clients: [client.0] + pool_prefix: foo + ... + + The tests are run asynchronously, they are not complete when the task + returns. 
For instance: + + - rados: + clients: [client.0] + pools: [ecbase] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + - print: "**** done rados ec-cache-agent (part 2)" + + will run the print task immediately after the rados tasks begins but + not after it completes. To make the rados task a blocking / sequential + task, use: + + - sequential: + - rados: + clients: [client.0] + pools: [ecbase] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + - print: "**** done rados ec-cache-agent (part 2)" + + """ + log.info('Beginning rados...') + assert isinstance(config, dict), \ + "please list clients to run on" + + object_size = int(config.get('object_size', 4000000)) + op_weights = config.get('op_weights', {}) + testdir = teuthology.get_testdir(ctx) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph_test_rados'] + if config.get('ec_pool', False): + args.extend(['--no-omap']) + if not config.get('erasure_code_use_overwrites', False): + args.extend(['--ec-pool']) + if config.get('write_fadvise_dontneed', False): + args.extend(['--write-fadvise-dontneed']) + if config.get('set_redirect', False): + args.extend(['--set_redirect']) + if config.get('set_chunk', False): + args.extend(['--set_chunk']) + if config.get('enable_dedup', False): + args.extend(['--enable_dedup']) + if config.get('low_tier_pool', None): + args.extend(['--low_tier_pool', config.get('low_tier_pool', None)]) + if config.get('pool_snaps', False): + args.extend(['--pool-snaps']) + if config.get('balance_reads', False): + args.extend(['--balance-reads']) + if config.get('localize_reads', False): + args.extend(['--localize-reads']) + args.extend([ + '--max-ops', str(config.get('ops', 10000)), + '--objects', str(config.get('objects', 500)), + '--max-in-flight', str(config.get('max_in_flight', 16)), + '--size', str(object_size), + '--min-stride-size', 
str(config.get('min_stride_size', object_size // 10)), + '--max-stride-size', str(config.get('max_stride_size', object_size // 5)), + '--max-seconds', str(config.get('max_seconds', 0)) + ]) + + weights = {} + weights['read'] = 100 + weights['write'] = 100 + weights['delete'] = 10 + # Parallel of the op_types in test/osd/TestRados.cc + for field in [ + # read handled above + # write handled above + # delete handled above + "snap_create", + "snap_remove", + "rollback", + "setattr", + "rmattr", + "watch", + "copy_from", + "hit_set_list", + "is_dirty", + "undirty", + "cache_flush", + "cache_try_flush", + "cache_evict", + "append", + "write", + "read", + "delete" + ]: + if field in op_weights: + weights[field] = op_weights[field] + + if config.get('write_append_excl', True): + if 'write' in weights: + weights['write'] = weights['write'] // 2 + weights['write_excl'] = weights['write'] + + if 'append' in weights: + weights['append'] = weights['append'] // 2 + weights['append_excl'] = weights['append'] + + for op, weight in weights.items(): + args.extend([ + '--op', op, str(weight) + ]) + + + def thread(): + """Thread spawned by gevent""" + clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + log.info('clients are %s' % clients) + manager = ctx.managers['ceph'] + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + for i in range(int(config.get('runs', '1'))): + log.info("starting run %s out of %s", str(i), config.get('runs', '1')) + tests = {} + existing_pools = config.get('pools', []) + created_pools = [] + for role in config.get('clients', clients): + assert isinstance(role, str) + PREFIX = 'client.' 
+ assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + + pool = config.get('pool', None) + if not pool and existing_pools: + pool = existing_pools.pop() + else: + pool = manager.create_pool_with_unique_name( + erasure_code_profile_name=profile_name, + erasure_code_use_overwrites= + config.get('erasure_code_use_overwrites', False) + ) + created_pools.append(pool) + if config.get('fast_read', False): + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, 'fast_read', 'true') + min_size = config.get('min_size', None); + if min_size is not None: + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, 'min_size', str(min_size)) + + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + + ["--pool", pool], + logger=log.getChild("rados.{id}".format(id=id_)), + stdin=run.PIPE, + wait=False + ) + tests[id_] = proc + run.wait(tests.values()) + + for pool in created_pools: + manager.wait_snap_trimming_complete(pool); + manager.remove_pool(pool) + + running = gevent.spawn(thread) + + try: + yield + finally: + log.info('joining rados') + running.get() diff --git a/qa/tasks/radosbench.py b/qa/tasks/radosbench.py new file mode 100644 index 000000000..0804840c5 --- /dev/null +++ b/qa/tasks/radosbench.py @@ -0,0 +1,143 @@ +""" +Rados benchmarking +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run radosbench + + The config should be as follows: + + radosbench: + clients: [client list] + time: <seconds to run> + pool: <pool to use> + size: write size to use + concurrency: max number of outstanding writes (16) + objectsize: object size to use + unique_pool: use a unique pool, defaults to False + ec_pool: create an ec pool, defaults to False + create_pool: create pool, defaults to True + erasure_code_profile: + name: 
teuthologyprofile + k: 2 + m: 1 + crush-failure-domain: osd + cleanup: false (defaults to true) + type: <write|seq|rand> (defaults to write) + example: + + tasks: + - ceph: + - radosbench: + clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning radosbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + radosbench = {} + + testdir = teuthology.get_testdir(ctx) + manager = ctx.managers['ceph'] + runtype = config.get('type', 'write') + + create_pool = config.get('create_pool', True) + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + + cleanup = [] + if not config.get('cleanup', True): + cleanup = ['--no-cleanup'] + write_to_omap = [] + if config.get('write-omap', False): + write_to_omap = ['--write-omap'] + log.info('omap writes') + + pool = config.get('pool', 'data') + if create_pool: + if pool != 'data': + manager.create_pool(pool, erasure_code_profile_name=profile_name) + else: + pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + + concurrency = config.get('concurrency', 16) + osize = config.get('objectsize', 65536) + if osize == 0: + objectsize = [] + else: + objectsize = ['--object-size', str(osize)] + size = ['-b', str(config.get('size', 65536))] + # If doing a reading run then populate data + if runtype != "write": + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + ['-t', str(concurrency)] + + size + objectsize + + ['-p' , pool, + 'bench', str(60), 
"write", "--no-cleanup" + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + wait=True + ) + size = [] + objectsize = [] + + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + size + objectsize + + ['-p' , pool, + 'bench', str(config.get('time', 360)), runtype, + ] + write_to_omap + cleanup).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + radosbench[id_] = proc + + try: + yield + finally: + timeout = config.get('time', 360) * 30 + 300 + log.info('joining radosbench (timing out after %ss)', timeout) + run.wait(radosbench.values(), timeout=timeout) + + if pool != 'data' and create_pool: + manager.remove_pool(pool) diff --git a/qa/tasks/radosbenchsweep.py b/qa/tasks/radosbenchsweep.py new file mode 100644 index 000000000..df0ba1ed1 --- /dev/null +++ b/qa/tasks/radosbenchsweep.py @@ -0,0 +1,222 @@ +""" +Rados benchmarking sweep +""" +import contextlib +import logging +import re + +from io import BytesIO +from itertools import product + +from teuthology.orchestra import run +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute a radosbench parameter sweep + + Puts radosbench in a loop, taking values from the given config at each + iteration. If given, the min and max values below create a range, e.g. + min_replicas=1 and max_replicas=3 implies executing with 1-3 replicas. 
+ + Parameters: + + clients: [client list] + time: seconds to run (default=120) + sizes: [list of object sizes] (default=[4M]) + mode: <write|read|seq> (default=write) + repetitions: execute the same configuration multiple times (default=1) + min_num_replicas: minimum number of replicas to use (default = 3) + max_num_replicas: maximum number of replicas to use (default = 3) + min_num_osds: the minimum number of OSDs in a pool (default=all) + max_num_osds: the maximum number of OSDs in a pool (default=all) + file: name of CSV-formatted output file (default='radosbench.csv') + columns: columns to include (default=all) + - rep: execution number (takes values from 'repetitions') + - num_osd: number of osds for pool + - num_replica: number of replicas + - avg_throughput: throughput + - avg_latency: latency + - stdev_throughput: + - stdev_latency: + + Example: + - radsobenchsweep: + columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput] + """ + log.info('Beginning radosbenchsweep...') + assert isinstance(config, dict), 'expecting dictionary for configuration' + + # get and validate config values + # { + + # only one client supported for now + if len(config.get('clients', [])) != 1: + raise Exception("Only one client can be specified") + + # only write mode + if config.get('mode', 'write') != 'write': + raise Exception("Only 'write' mode supported for now.") + + # OSDs + total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd') + min_num_osds = config.get('min_num_osds', total_osds_in_cluster) + max_num_osds = config.get('max_num_osds', total_osds_in_cluster) + + if max_num_osds > total_osds_in_cluster: + raise Exception('max_num_osds cannot be greater than total in cluster') + if min_num_osds < 1: + raise Exception('min_num_osds cannot be less than 1') + if min_num_osds > max_num_osds: + raise Exception('min_num_osds cannot be greater than max_num_osd') + osds = range(0, (total_osds_in_cluster + 1)) + + # replicas + min_num_replicas = 
config.get('min_num_replicas', 3) + max_num_replicas = config.get('max_num_replicas', 3) + + if min_num_replicas < 1: + raise Exception('min_num_replicas cannot be less than 1') + if min_num_replicas > max_num_replicas: + raise Exception('min_num_replicas cannot be greater than max_replicas') + if max_num_replicas > max_num_osds: + raise Exception('max_num_replicas cannot be greater than max_num_osds') + replicas = range(min_num_replicas, (max_num_replicas + 1)) + + # object size + sizes = config.get('size', [4 << 20]) + + # repetitions + reps = range(config.get('repetitions', 1)) + + # file + fname = config.get('file', 'radosbench.csv') + f = open('{}/{}'.format(ctx.archive, fname), 'w') + f.write(get_csv_header(config) + '\n') + # } + + # set default pools size=1 to avoid 'unhealthy' issues + ctx.manager.set_pool_property('data', 'size', 1) + ctx.manager.set_pool_property('metadata', 'size', 1) + ctx.manager.set_pool_property('rbd', 'size', 1) + + current_osds_out = 0 + + # sweep through all parameters + for osds_out, size, replica, rep in product(osds, sizes, replicas, reps): + + osds_in = total_osds_in_cluster - osds_out + + if osds_in == 0: + # we're done + break + + if current_osds_out != osds_out: + # take an osd out + ctx.manager.raw_cluster_cmd( + 'osd', 'reweight', str(osds_out-1), '0.0') + wait_until_healthy(ctx, config) + current_osds_out = osds_out + + if osds_in not in range(min_num_osds, (max_num_osds + 1)): + # no need to execute with a number of osds that wasn't requested + continue + + if osds_in < replica: + # cannot execute with more replicas than available osds + continue + + run_radosbench(ctx, config, f, osds_in, size, replica, rep) + + f.close() + + yield + + +def get_csv_header(conf): + all_columns = [ + 'rep', 'num_osd', 'num_replica', 'avg_throughput', + 'avg_latency', 'stdev_throughput', 'stdev_latency' + ] + given_columns = conf.get('columns', None) + if given_columns and len(given_columns) != 0: + for column in given_columns: + if 
column not in all_columns: + raise Exception('Unknown column ' + column) + return ','.join(conf['columns']) + else: + conf['columns'] = all_columns + return ','.join(all_columns) + + +def run_radosbench(ctx, config, f, num_osds, size, replica, rep): + pool = ctx.manager.create_pool_with_unique_name() + + ctx.manager.set_pool_property(pool, 'size', replica) + + wait_until_healthy(ctx, config) + + log.info('Executing with parameters: ') + log.info(' num_osd =' + str(num_osds)) + log.info(' size =' + str(size)) + log.info(' num_replicas =' + str(replica)) + log.info(' repetition =' + str(rep)) + + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + + proc = remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{}/archive/coverage'.format(teuthology.get_testdir(ctx)), + 'rados', + '--no-log-to-stderr', + '--name', role, + '-b', str(size), + '-p', pool, + 'bench', str(config.get('time', 120)), 'write', + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + stdout=BytesIO(), + wait=False + ) + + # parse output to get summary and format it as CSV + proc.wait() + out = proc.stdout.getvalue() + all_values = { + 'stdev_throughput': re.sub(r'Stddev Bandwidth: ', '', re.search( + r'Stddev Bandwidth:.*', out).group(0)), + 'stdev_latency': re.sub(r'Stddev Latency: ', '', re.search( + r'Stddev Latency:.*', out).group(0)), + 'avg_throughput': re.sub(r'Bandwidth \(MB/sec\): ', '', re.search( + r'Bandwidth \(MB/sec\):.*', out).group(0)), + 'avg_latency': re.sub(r'Average Latency: ', '', re.search( + r'Average Latency:.*', out).group(0)), + 'rep': str(rep), + 'num_osd': str(num_osds), + 'num_replica': str(replica) + } + values_to_write = [] + for column in config['columns']: + values_to_write.extend([all_values[column]]) + f.write(','.join(values_to_write) + '\n') + + 
ctx.manager.remove_pool(pool) + + +def wait_until_healthy(ctx, config): + first_mon = teuthology.get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + teuthology.wait_until_healthy(ctx, mon_remote) diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py new file mode 100644 index 000000000..836736f1a --- /dev/null +++ b/qa/tasks/radosgw_admin.py @@ -0,0 +1,1088 @@ +""" +Rgw admin testing against a running instance +""" +# The test cases in this file have been annotated for inventory. +# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# to run this standalone: +# python qa/tasks/radosgw_admin.py [USER] HOSTNAME +# + +import json +import logging +import time +import datetime +import sys + +from io import BytesIO +from queue import Queue + +import boto.exception +import boto.s3.connection +import boto.s3.acl + +import httplib2 + + +from tasks.util.rgw import rgwadmin, get_user_summary, get_user_successful_ops + +log = logging.getLogger(__name__) + +def usage_acc_findentry2(entries, user, add=True): + for e in entries: + if e['user'] == user: + return e + if not add: + return None + e = {'user': user, 'buckets': []} + entries.append(e) + return e +def usage_acc_findsum2(summaries, user, add=True): + for e in summaries: + if e['user'] == user: + return e + if not add: + return None + e = {'user': user, 'categories': [], + 'total': {'bytes_received': 0, + 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 }} + summaries.append(e) + return e +def usage_acc_update2(x, out, b_in, err): + x['bytes_sent'] += b_in + x['bytes_received'] += out + x['ops'] += 1 + if not err: + x['successful_ops'] += 1 +def usage_acc_validate_fields(r, x, x2, what): + q=[] + for field in ['bytes_sent', 'bytes_received', 'ops', 'successful_ops']: + try: + if x2[field] < x[field]: + q.append("field %s: %d < %d" % (field, x2[field], x[field])) + except Exception as ex: + r.append( 
"missing/bad field " + field + " in " + what + " " + str(ex)) + return + if len(q) > 0: + r.append("incomplete counts in " + what + ": " + ", ".join(q)) +class usage_acc: + def __init__(self): + self.results = {'entries': [], 'summary': []} + def findentry(self, user): + return usage_acc_findentry2(self.results['entries'], user) + def findsum(self, user): + return usage_acc_findsum2(self.results['summary'], user) + def e2b(self, e, bucket, add=True): + for b in e['buckets']: + if b['bucket'] == bucket: + return b + if not add: + return None + b = {'bucket': bucket, 'categories': []} + e['buckets'].append(b) + return b + def c2x(self, c, cat, add=True): + for x in c: + if x['category'] == cat: + return x + if not add: + return None + x = {'bytes_received': 0, 'category': cat, + 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 } + c.append(x) + return x + def update(self, c, cat, user, out, b_in, err): + x = self.c2x(c, cat) + usage_acc_update2(x, out, b_in, err) + if not err and cat == 'create_bucket' and 'owner' not in x: + x['owner'] = user + def make_entry(self, cat, bucket, user, out, b_in, err): + if cat == 'create_bucket' and err: + return + e = self.findentry(user) + b = self.e2b(e, bucket) + self.update(b['categories'], cat, user, out, b_in, err) + s = self.findsum(user) + x = self.c2x(s['categories'], cat) + usage_acc_update2(x, out, b_in, err) + x = s['total'] + usage_acc_update2(x, out, b_in, err) + def generate_make_entry(self): + return lambda cat,bucket,user,out,b_in,err: self.make_entry(cat, bucket, user, out, b_in, err) + def get_usage(self): + return self.results + def compare_results(self, results): + if 'entries' not in results or 'summary' not in results: + return ['Missing entries or summary'] + r = [] + for e in self.results['entries']: + try: + e2 = usage_acc_findentry2(results['entries'], e['user'], False) + except Exception as ex: + r.append("malformed entry looking for user " + + e['user'] + " " + str(ex)) + break + if e2 == None: + 
r.append("missing entry for user " + e['user']) + continue + for b in e['buckets']: + c = b['categories'] + if b['bucket'] == 'nosuchbucket': + print("got here") + try: + b2 = self.e2b(e2, b['bucket'], False) + if b2 != None: + c2 = b2['categories'] + except Exception as ex: + r.append("malformed entry looking for bucket " + + b['bucket'] + " in user " + e['user'] + " " + str(ex)) + break + if b2 == None: + r.append("can't find bucket " + b['bucket'] + + " in user " + e['user']) + continue + for x in c: + try: + x2 = self.c2x(c2, x['category'], False) + except Exception as ex: + r.append("malformed entry looking for " + + x['category'] + " in bucket " + b['bucket'] + + " user " + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "entry: category " + + x['category'] + " bucket " + b['bucket'] + + " in user " + e['user']) + for s in self.results['summary']: + c = s['categories'] + try: + s2 = usage_acc_findsum2(results['summary'], s['user'], False) + except Exception as ex: + r.append("malformed summary looking for user " + e['user'] + + " " + str(ex)) + break + if s2 == None: + r.append("missing summary for user " + e['user'] + " " + str(ex)) + continue + try: + c2 = s2['categories'] + except Exception as ex: + r.append("malformed summary missing categories for user " + + e['user'] + " " + str(ex)) + break + for x in c: + try: + x2 = self.c2x(c2, x['category'], False) + except Exception as ex: + r.append("malformed summary looking for " + + x['category'] + " user " + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "summary: category " + + x['category'] + " in user " + e['user']) + x = s['total'] + try: + x2 = s2['total'] + except Exception as ex: + r.append("malformed summary looking for totals for user " + + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "summary: totals for user" + e['user']) + return r + +def ignore_this_entry(cat, bucket, user, out, b_in, err): + pass +class 
requestlog_queue(): + def __init__(self, add): + self.q = Queue(1000) + self.adder = add + def handle_request_data(self, request, response, error=False): + now = datetime.datetime.now() + if error: + pass + elif response.status < 200 or response.status >= 400: + error = True + self.q.put({'t': now, 'o': request, 'i': response, 'e': error}) + def clear(self): + with self.q.mutex: + self.q.queue.clear() + def log_and_clear(self, cat, bucket, user, add_entry = None): + while not self.q.empty(): + j = self.q.get() + bytes_out = 0 + if 'Content-Length' in j['o'].headers: + bytes_out = int(j['o'].headers['Content-Length']) + bytes_in = 0 + msg = j['i'].msg + if 'content-length'in msg: + bytes_in = int(msg['content-length']) + log.info('RL: %s %s %s bytes_out=%d bytes_in=%d failed=%r' + % (cat, bucket, user, bytes_out, bytes_in, j['e'])) + if add_entry == None: + add_entry = self.adder + add_entry(cat, bucket, user, bytes_out, bytes_in, j['e']) + +def create_presigned_url(conn, method, bucket_name, key_name, expiration): + return conn.generate_url(expires_in=expiration, + method=method, + bucket=bucket_name, + key=key_name, + query_auth=True, + ) + +def send_raw_http_request(conn, method, bucket_name, key_name, follow_redirects = False): + url = create_presigned_url(conn, method, bucket_name, key_name, 3600) + print(url) + h = httplib2.Http() + h.follow_redirects = follow_redirects + return h.request(url, method) + + +def get_acl(key): + """ + Helper function to get the xml acl from a key, ensuring that the xml + version tag is removed from the acl response + """ + raw_acl = key.get_xml_acl().decode() + + def remove_version(string): + return string.split( + '<?xml version="1.0" encoding="UTF-8"?>' + )[-1] + + def remove_newlines(string): + return string.strip('\n') + + return remove_version( + remove_newlines(raw_acl) + ) + +def task(ctx, config): + """ + Test radosgw-admin functionality against a running rgw instance. 
+ """ + global log + + assert ctx.rgw.config, \ + "radosgw_admin task needs a config passed from the rgw task" + config = ctx.rgw.config + log.debug('config is: %r', config) + + clients_from_config = config.keys() + + # choose first client as default + client = next(iter(clients_from_config)) + + # once the client is chosen, pull the host name and assigned port out of + # the role_endpoints that were assigned by the rgw task + endpoint = ctx.rgw.role_endpoints[client] + + ## + user1='foo' + user2='fud' + user3='bar' + user4='bud' + subuser1='foo:foo1' + subuser2='foo:foo2' + display_name1='Foo' + display_name2='Fud' + display_name3='Bar' + email='foo@foo.com' + access_key='9te6NH5mcdcq0Tc5i8i1' + secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2='p5YnriCv1nAtykxBrupQ' + secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + access_key3='NX5QOQKC6BH2IDN8HC7A' + secret_key3='LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' + swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name='myfoo' + bucket_name2='mybar' + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection2 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key2, + aws_secret_access_key=secret_key2, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection3 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key3, + aws_secret_access_key=secret_key3, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + acc = usage_acc() + rl = requestlog_queue(acc.generate_make_entry()) + connection.set_request_hook(rl) + 
connection2.set_request_hook(rl) + connection3.set_request_hook(rl) + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + + # TESTCASE 'usage-show0' 'usage' 'show' 'all usage' 'succeeds' + (err, summary0) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + + # TESTCASE 'info-nosuch','user','info','non-existent user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user1, + '--display-name', display_name1, + '--email', email, + '--access-key', access_key, + '--secret', secret_key, + '--max-buckets', '4' + ], + check_status=True) + + # TESTCASE 'duplicate email','user','create','existing user email','fails' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--email', email, + ]) + assert err + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['suspended'] + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], 
check_status=True) + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'create', '--uid', user1, + '--access-key', access_key2, '--secret', secret_key2, + ], check_status=True) + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], + check_status=True) + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--uid', user1, + '--access-key', access_key2, + ], check_status=True) + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + subuser_access = 'full' + subuser_perm = 'full-control' + + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser1, + '--access', subuser_access + ], check_status=True) + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'modify', '--subuser', subuser1, + '--secret', swift_secret1, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + + assert out['subusers'][0]['permissions'] == subuser_perm + + # TESTCASE 'info-swift-key','user','info','after key 
addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser2, + '--secret', swift_secret2, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--subuser', subuser1, + '--key-type', 'swift', + ], check_status=True) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser1, + ], check_status=True) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser2, + '--key-type', 'swift', '--purge-keys', + ], check_status=True) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], + check_status=True) + assert len(out) == 0 + + # TESTCASE 
'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + rl.log_and_clear("create_bucket", bucket_name, user1) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0] == bucket_name + + bucket_list = connection.get_all_buckets() + assert len(bucket_list) == 1 + assert bucket_list[0].name == bucket_name + + rl.log_and_clear("list_buckets", '', user1) + + # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) + assert len(out) >= 1 + assert bucket_name in out; + + # TESTCASE 'max-bucket-limit,'bucket','create','4 buckets','5th bucket fails due to max buckets == 4' + bucket2 = connection.create_bucket(bucket_name + '2') + rl.log_and_clear("create_bucket", bucket_name + '2', user1) + bucket3 = connection.create_bucket(bucket_name + '3') + rl.log_and_clear("create_bucket", bucket_name + '3', user1) + bucket4 = connection.create_bucket(bucket_name + '4') + rl.log_and_clear("create_bucket", bucket_name + '4', user1) + # the 5th should fail. 
+ failed = False + try: + connection.create_bucket(bucket_name + '5') + except Exception: + failed = True + assert failed + rl.log_and_clear("create_bucket", bucket_name + '5', user1) + + # delete the buckets + bucket2.delete() + rl.log_and_clear("delete_bucket", bucket_name + '2', user1) + bucket3.delete() + rl.log_and_clear("delete_bucket", bucket_name + '3', user1) + bucket4.delete() + rl.log_and_clear("delete_bucket", bucket_name + '4', user1) + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['owner'] == user1 + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? + + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + #validate we have a positive user stats now + (err, out) = rgwadmin(ctx, client, + ['user', 'stats','--uid', user1, '--sync-stats'], + check_status=True) + assert out['stats']['size'] > 0 + + # reclaim it + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (err, out) = rgwadmin(ctx, client, + ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name], + check_status=True) + + 
# create a second user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + rl.log_and_clear("put_obj", bucket_name, user1) + + # delete the object + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n=bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + + bucket_id = bucket_data['bucket']['bucket_id'] + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + # try to remove user, should fail (has a linked bucket) + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2]) + assert err + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked' + (err, out) = rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name], + check_status=True) + + # relink the bucket to the first user and delete the second user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--uid', user1, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2], + check_status=True) + + #TESTCASE 'bucket link', 'bucket', 'tenanted user', 'succeeds' + tenant_name = "testx" + # create a tenanted user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 
'create', + '--tenant', tenant_name, + '--uid', 'tenanteduser', + '--display-name', 'tenanted-user', + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # link the bucket to a tenanted user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--bucket', '/' + bucket_name, '--tenant', tenant_name, '--uid', 'tenanteduser'], + check_status=True) + + # check if the bucket name has tenant/ prefix + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n= tenant_name + '/' + bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + assert bucket_data['bucket']['tenant'] == tenant_name + + # relink the bucket to the first user and delete the tenanted user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--bucket', tenant_name + '/' + bucket_name, '--uid', user1], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--tenant', tenant_name, '--uid', 'tenanteduser'], + check_status=True) + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + rl.log_and_clear("put_obj", bucket_name, user1) + + # fetch it too (for usage stats presently) + s = key.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name, user1) + assert s == object_name + # list bucket too (for usage stats presently) + keys = list(bucket.list()) + rl.log_and_clear("list_bucket", bucket_name, user1) + assert len(keys) == 1 + assert keys[0].name == object_name + + # now delete it + (err, out) = rgwadmin(ctx, client, + ['object', 'rm', '--bucket', bucket_name, '--object', object_name], + check_status=True) + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, 
client, [ + 'bucket', 'stats', '--bucket', bucket_name], + check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # list log objects + # TESTCASE 'log-list','log','list','after activity','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True) + assert len(out) > 0 + + for obj in out: + # TESTCASE 'log-show','log','show','after activity','returns expected info' + if obj[:4] == 'meta' or obj[:4] == 'data' or obj[:18] == 'obj_delete_at_hint': + continue + + (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj], + check_status=True) + assert len(rgwlog) > 0 + + # exempt bucket_name2 from checking as it was only used for multi-region tests + assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0 + assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id + assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2 + for entry in rgwlog['log_entries']: + log.debug('checking log entry: ', entry) + assert entry['bucket'] == rgwlog['bucket'] + possible_buckets = [bucket_name + '5', bucket_name2] + user = entry['user'] + assert user == user1 or user.endswith('system-user') or \ + rgwlog['bucket'] in possible_buckets + + # TESTCASE 'log-rm','log','rm','delete log objects','succeeds' + (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj], + check_status=True) + + # TODO: show log by bucket+date + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + denied = False + try: + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + denied = True + assert e.status == 403 + + assert 
denied + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], + check_status=True) + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + rl.log_and_clear("put_obj", bucket_name, user1) + + # now delete the head + big_key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # wait a bit to give the garbage collector time to cycle + time.sleep(15) + + (err, out) = rgwadmin(ctx, client, ['gc', 'list']) + + assert len(out) > 0 + + # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage' + (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True) + + #confirm + (err, out) = rgwadmin(ctx, client, ['gc', 'list']) + + assert len(out) == 0 + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + rl.log_and_clear("delete_bucket", bucket_name, user1) + + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + bucket.delete() + rl.log_and_clear("delete_bucket", bucket_name, user1) + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + + # create an object + key = 
boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') + rl.log_and_clear("put_obj", bucket_name, user1) + + # should be private already but guarantee it + key.set_acl('private') + rl.log_and_clear("put_acls", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key.decode()], + check_status=True, format='xml') + + acl = get_acl(key) + rl.log_and_clear("get_acls", bucket_name, user1) + + assert acl == out.strip('\n') + + # add another grantee by making the object public read + key.set_acl('public-read') + rl.log_and_clear("put_acls", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key.decode()], + check_status=True, format='xml') + + acl = get_acl(key) + rl.log_and_clear("get_acls", bucket_name, user1) + + assert acl == out.strip('\n') + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + rl.log_and_clear("put_obj", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'], + check_status=True) + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps='user=read' + (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps]) + + assert out['caps'][0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps]) + + assert not out['caps'] + + # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key = 
boto.s3.key.Key(bucket) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + rl.log_and_clear("put_obj", bucket_name, user1) + + time.sleep(35) + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (2 * 60): # wait up to 20 minutes + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj']) # one of the operations we did is delete_obj, should be present. + if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + + r = acc.compare_results(out) + if len(r) != 0: + sys.stderr.write(("\n".join(r))+"\n") + assert(len(r) == 0) + + user_summary = get_user_summary(out, user1) + + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat], + check_status=True) + 
assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # TESTCASE 'user-rename', 'user', 'rename', 'existing user', 'new user', 'succeeds' + # create a new user user3 + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user3, + '--display-name', display_name3, + '--access-key', access_key3, + '--secret', secret_key3, + '--max-buckets', '4' + ], + check_status=True) + + # create a bucket + bucket = connection3.create_bucket(bucket_name + '6') + + rl.log_and_clear("create_bucket", bucket_name + '6', user3) + + # create object + object_name1 = 'thirteen' + key1 = boto.s3.key.Key(bucket, object_name1) + key1.set_contents_from_string(object_name1) + rl.log_and_clear("put_obj", bucket_name + '6', user3) + + # rename user3 + (err, out) = rgwadmin(ctx, client, ['user', 'rename', '--uid', user3, '--new-uid', user4], check_status=True) + assert out['user_id'] == user4 + assert out['keys'][0]['access_key'] == access_key3 + assert out['keys'][0]['secret_key'] == secret_key3 + + time.sleep(5) + + # get bucket and object to test if user keys are preserved + bucket = connection3.get_bucket(bucket_name + '6') + s = key1.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '6', user4) + assert s == object_name1 + + # TESTCASE 'user-rename', 'user', 'rename', 'existing user', 'another existing user', 'fails' + # create a new user user2 + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '4' + ], + check_status=True) + + # create a bucket + bucket = connection2.create_bucket(bucket_name + '7') + + rl.log_and_clear("create_bucket", bucket_name + '7', user2) + + # create object + object_name2 = 
'fourteen' + key2 = boto.s3.key.Key(bucket, object_name2) + key2.set_contents_from_string(object_name2) + rl.log_and_clear("put_obj", bucket_name + '7', user2) + + (err, out) = rgwadmin(ctx, client, ['user', 'rename', '--uid', user4, '--new-uid', user2]) + assert err + + # test if user 2 and user4 can still access their bucket and objects after rename fails + bucket = connection3.get_bucket(bucket_name + '6') + s = key1.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '6', user4) + assert s == object_name1 + + bucket = connection2.get_bucket(bucket_name + '7') + s = key2.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '7', user2) + assert s == object_name2 + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user4, '--purge-data' ], + check_status=True) + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user2, '--purge-data' ], + check_status=True) + + time.sleep(5) + + # should be all through with connection. (anything using connection + # should be BEFORE the usage stuff above.) 
+ rl.log_and_clear("(before-close)", '-', '-', ignore_this_entry) + connection.close() + connection = None + + # the usage flush interval is 30 seconds, wait that much an then some + # to make sure everything has been flushed + time.sleep(35) + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1], + check_status=True) + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user1, '--purge-data' ], + check_status=True) + + # TESTCASE 'rm-user3','user','rm','deleted user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule' + # + + (err, out) = rgwadmin(ctx, client, ['zone', 'get','--rgw-zone','default']) + orig_placement_pools = len(out['placement_pools']) + + # removed this test, it is not correct to assume that zone has default placement, it really + # depends on how we set it up before + # + # assert len(out) > 0 + # assert len(out['placement_pools']) == 1 + + # default_rule = out['placement_pools'][0] + # assert default_rule['key'] == 'default-placement' + + rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}} + + out['placement_pools'].append(rule) + + (err, out) = rgwadmin(ctx, client, ['zone', 'set'], + stdin=BytesIO(json.dumps(out).encode()), + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['zone', 'get']) + assert len(out) > 0 + assert len(out['placement_pools']) == orig_placement_pools + 1 + + zonecmd = ['zone', 'placement', 'rm', + '--rgw-zone', 'default', + '--placement-id', 'new-placement'] + + (err, out) = rgwadmin(ctx, client, zonecmd, check_status=True) + + # TESTCASE 'zonegroup-info', 
def main():
    """
    Run the radosgw-admin task against a single host from the command line.

    Usage: ``radosgw_admin.py [user] host``.  With two arguments the first
    is used as the login user (``user@host``); with one argument only the
    host is given.  Any other argument count prints the usage line to
    stderr and exits with status 1.

    The teuthology ``config`` object is populated with a one-remote cluster
    and the ``rgw`` attributes set below, then ``task`` is run with it and
    the process exits.
    """
    if len(sys.argv) == 3:
        user = sys.argv[1] + "@"
        host = sys.argv[2]
    elif len(sys.argv) == 2:
        user = ""
        host = sys.argv[1]
    else:
        sys.stderr.write("usage: radosgw_admin.py [user] host\n")
        # sys.exit() rather than the bare exit() helper: the latter is
        # injected by the ``site`` module for interactive use and is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)

    # The same role name keys the cluster remotes, the endpoints and the
    # per-client rgw config, so build it once.
    role = 'ceph.client.rgw.%s' % host

    client0 = remote.Remote(user + host)
    ctx = config
    ctx.cluster = cluster.Cluster(remotes=[(client0, [role])])
    ctx.rgw = argparse.Namespace()
    ctx.rgw.role_endpoints = {role: (host, 80)}
    ctx.rgw.realm = None
    ctx.rgw.regions = {
        'region0': {
            'api name': 'api1',
            'is master': True,
            'master zone': 'r0z0',
            'zones': ['r0z0', 'r0z1'],
        },
    }
    ctx.rgw.config = {
        role: {'system user': {'name': '%s-system-user' % host}},
    }
    # ctx is the same object as config, so this runs the task on the
    # context assembled above.
    task(config, None)
    sys.exit()
def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False):
    """
    Perform a radosgw admin command through the REST interface.

    :param connection: boto connection; its ``calling_format`` and
                       ``server_name()`` are used to build the request
                       path and host, and it is passed to
                       ``request.authorize()``.
    :param cmd: sequence ``[resource, action]``, e.g. ``['user', 'info']``.
                ``cmd[0]`` selects the admin resource (or one of its
                sub-resources such as ``'key'``) and ``cmd[1]`` selects
                the HTTP method.
    :param params: query parameters handed to the ``requests`` handler.
    :param headers: headers used when building the request.
    :param raw: when true, return the response text without decoding it.
    :returns: ``(status_code, body)``; ``body`` is the response text when
              ``raw`` is set, ``None`` when the response is empty, and
              the decoded JSON otherwise.
    :raises ValueError: if ``cmd`` names an unsupported action or resource.
    """
    log.info('radosgw-admin-rest: %s %s', cmd, params)
    put_cmds = ['create', 'link', 'add']
    post_cmds = ['unlink', 'modify']
    delete_cmds = ['trim', 'rm', 'process']
    get_cmds = ['check', 'info', 'show', 'list']

    bucket_sub_resources = ['object', 'policy', 'index']
    user_sub_resources = ['subuser', 'key', 'caps']
    zone_sub_resources = ['pool', 'log', 'garbage']

    def get_cmd_method_and_handler(cmd):
        """
        Get the rest command and handler from information in cmd and
        from the imported requests object.
        """
        action = cmd[1]
        if action in put_cmds:
            return 'PUT', requests.put
        elif action in delete_cmds:
            return 'DELETE', requests.delete
        elif action in post_cmds:
            return 'POST', requests.post
        elif action in get_cmds:
            return 'GET', requests.get
        # Fail loudly here instead of returning None, which would only
        # surface later as an opaque tuple-unpacking TypeError.
        raise ValueError('unsupported admin rest action: %r' % (action,))

    def get_resource(cmd):
        """
        Get the name of the resource from information in cmd.

        Returns ``(resource, sub_resource)``; ``sub_resource`` is the
        empty string when cmd names the top-level resource itself.
        """
        name = cmd[0]
        if name == 'bucket' or name in bucket_sub_resources:
            parent = 'bucket'
        elif name == 'user' or name in user_sub_resources:
            parent = 'user'
        elif name == 'usage':
            return 'usage', ''
        elif name == 'zone' or name in zone_sub_resources:
            parent = 'zone'
        else:
            raise ValueError('unsupported admin rest resource: %r' % (name,))
        return parent, ('' if name == parent else name)

    def build_admin_request(conn, method, resource = '', headers=None, data='',
            query_args=None, params=None):
        """
        Build an administrative request adapted from the build_request()
        method of boto.connection
        """

        path = conn.calling_format.build_path_base('admin', resource)
        auth_path = conn.calling_format.build_auth_path('admin', resource)
        host = conn.calling_format.build_host(conn.server_name(), 'admin')
        if query_args:
            path += '?' + query_args
            boto.log.debug('path=%s' % path)
            auth_path += '?' + query_args
            boto.log.debug('auth_path=%s' % auth_path)
        return AWSAuthConnection.build_base_http_request(conn, method, path,
                auth_path, params, headers, data, host)

    method, handler = get_cmd_method_and_handler(cmd)
    resource, query_args = get_resource(cmd)
    request = build_admin_request(connection, method, resource,
                                  query_args=query_args, headers=headers)

    url = '{protocol}://{host}{path}'.format(protocol=request.protocol,
                                             host=request.host, path=request.path)

    # authorize() must run before request.headers is read below.
    request.authorize(connection=connection)
    result = handler(url, params=params, headers=request.headers)

    if raw:
        log.info(' text result: %s', result.text)
        return result.status_code, result.text
    if not result.content:
        # many admin requests return no body, so json() throws a JSONDecodeError
        log.info(' empty result')
        return result.status_code, None

    # Decode once and reuse for both the log line and the return value.
    body = result.json()
    log.info(' json result: %s', body)
    return result.status_code, body
functionality through the RESTful interface + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + # just use the first client... + client = next(iter(clients)) + + ## + admin_user = 'ada' + admin_display_name = 'Ms. Admin User' + admin_access_key = 'MH1WC2XQ1S8UISFDZC8W' + admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG' + admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write' + + user1 = 'foo' + user2 = 'fud' + subuser1 = 'foo:foo1' + subuser2 = 'foo:foo2' + display_name1 = 'Foo' + display_name2 = 'Fud' + email = 'foo@foo.com' + access_key = '9te6NH5mcdcq0Tc5i8i1' + secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2 = 'p5YnriCv1nAtykxBrupQ' + secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name = 'myfoo' + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + # TESTCASE 'create-admin-user','user','create','administrative user','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', admin_user, + '--display-name', admin_display_name, + '--access-key', admin_access_key, + '--secret', admin_secret_key, + '--max-buckets', '0', + '--caps', admin_caps + ]) + logging.error(out) + logging.error(err) + assert not err + + assert hasattr(ctx, 'rgw'), 'radosgw-admin-rest must run after the rgw task' + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'no rgw endpoint for {}'.format(client) + + admin_conn = 
boto.s3.connection.S3Connection( + aws_access_key_id=admin_access_key, + aws_secret_access_key=admin_secret_key, + is_secure=True if endpoint.cert else False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # TESTCASE 'info-nosuch','user','info','non-existent user','fails' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1}) + assert ret == 404 + + # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : user1, + 'display-name' : display_name1, + 'email' : email, + 'access-key' : access_key, + 'secret-key' : secret_key, + 'max-buckets' : '4' + }) + + assert ret == 200 + + # TESTCASE 'list-no-user','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 0}) + assert ret == 200 + assert out['count'] == 0 + assert out['truncated'] == True + assert len(out['keys']) == 0 + assert len(out['marker']) > 0 + + # TESTCASE 'list-user-without-marker','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1}) + assert ret == 200 + assert out['count'] == 1 + assert out['truncated'] == True + assert len(out['keys']) == 1 + assert len(out['marker']) > 0 + marker = out['marker'] + + # TESTCASE 'list-user-with-marker','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1, 'marker': marker}) + assert ret == 200 + assert out['count'] == 1 + assert out['truncated'] == False + assert len(out['keys']) == 1 + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == 
display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + assert out['tenant'] == '' + assert out['max_buckets'] == 4 + assert out['caps'] == [] + assert out['op_mask'] == 'read, write, delete' + assert out['default_placement'] == '' + assert out['default_storage_class'] == '' + assert out['placement_tags'] == [] + assert not out['bucket_quota']['enabled'] + assert not out['bucket_quota']['check_on_raw'] + assert out['bucket_quota']['max_size'] == -1 + assert out['bucket_quota']['max_size_kb'] == 0 + assert out['bucket_quota']['max_objects'] == -1 + assert not out['user_quota']['enabled'] + assert not out['user_quota']['check_on_raw'] + assert out['user_quota']['max_size'] == -1 + assert out['user_quota']['max_size_kb'] == 0 + assert out['user_quota']['max_objects'] == -1 + assert out['temp_url_keys'] == [] + assert out['type'] == 'rgw' + assert out['mfa_ids'] == [] + # TESTCASE 'info-existing','user','info','existing user query with wrong uid but correct access key','returns correct info' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'access-key' : access_key, 'uid': 'uid_not_exist'}) + + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + assert out['tenant'] == '' + assert out['max_buckets'] == 4 + assert out['caps'] == [] + assert out['op_mask'] == "read, write, delete" + assert out['default_placement'] == '' + assert out['default_storage_class'] == '' + assert out['placement_tags'] == [] + assert not out['bucket_quota']['enabled'] + assert not out['bucket_quota']['check_on_raw'] + assert out ['bucket_quota']['max_size'] == -1 + assert out ['bucket_quota']['max_size_kb'] == 0 + assert out 
['bucket_quota']['max_objects'] == -1 + assert not out['user_quota']['enabled'] + assert not out['user_quota']['check_on_raw'] + assert out['user_quota']['max_size'] == -1 + assert out['user_quota']['max_size_kb'] == 0 + assert out['user_quota']['max_objects'] == -1 + assert out['temp_url_keys'] == [] + assert out['type'] == 'rgw' + assert out['mfa_ids'] == [] + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) + assert ret == 200 + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert out['suspended'] + assert out['email'] == email + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) + assert not err + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'create'], + {'uid' : user1, + 'access-key' : access_key2, + 'secret-key' : secret_key2 + }) + + + assert ret == 200 + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'rm'], + {'uid' : user1, 
+ 'access-key' : access_key2 + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'create'], + {'subuser' : subuser1, + 'secret-key' : swift_secret1, + 'key-type' : 'swift' + }) + + assert ret == 200 + + # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'create'], + {'subuser' : subuser2, + 'secret-key' : swift_secret2, + 'key-type' : 'swift' + }) + + assert ret == 200 + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'rm'], + {'subuser' : subuser1, + 'key-type' :'swift' + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (ret, out) = rgwadmin_rest(admin_conn, + 
['subuser', 'rm'], + {'subuser' : subuser1 + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'rm'], + {'subuser' : subuser2, + 'key-type' : 'swift', + '{purge-keys' :True + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out) == 0 + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=True if endpoint.cert else False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) + assert ret == 200 + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out) == 1 + assert out[0] == bucket_name + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + + assert ret == 200 + assert out['owner'] == user1 + assert out['tenant'] == '' + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new 
empty bucket','succeeds, expected bucket ID' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) + assert ret == 200 + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? + + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + assert ret == 200 + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + # TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails, 'bucket not found error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'}) + assert ret == 404 + assert out['Code'] == 'NoSuchBucket' + + # reclaim it + key.delete() + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name}) + + assert ret == 200 + + # create a second user to link the bucket to + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : user2, + 'display-name' : display_name2, + 'access-key' : access_key2, + 'secret-key' : secret_key2, + 'max-buckets' : '1', + }) + + assert ret == 200 + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + + # delete the object + key.delete() + + # link the bucket to another user + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'link'], + {'uid' : user2, + 'bucket' : bucket_name, + 'bucket-id' : bucket_id, + }) + + assert 
ret == 200 + + # try creating an object with the first user which should cause an error + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('three') + except boto.exception.S3ResponseError: + denied = True + + assert denied + + # relink the bucket to the first user and delete the second user + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'link'], + {'uid' : user1, + 'bucket' : bucket_name, + 'bucket-id' : bucket_id, + }) + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2}) + assert ret == 200 + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + + # now delete it + (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name}) + assert ret == 200 + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + assert ret == 200 + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # create a bucket for deletion stats + useless_bucket = connection.create_bucket('useless-bucket') + useless_key = useless_bucket.new_key('useless_key') + useless_key.set_contents_from_string('useless string') + + # delete it + useless_key.delete() + useless_bucket.delete() + + # wait for the statistics to flush + time.sleep(60) + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'categories' : 'delete_obj'}) # last operation we did is delete obj, wait for it to flush + + if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + 
assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show']) + assert ret == 200 + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = get_user_summary(out, user1) + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) + assert ret == 200 + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat}) + assert ret == 200 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) + assert ret == 200 + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) + assert ret == 200 + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + try: + key = boto.s3.key.Key(bucket) + 
key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + assert e.status == 403 + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) + assert ret == 200 + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + + # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + + # now delete the head + big_key.delete() + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) + assert ret == 409 + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + + key.delete() + bucket.delete() + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = connection.create_bucket(bucket_name) + + # create an object + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') + + # should be private already but guarantee it + key.set_acl('private') + + (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) + assert ret == 200 + assert len(out['acl']['grant_map']) == 1 + + # add another grantee by making the object public read + key.set_acl('public-read') + + (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) + assert ret == 200 + assert len(out['acl']['grant_map']) == 2 + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = 
connection.create_bucket(bucket_name) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' : True}) + assert ret == 200 + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps = 'usage=read' + (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' : user1, 'user-caps' : caps}) + assert ret == 200 + assert out[0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' : user1, 'user-caps' : caps}) + assert ret == 200 + assert not out + + # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) + assert ret == 409 + + # TESTCASE 'rm-user2', 'user', 'rm', user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True}) + assert ret == 200 + + # TESTCASE 'rm-user3','user','info','deleted user','fails' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 404 + diff --git a/qa/tasks/ragweed.py b/qa/tasks/ragweed.py new file mode 100644 index 000000000..eae1806c1 --- /dev/null +++ b/qa/tasks/ragweed.py @@ -0,0 +1,376 @@ +""" +Run a set of s3 tests on rgw. 
def get_ragweed_branches(config, client_conf):
    """
    Work out which ragweed branch(es) to try for one client.

    A truthy ``force-branch`` in ``client_conf`` wins outright.  Otherwise
    the branch is ``client_conf['branch']`` when present, else one deduced
    from ``config['suite_branch']`` (falling back to ``config['branch']``):
    known release names get a ``ceph-`` prefix, anything else is used as is.
    A truthy ``default-branch`` is appended as a second candidate.

    :param config: job-level configuration dict
    :param client_conf: per-client configuration dict
    :returns: list of one or two branch names, in the order to try them
    """
    forced = client_conf.get('force-branch')
    if forced:
        return [forced]

    known_releases = ('master', 'nautilus', 'mimic',
                      'luminous', 'kraken', 'jewel')
    suite_branch = config.get('suite_branch', config.get('branch'))
    if suite_branch in known_releases:
        deduced = 'ceph-' + suite_branch
    else:
        deduced = suite_branch

    candidates = [client_conf.get('branch', deduced)]
    fallback = client_conf.get('default-branch')
    if fallback:
        candidates.append(fallback)
    return candidates
def _config_user(ragweed_conf, section, user):
    """
    Fill in the user settings of one config section.

    Sets ``user_id``, ``email``, ``display_name``, ``access_key`` and
    ``secret_key`` in ``ragweed_conf[section]``; values that are already
    present are left untouched.  The access key is 20 random uppercase
    letters and the secret key is 40 random bytes, base64-encoded.
    """
    user_conf = ragweed_conf[section]
    # The random values are generated unconditionally, exactly as a plain
    # setdefault() call would evaluate its default argument.
    settings = (
        ('user_id', user),
        ('email', '{user}+test@test.test'.format(user=user)),
        ('display_name', 'Mr. {user}'.format(user=user)),
        ('access_key',
         ''.join(random.choice(string.ascii_uppercase) for _ in range(20))),
        ('secret_key', base64.b64encode(os.urandom(40)).decode('ascii')),
    )
    for option, value in settings:
        user_conf.setdefault(option, value)
@contextlib.contextmanager
def configure(ctx, config, run_stages):
    """
    Configure ragweed and boto on every client.

    For each client in ``config['clients']`` this runs ragweed's
    ``./bootstrap`` script and, when the client's stages include
    ``prepare``, writes its entry of ``config['ragweed_conf']`` to
    ``<testdir>/archive/ragweed.<client>.conf``.  It then renders
    ``boto.cfg.template`` (located next to this module) into
    ``<testdir>/boto.cfg`` on each client.  On exit the rendered
    ``boto.cfg`` is removed again.

    :param ctx: Context passed to task
    :param config: dict with ``clients`` (client -> properties or None)
                   and ``ragweed_conf`` (client -> writable config object)
    :param run_stages: dict mapping each client to its list of stage names
    """
    assert isinstance(config, dict)
    log.info('Configuring ragweed...')
    testdir = teuthology.get_testdir(ctx)
    for client, properties in config['clients'].items():
        (remote,) = ctx.cluster.only(client).remotes.keys()
        remote.run(
            args=[
                'cd',
                '{tdir}/ragweed'.format(tdir=testdir),
                run.Raw('&&'),
                './bootstrap',
                ],
            )

        if 'prepare' not in run_stages[client]:
            # should have been prepared in a previous run
            continue

        ragweed_conf = config['ragweed_conf'][client]
        if properties is not None and 'slow_backend' in properties:
            ragweed_conf['fixtures']['slow backend'] = properties['slow_backend']

        conf_fp = BytesIO()
        ragweed_conf.write(conf_fp)
        remote.write_file(
            path='{tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client),
            data=conf_fp.getvalue(),
            )

    log.info('Configuring boto...')
    boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template')
    for client, properties in config['clients'].items():
        # idle_timeout is configured per client.  Looking it up only on the
        # wrapper dict (which carries 'clients' and 'ragweed_conf') would
        # always fall through to the default, so prefer the client's own
        # value and keep the wrapper-level lookup as a fallback.
        client_props = properties or {}
        idle_timeout = client_props.get(
            'idle_timeout', config.get('idle_timeout', 30))
        with open(boto_src, 'r') as f:
            (remote,) = ctx.cluster.only(client).remotes.keys()
            conf = f.read().format(idle_timeout=idle_timeout)
            remote.write_file('{tdir}/boto.cfg'.format(tdir=testdir), conf)

    try:
        yield

    finally:
        log.info('Cleaning up boto...')
        for client in config['clients']:
            (remote,) = ctx.cluster.only(client).remotes.keys()
            remote.run(
                args=[
                    'rm',
                    '{tdir}/boto.cfg'.format(tdir=testdir),
                    ],
                )
+ + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + attrs = ["!fails_on_rgw"] + for client, client_config in config.items(): + stages = ','.join(run_stages[client]) + args = [ + 'RAGWEED_CONF={tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client), + 'RAGWEED_STAGES={stages}'.format(stages=stages), + 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir), + '{tdir}/ragweed/virtualenv/bin/python'.format(tdir=testdir), + '-m', 'nose', + '-w', + '{tdir}/ragweed'.format(tdir=testdir), + '-v', + '-a', ','.join(attrs), + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + ctx.cluster.only(client).run( + args=args, + label="ragweed tests against rgw" + ) + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the ragweed suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - ragweed: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - ragweed: [client.0] + + To run against a server on client.1 and increase the boto timeout to 10m:: + + tasks: + - ceph: + - rgw: [client.1] + - ragweed: + client.0: + rgw_server: client.1 + idle_timeout: 600 + stages: prepare,check + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - ragweed: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert hasattr(ctx, 'rgw'), 'ragweed must run after the rgw task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task ragweed only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('ragweed', {})) + + log.debug('ragweed config is %s', config) + + ragweed_conf = {} + for client in clients: + # use rgw_server endpoint if given, or default to same client + target = config[client].get('rgw_server', client) + + endpoint = ctx.rgw.role_endpoints.get(target) + assert endpoint, 'ragweed: no rgw endpoint for {}'.format(target) + + ragweed_conf[client] = ConfigObj( + indent_type='', + infile={ + 'rgw': + { + 'host' : endpoint.dns_name, + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + }, + 'fixtures' : {}, + 'user system' : {}, + 'user regular' : {}, + 'rados': + { + 'ceph_conf' : '/etc/ceph/ceph.conf', + }, + } + ) + + run_stages = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + ragweed_conf=ragweed_conf, + config=config, + ), + run_stages=run_stages), + lambda: configure(ctx=ctx, config=dict( + clients=config, + ragweed_conf=ragweed_conf, + ), + run_stages=run_stages), + lambda: run_tests(ctx=ctx, config=config, run_stages=run_stages), 
@contextlib.contextmanager
def create_image(ctx, config):
    """
    Create an rbd image.

    For example::

        tasks:
        - ceph:
        - rbd.create_image:
            client.0:
                image_name: testimage
                image_size: 100
                image_format: 1
                encryption_format: luks2
            client.1:

    Image size is expressed as a number of megabytes; default value
    is 10240.

    Image format value must be either 1 or 2; default value is 1.

    When ``encryption_format`` is anything other than ``none`` the
    passphrase is written to ``<testdir>/passphrase`` on the role's remote
    and the image is formatted with ``rbd encryption format``.

    On exit every image is removed again, and the passphrase file is
    deleted on each role's remote.
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task create_image only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        images = config.items()
    else:
        images = [(role, None) for role in config]

    testdir = teuthology.get_testdir(ctx)
    passphrase_file = '{tdir}/passphrase'.format(tdir=testdir)
    for role, properties in images:
        if properties is None:
            properties = {}
        name = properties.get('image_name', default_image_name(role))
        size = properties.get('image_size', 10240)
        fmt = properties.get('image_format', 1)
        encryption_format = properties.get('encryption_format', 'none')
        (remote,) = ctx.cluster.only(role).remotes.keys()
        log.info('Creating image {name} with size {size}'.format(name=name,
                                                                 size=size))
        args = [
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            'rbd',
            '-p', 'rbd',
            'create',
            '--size', str(size),
            name,
        ]
        # omit the format option when using the default (format 1),
        # since old versions don't support it
        if int(fmt) != 1:
            args += ['--image-format', str(fmt)]
        remote.run(args=args)

        if encryption_format != 'none':
            remote.run(
                args=[
                    'echo',
                    ENCRYPTION_PASSPHRASE,
                    run.Raw('>'),
                    passphrase_file
                ]
            )
            remote.run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    'encryption',
                    'format',
                    name,
                    encryption_format,
                    passphrase_file,
                    '-p',
                    'rbd'
                ]
            )
    try:
        yield
    finally:
        log.info('Deleting rbd images...')
        for role, properties in images:
            if properties is None:
                properties = {}
            name = properties.get('image_name', default_image_name(role))
            (remote,) = ctx.cluster.only(role).remotes.keys()
            # Remove the passphrase file on this role's own remote.  Doing
            # it once with whatever `remote` the setup loop left behind
            # would clean only the last host and would fail with a
            # NameError when no images were configured.
            remote.run(args=['rm', '-f', passphrase_file])
            remote.run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    '-p', 'rbd',
                    'rm',
                    name,
                ],
            )
+def clone_image(ctx, config): + """ + Clones a parent imag + + For example:: + + tasks: + - ceph: + - rbd.clone_image: + client.0: + parent_name: testimage + image_name: cloneimage + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task clone_image only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + testdir = teuthology.get_testdir(ctx) + for role, properties in images: + if properties is None: + properties = {} + + name = properties.get('image_name', default_image_name(role)) + parent_name = properties.get('parent_name') + assert parent_name is not None, \ + "parent_name is required" + parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) + + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Clone image {parent} to {child}'.format(parent=parent_name, + child=name)) + for cmd in [('snap', 'create', parent_spec), + ('snap', 'protect', parent_spec), + ('clone', parent_spec, name)]: + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + + try: + yield + finally: + log.info('Deleting rbd clones...') + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + parent_name = properties.get('parent_name') + parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) + + (remote,) = ctx.cluster.only(role).remotes.keys() + + for cmd in [('rm', name), + ('snap', 'unprotect', parent_spec), + ('snap', 'rm', parent_spec)]: + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + +@contextlib.contextmanager +def modprobe(ctx, config): + """ + Load the rbd kernel module.. 
@contextlib.contextmanager
def dev_create(ctx, config):
    """
    Map block devices to rbd images.

    For example::

        tasks:
        - ceph:
        - rbd.create_image: [client.0]
        - rbd.modprobe: [client.0]
        - rbd.dev_create:
            client.0:
                image_name: testimage.client.0
                encryption_format: luks2

    Unencrypted images are mapped with plain ``rbd map`` and are expected
    at ``/dev/rbd/rbd/<image>``.  Encrypted images are mapped with
    ``-t nbd`` using a passphrase file written to ``<testdir>/passphrase``;
    their device path is taken from the output of ``rbd map`` and stored
    in the role's properties as ``device_path``.

    On exit every mapped device is unmapped, and the passphrase file is
    deleted on each role's remote.
    """
    assert isinstance(config, dict) or isinstance(config, list), \
        "task dev_create only supports a list or dictionary for configuration"

    if isinstance(config, dict):
        images = config.items()
    else:
        images = [(role, None) for role in config]

    log.info('Creating rbd block devices...')

    testdir = teuthology.get_testdir(ctx)
    passphrase_file = '{tdir}/passphrase'.format(tdir=testdir)
    device_path = {}

    for role, properties in images:
        if properties is None:
            properties = {}
        name = properties.get('image_name', default_image_name(role))
        encryption_format = properties.get('encryption_format', 'none')
        (remote,) = ctx.cluster.only(role).remotes.keys()

        if encryption_format == 'none':
            device_path[role] = '/dev/rbd/rbd/{image}'.format(image=name)
            device_specific_args = []
        else:
            remote.run(
                args=[
                    'echo',
                    ENCRYPTION_PASSPHRASE,
                    run.Raw('>'),
                    passphrase_file
                ]
            )
            device_specific_args = [
                '-t', 'nbd', '-o',
                'encryption-format=%s,encryption-passphrase-file=%s' % (
                    encryption_format, passphrase_file)]

        map_fp = StringIO()
        remote.run(
            args=[
                'sudo',
                'adjust-ulimits',
                'ceph-coverage',
                '{tdir}/archive/coverage'.format(tdir=testdir),
                'rbd',
                '--id', role.rsplit('.')[-1],
                '-p', 'rbd',
                'map',
                name] + device_specific_args,
            stdout=map_fp,
        )

        if encryption_format != 'none':
            # the nbd device name is only known from the output of `rbd map`
            device_path[role] = map_fp.getvalue().rstrip()
            properties['device_path'] = device_path[role]
            remote.run(args=['sudo', 'chmod', '666', device_path[role]])
    try:
        yield
    finally:
        log.info('Unmapping rbd devices...')
        for role, properties in images:
            (remote,) = ctx.cluster.only(role).remotes.keys()
            # Remove the passphrase file on this role's own remote.  Doing
            # it once with whatever `remote` the setup loop left behind
            # would clean only the last host and would fail with a
            # NameError when no images were configured.
            remote.run(args=['rm', '-f', passphrase_file])

            if not device_path.get(role):
                continue

            if properties is None:
                properties = {}
            encryption_format = properties.get('encryption_format', 'none')

            if encryption_format == 'none':
                device_specific_args = []
            else:
                device_specific_args = ['-t', 'nbd']

            remote.run(
                args=[
                    'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir),
                    'sudo',
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'rbd',
                    '-p', 'rbd',
                    'unmap',
                    device_path[role],
                ] + device_specific_args,
            )
def run_xfstests_one_client(ctx, role, properties):
    """
    Spawned routine to handle xfs tests for a single client.

    Downloads ``run_xfstests.sh`` from ``properties['xfstests_url']`` onto
    the role's remote, runs it against the canonicalized test and scratch
    devices, and afterwards removes the downloaded script and any uploaded
    exclude file.

    :param ctx: Context passed to task
    :param role: role name identifying the remote to run on
    :param properties: dict of settings; ``test_dev``, ``scratch_dev`` and
                       ``xfstests_url`` are required, while ``count``,
                       ``fs_type``, ``tests``, ``exclude`` and
                       ``randomize`` are read if present
    :raises AssertionError: if a required setting is missing
    """
    testdir = teuthology.get_testdir(ctx)
    # Everything the cleanup in ``finally`` relies on is bound before the
    # ``try`` so that an early failure (e.g. a missing test_dev) surfaces
    # as itself instead of being masked by a NameError during cleanup.
    test_script = 'run_xfstests.sh'
    test_path = os.path.join(testdir, test_script)
    remote = None
    exclude_path = None
    try:
        count = properties.get('count')
        test_dev = properties.get('test_dev')
        assert test_dev is not None, \
            "task run_xfstests requires test_dev to be defined"
        test_dev = canonical_path(ctx, role, test_dev)

        scratch_dev = properties.get('scratch_dev')
        assert scratch_dev is not None, \
            "task run_xfstests requires scratch_dev to be defined"
        scratch_dev = canonical_path(ctx, role, scratch_dev)

        fs_type = properties.get('fs_type')
        tests = properties.get('tests')
        exclude_list = properties.get('exclude')
        randomize = properties.get('randomize')

        (remote,) = ctx.cluster.only(role).remotes.keys()

        # Fetch the test script
        xfstests_url = properties.get('xfstests_url')
        assert xfstests_url is not None, \
            "task run_xfstests requires xfstests_url to be defined"

        xfstests_krbd_url = xfstests_url + '/' + test_script

        log.info('Fetching {script} for {role} from {url}'.format(
            script=test_script,
            role=role,
            url=xfstests_krbd_url))

        args = ['wget', '-O', test_path, '--', xfstests_krbd_url]
        remote.run(args=args)

        log.info('Running xfstests on {role}:'.format(role=role))
        log.info(' iteration count: {count}:'.format(count=count))
        log.info(' test device: {dev}'.format(dev=test_dev))
        log.info(' scratch device: {dev}'.format(dev=scratch_dev))
        log.info(' using fs_type: {fs_type}'.format(fs_type=fs_type))
        log.info(' tests to run: {tests}'.format(tests=tests))
        # ``exclude`` is optional; joining None would raise TypeError
        log.info(' exclude list: {}'.format(' '.join(exclude_list or [])))
        log.info(' randomize: {randomize}'.format(randomize=randomize))

        if exclude_list:
            with tempfile.NamedTemporaryFile(mode='w', prefix='exclude') as exclude_file:
                for test in exclude_list:
                    exclude_file.write("{}\n".format(test))
                exclude_file.flush()
                # the file is uploaded under the same path it has locally
                exclude_path = exclude_file.name
                remote.put_file(exclude_path, exclude_path)

        # Note that the device paths are interpreted using
        # readlink -f <path> in order to get their canonical
        # pathname (so it matches what the kernel remembers).
        args = [
            '/usr/bin/sudo',
            'TESTDIR={tdir}'.format(tdir=testdir),
            'adjust-ulimits',
            'ceph-coverage',
            '{tdir}/archive/coverage'.format(tdir=testdir),
            '/bin/bash',
            test_path,
            '-c', str(count),
            '-f', fs_type,
            '-t', test_dev,
            '-s', scratch_dev,
            ]
        if exclude_list:
            args.extend(['-x', exclude_path])
        if randomize:
            args.append('-r')
        if tests:
            args.extend(['--', tests])
        remote.run(args=args, logger=log.getChild(role))
    finally:
        if remote is not None:
            log.info('Removing {script} on {role}'.format(script=test_script,
                                                          role=role))
            leftovers = [test_path]
            if exclude_path is not None:
                # also drop the exclude file that was uploaded to the remote
                leftovers.append(exclude_path)
            remote.run(args=['rm', '-f'] + leftovers)
+ + For example:: + + tasks: + - ceph: + # Image sizes are in MB + - rbd.xfstests: + client.0: + count: 3 + test_image: 'test_image' + test_size: 250 + test_format: 2 + scratch_image: 'scratch_image' + scratch_size: 250 + scratch_format: 1 + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + exclude: + - generic/42 + randomize: true + xfstests_url: 'https://raw.github.com/ceph/ceph-ci/wip-55555/qa' + """ + if config is None: + config = { 'all': None } + assert isinstance(config, dict) or isinstance(config, list), \ + "task xfstests only supports a list or dictionary for configuration" + if isinstance(config, dict): + config = teuthology.replace_all_with_clients(ctx.cluster, config) + runs = config.items() + else: + runs = [(role, None) for role in config] + + running_xfstests = {} + for role, properties in runs: + assert role.startswith('client.'), \ + "task xfstests can only run on client nodes" + for host, roles_for_host in ctx.cluster.remotes.items(): + if role in roles_for_host: + assert host not in running_xfstests, \ + "task xfstests allows only one instance at a time per host" + running_xfstests[host] = True + + images_config = {} + scratch_config = {} + modprobe_config = {} + image_map_config = {} + scratch_map_config = {} + xfstests_config = {} + for role, properties in runs: + if properties is None: + properties = {} + + test_image = properties.get('test_image', 'test_image.{role}'.format(role=role)) + test_size = properties.get('test_size', 10000) # 10G + test_fmt = properties.get('test_format', 1) + scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role)) + scratch_size = properties.get('scratch_size', 10000) # 10G + scratch_fmt = properties.get('scratch_format', 1) + + images_config[role] = dict( + image_name=test_image, + image_size=test_size, + image_format=test_fmt, + ) + + scratch_config[role] = dict( + image_name=scratch_image, + image_size=scratch_size, + image_format=scratch_fmt, + ) + + 
@contextlib.contextmanager
def task(ctx, config):
    """
    Create an rbd image, map it, make a filesystem on it and mount it.

    Clients can be given as a plain list::

        tasks:
        - ceph:
        - rbd: [client.0, client.1]

    or as a dictionary carrying per-client image options::

        tasks:
        - ceph:
        - rbd:
            client.0: # uses defaults
            client.1:
                image_name: foo
                image_size: 2048
                image_format: 2
                fs_type: xfs

    Default options on every client::

        tasks:
        - ceph:
        - rbd:
            all:

    20GiB xfs-formatted images on every client::

        tasks:
        - ceph:
        - rbd:
            all:
              image_size: 20480
              fs_type: xfs
    """
    if config is None:
        config = { 'all': None }

    # Expand an 'all' entry into one entry per client; lists pass through.
    if isinstance(config, dict):
        norm_config = teuthology.replace_all_with_clients(ctx.cluster, config)
    else:
        norm_config = config

    # The mount step only needs role -> image name (or the bare role list).
    if isinstance(norm_config, dict):
        role_images = {
            role: ({} if properties is None else properties).get('image_name')
            for role, properties in norm_config.items()
        }
    else:
        role_images = norm_config

    log.debug('rbd config is: %s', norm_config)

    stages = [
        lambda: create_image(ctx=ctx, config=norm_config),
        lambda: modprobe(ctx=ctx, config=norm_config),
        lambda: dev_create(ctx=ctx, config=norm_config),
        lambda: generic_mkfs(ctx=ctx, config=norm_config,
                             devname_rtn=rbd_devname_rtn),
        lambda: generic_mount(ctx=ctx, config=role_images,
                              devname_rtn=rbd_devname_rtn),
    ]
    with contextutil.nested(*stages):
        yield
@contextlib.contextmanager
def task(ctx, config):
    """
    Create rbd images (and devices) and exercise IO on them with fio for
    the formats/features given in the config.

    The config is either keyed per client::

        client.0:
           fio-io-size: 100g or 80% or 100m
           fio-version: 2.2.9
           formats: [2]
           features: [[layering],[striping],[layering,exclusive-lock,object-map]]
           test-clone-io: 1  #remove this option to not run create rbd clone and not run io on clone
           io-engine: "sync or rbd or any io-engine"
           rw: randrw
        client.1:
           fio-io-size: 100g
           fio-version: 2.2.9
           rw: read
           image_size: 20480

    or holds a single ``all`` entry that is applied to every client::

        all:
           fio-io-size: 400g
           rw: randrw
           formats: [2]
           features: [[layering],[striping]]
           io-engine: libaio

    The fio jobs for the selected clients are run in parallel; the task
    yields once all of them have finished.
    """
    # Truthy 'all' entry => one shared config for every client remote.
    shared_config = config.get('all')
    clients = ctx.cluster.only(teuthology.is_type('client'))
    rbd_test_dir = teuthology.get_testdir(ctx) + "/rbd_fio_test"
    # A single parallel() context around all spawns lets the jobs actually
    # overlap; leaving the context waits for every spawned job.
    with parallel() as p:
        for remote, roles in clients.remotes.items():
            if shared_config:
                p.spawn(run_fio, remote, shared_config, rbd_test_dir)
                continue
            # Per-client config: run a job for each configured role that
            # lives on this remote.
            for client, client_config in config.items():
                if client in roles:
                    p.spawn(run_fio, remote, client_config, rbd_test_dir)

    yield
def _write_fio_job(fio_config, name, rw, target_key, target):
    """Append one fio job section (name, rw mode and I/O target) to the job file."""
    fio_config.write('[{name}]\n'.format(name=name))
    fio_config.write('rw={rw}\n'.format(rw=rw))
    fio_config.write('{key}={target}\n'.format(key=target_key, target=target))


def _create_rbd_clone(remote, snap_name, clone_name):
    """Snapshot an image, protect the snapshot and clone it."""
    log.info("Testing clones using fio")
    remote.run(args=['rbd', 'snap', 'create', snap_name])
    remote.run(args=['rbd', 'snap', 'protect', snap_name])
    remote.run(args=['rbd', 'clone', snap_name, clone_name])


def run_fio(remote, config, rbd_test_dir):
    """
    Generate a fio job file from ``config``, build fio on the remote and
    run it.

    For every (format, feature-set) combination an rbd image is created
    (plus a clone when ``test-clone-io`` is set) and a job section is
    added for it.  fio is downloaded from github into ``rbd_test_dir``,
    built and run there.  On exit all mapped rbd devices are unmapped, the
    build directory is removed and any package installed for the ioengine
    is removed again.

    :param remote: remote to run on
    :param config: dict of fio options; recognised keys are ``io-engine``,
                   ``bs``, ``io-depth``, ``fio-io-size``, ``runtime``,
                   ``image_size`` (``image-size`` is accepted too),
                   ``formats``, ``features``, ``fio-version``, ``rw``,
                   ``thick-provision`` and ``test-clone-io``
    :param rbd_test_dir: remote directory fio is built in
    """
    # Resolve every option up front so later code never reads a name that
    # was only bound inside an ``if`` branch.
    ioengine = config.get('io-engine') or 'sync'
    rw = config.get('rw') or 'randrw'
    iodepth = config.get('io-depth', 2)
    image_size = (config.get('image_size') or config.get('image-size')
                  or 10240)
    formats = config.get('formats') or [1, 2]
    features = config.get('features') or [
        ['layering'], ['striping'], ['exclusive-lock', 'object-map']]
    fio_version = config.get('fio-version') or '3.32'
    clone_io = config.get('test-clone-io')

    fio_config = NamedTemporaryFile(mode='w', prefix='fio_rbd_', dir='/tmp/',
                                    delete=False)
    fio_config.write('[global]\n')
    fio_config.write('ioengine={ioe}\n'.format(ioe=ioengine))
    fio_config.write('bs={bs}\n'.format(bs=config.get('bs') or '4k'))
    fio_config.write('iodepth={iod}\n'.format(iod=iodepth))
    fio_config.write('size={size}\n'.format(
        size=config.get('fio-io-size') or '100m'))
    fio_config.write('time_based\n')
    fio_config.write('runtime={runtime}\n'.format(
        runtime=config.get('runtime') or 1800))
    fio_config.write('allow_file_create=0\n')

    # handle package required for ioengine, if any
    sn = remote.shortname
    ioengine_pkg = get_ioengine_package_name(ioengine, remote)
    if ioengine_pkg:
        install_package(ioengine_pkg, remote)

    fio_config.write('norandommap\n')
    if ioengine == 'rbd':
        fio_config.write('clientname=admin\n')
        fio_config.write('pool=rbd\n')
        fio_config.write('invalidate=0\n')
    elif ioengine == 'libaio':
        fio_config.write('direct=1\n')

    for frmt in formats:
        for feature in features:
            log.info("Creating rbd images on {sn}".format(sn=sn))
            feature_name = '-'.join(feature)
            rbd_name = 'i{i}f{f}{sn}'.format(i=frmt, f=feature_name, sn=sn)
            rbd_snap_name = 'i{i}f{f}{sn}@i{i}f{f}{sn}Snap'.format(
                i=frmt, f=feature_name, sn=sn)
            rbd_clone_name = 'i{i}f{f}{sn}Clone'.format(
                i=frmt, f=feature_name, sn=sn)
            create_args = ['rbd', 'create',
                           '--size', '{size}'.format(size=image_size),
                           '--image', rbd_name,
                           '--image-format', '{f}'.format(f=frmt)]
            # An explicit loop is required: map() is lazy in Python 3, so
            # using it only for its side effect never added these flags.
            # NOTE(review): now that the flags are really passed, any
            # format/feature combination rbd rejects fails at create time;
            # confirm the suites' configs.
            for name in feature:
                create_args.extend(['--image-feature', name])
            if config.get('thick-provision'):
                create_args.append('--thick-provision')
            remote.run(args=create_args)
            remote.run(args=['rbd', 'info', rbd_name])

            if ioengine != 'rbd':
                # Other engines do IO against mapped block devices.
                rbd_dev = run_rbd_map(remote, rbd_name, iodepth)
                if clone_io:
                    _create_rbd_clone(remote, rbd_snap_name, rbd_clone_name)
                    rbd_clone_dev = run_rbd_map(remote, rbd_clone_name,
                                                iodepth)
                _write_fio_job(fio_config, rbd_dev, rw, 'filename', rbd_dev)
                if clone_io:
                    _write_fio_job(fio_config, rbd_clone_dev, rw,
                                   'filename', rbd_clone_dev)
            else:
                # The rbd engine addresses images by name; no mapping.
                if clone_io:
                    _create_rbd_clone(remote, rbd_snap_name, rbd_clone_name)
                _write_fio_job(fio_config, rbd_name, rw, 'rbdname', rbd_name)
                if clone_io:
                    _write_fio_job(fio_config, rbd_clone_name, rw,
                                   'rbdname', rbd_clone_name)

    fio_config.close()
    remote.put_file(fio_config.name, fio_config.name)
    try:
        log.info("Running rbd feature - fio test on {sn}".format(sn=sn))
        # format() rather than '+' so a numeric fio-version (as YAML may
        # parse e.g. 3.32) does not raise TypeError.
        fio = "https://github.com/axboe/fio/archive/fio-{v}.tar.gz".format(
            v=fio_version)
        remote.run(args=['mkdir', run.Raw(rbd_test_dir),])
        remote.run(args=['cd', run.Raw(rbd_test_dir),
                         run.Raw(';'), 'wget', fio,
                         run.Raw(';'), run.Raw('tar -xvf fio*tar.gz'),
                         run.Raw(';'), run.Raw('cd fio-fio*'),
                         run.Raw(';'), './configure',
                         run.Raw(';'), 'make'])
        remote.run(args=['ceph', '-s'])
        remote.run(args=[run.Raw('{tdir}/fio-fio-{v}/fio --showcmd {f}'.format(
            tdir=rbd_test_dir, v=fio_version, f=fio_config.name))])
        remote.run(args=['sudo', run.Raw('{tdir}/fio-fio-{v}/fio {f}'.format(
            tdir=rbd_test_dir, v=fio_version, f=fio_config.name))])
        remote.run(args=['ceph', '-s'])
    finally:
        out = remote.sh('rbd device list --format=json')
        mapped_images = json.loads(out)
        if mapped_images:
            log.info("Unmapping rbd images on {sn}".format(sn=sn))
            for image in mapped_images:
                remote.run(args=['sudo', 'rbd', 'device', 'unmap',
                                 str(image['device'])])
        log.info("Cleaning up fio install")
        remote.run(args=['rm', '-rf', run.Raw(rbd_test_dir)])
        if ioengine_pkg:
            remove_package(ioengine_pkg, remote)
def _run_one_client(ctx, config, role):
    """
    Spawned task: build the ceph_test_librbd_fsx command line for ``role``
    and run it on that role's remote.
    """
    krbd = config.get('krbd', False)
    nbd = config.get('nbd', False)
    testdir = teuthology.get_testdir(ctx)
    (remote,) = ctx.cluster.only(role).remotes.keys()

    # rbd(-nbd) map/unmap need privileges
    args = ['sudo'] if (krbd or nbd) else []
    args += [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir)
    ]

    # Fold any rbd_fsx overrides into the task config (mutates config).
    overrides = ctx.config.get('overrides', {})
    teuthology.deep_merge(config, overrides.get('rbd_fsx', {}))

    valgrind = config.get('valgrind')
    if valgrind:
        args = get_valgrind_args(
            testdir,
            'fsx_{id}'.format(id=role),
            args,
            config.get('valgrind')
        )

    cluster_name, type_, client_id = teuthology.split_role(role)
    if type_ != 'client':
        raise ConfigError('client role ({0}) must be a client'.format(role))

    archive_dir = '{tdir}/archive'.format(tdir=testdir)
    args.extend([
        'ceph_test_librbd_fsx',
        '--cluster', cluster_name,
        '--id', client_id,
        '-d', # debug output for all operations
        '-W', '-R', # mmap doesn't work with rbd
        '-p', str(config.get('progress_interval', 100)), # show progress
        '-P', archive_dir,
        '-r', str(config.get('readbdy',1)),
        '-w', str(config.get('writebdy',1)),
        '-t', str(config.get('truncbdy',1)),
        '-h', str(config.get('holebdy',1)),
        '-l', str(config.get('size', 250000000)),
        '-S', str(config.get('seed', 0)),
        '-N', str(config.get('ops', 1000)),
    ])

    # (enabled, switch) pairs, emitted in this order when enabled.
    switches = (
        (krbd, '-K'),  # -K enables krbd mode
        (nbd, '-M'),  # -M enables nbd mode
        (config.get('direct_io', False), '-Z'),  # -Z use direct IO
        (not config.get('randomized_striping', True), '-U'),  # -U disables randomized striping
        (not config.get('punch_holes', True), '-H'),  # -H disables discard ops
        (config.get('deep_copy', False), '-g'),  # -g deep copy instead of clone
        (config.get('journal_replay', False), '-j'),  # -j replay all IO events from journal
        (config.get('keep_images', False), '-k'),  # -k keep images on success
    )
    args.extend(switch for enabled, switch in switches if enabled)

    pool = config.get('pool_name', 'pool_{pool}'.format(pool=role))
    image = 'image_{image}'.format(image=role)
    args.extend([pool, image])

    remote.run(args=args)
+ + For example: + + roles: + - [primary.mon.a, primary.osd.0, primary.osd.1, primary.osd.2] + - [secondary.mon.a, secondary.osd.0, secondary.osd.1, secondary.osd.2] + - [primary.client.mirror, secondary.client.mirror] + tasks: + - ceph: + cluster: primary + - ceph: + cluster: secondary + - rbd-mirror: + client: primary.client.mirror + + To mirror back to the primary cluster as well, add another + rbd_mirror instance: + + - rbd-mirror: + client: secondary.client.mirror + + Possible options for this task are: + + client: role - ceph client to connect as + valgrind: [--tool=<valgrind tool>] - none by default + coverage: bool - whether this run may be collecting coverage data + thrash: bool - whether this run may be thrashed + """ + def __init__(self, ctx, config): + super(RBDMirror, self).__init__(ctx, config) + self.log = log + + def setup(self): + super(RBDMirror, self).setup() + try: + self.client = self.config['client'] + except KeyError: + raise ConfigError('rbd-mirror requires a client to connect with') + + self.cluster_name, type_, self.client_id = misc.split_role(self.client) + + if type_ != 'client': + msg = 'client role ({0}) must be a client'.format(self.client) + raise ConfigError(msg) + + self.remote = get_remote_for_role(self.ctx, self.client) + + def begin(self): + super(RBDMirror, self).begin() + testdir = misc.get_testdir(self.ctx) + daemon_signal = 'kill' + if 'coverage' in self.config or 'valgrind' in self.config or \ + self.config.get('thrash', False): + daemon_signal = 'term' + + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + daemon_signal, + ] + + if 'valgrind' in self.config: + args = get_valgrind_args( + testdir, + 'rbd-mirror-{id}'.format(id=self.client), + args, + self.config.get('valgrind') + ) + + args.extend([ + 'rbd-mirror', '--foreground', + '--cluster', + self.cluster_name, + '--id', + self.client_id, + ]) + + self.ctx.daemons.add_daemon( + self.remote, 
class RBDMirrorThrasher(Thrasher, Greenlet):
    """
    RBDMirrorThrasher::

    The RBDMirrorThrasher thrashes rbd-mirror daemons during execution of other
    tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  The config is a dict
    containing some or all of:

    cluster: [default: ceph] cluster to thrash

    max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per
      cluster will be thrashed at any given time.

    min_thrash_delay: [default: 60] minimum number of seconds to delay before
      thrashing again.

    max_thrash_delay: [default: 120] maximum number of seconds to delay before
      thrashing again.

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed rbd-mirror daemon.

    randomize: [default: true] enables randomization and use the max/min values

    seed: [no default] seed the random number generator

    Examples::

      The following example disables randomization, and uses the max delay
      values:

      tasks:
      - ceph:
      - rbd_mirror_thrash:
          randomize: False
          max_thrash_delay: 10
    """

    def __init__(self, ctx, config, cluster, daemons):
        """
        :param ctx: teuthology run context
        :param config: dict of the options described on the class
        :param cluster: name of the cluster being thrashed
        :param daemons: rbd-mirror daemon handles to thrash
        """
        super(RBDMirrorThrasher, self).__init__()

        self.ctx = ctx
        self.config = config
        self.cluster = cluster
        self.daemons = daemons

        self.logger = log
        self.name = 'thrasher.rbd_mirror.[{cluster}]'.format(cluster = cluster)
        self.stopping = Event()

        self.randomize = bool(self.config.get('randomize', True))
        self.max_thrash = int(self.config.get('max_thrash', 1))
        self.min_thrash_delay = float(self.config.get('min_thrash_delay', 60.0))
        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0))
        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))

    def _run(self):
        """Greenlet entry point: thrash until stopped, recording any failure."""
        try:
            self.do_thrash()
        except Exception as e:
            # See _run exception comment for MDSThrasher
            self.set_thrasher_exception(e)
            self.logger.exception("exception:")
            # Allow successful completion so gevent doesn't see an exception.
            # The DaemonWatchdog will observe the error and tear down the test.

    def log(self, x):
        """Write data to logger assigned to this RBDMirrorThrasher"""
        self.logger.info(x)

    def stop(self):
        """Ask the thrash loop to exit at its next check."""
        self.stopping.set()

    def do_thrash(self):
        """
        Perform the random thrashing action.

        Until stop() is called: wait a (possibly random) delay, SIGTERM up
        to max_thrash randomly selected daemons, wait a revive delay, wait
        for the killed daemons to exit and start them again.  The kill
        count is logged when the loop ends.
        """

        self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster))
        stats = {
            "kill": 0,
        }

        while not self.stopping.is_set():
            delay = self.max_thrash_delay
            if self.randomize:
                # The bounds are floats, so use uniform(): randrange()
                # rejects non-integer arguments on Python 3.12+ and raises
                # on an empty range (min >= max).
                delay = random.uniform(self.min_thrash_delay,
                                       self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            killed_daemons = []

            # Each daemon is selected with probability 1/len(daemons).
            weight = 1.0 / len(self.daemons)
            count = 0
            for daemon in self.daemons:
                skip = random.uniform(0.0, 1.0)
                if weight <= skip:
                    self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
                        label=daemon.id_, skip=skip, weight=weight))
                    continue

                self.log('kill {label}'.format(label=daemon.id_))
                try:
                    daemon.signal(signal.SIGTERM)
                except socket.error:
                    pass
                killed_daemons.append(daemon)
                stats['kill'] += 1

                # if we've reached max_thrash, we're done
                count += 1
                if count >= self.max_thrash:
                    break

            if killed_daemons:
                # wait for a while before restarting
                delay = self.max_revive_delay
                if self.randomize:
                    # uniform() also copes with max_revive_delay == 0.
                    delay = random.uniform(0.0, self.max_revive_delay)

                self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay))
                sleep(delay)

                for daemon in killed_daemons:
                    self.log('waiting for {label}'.format(label=daemon.id_))
                    try:
                        run.wait([daemon.proc], timeout=600)
                    except CommandFailedError:
                        # a non-zero exit is expected after SIGTERM
                        pass
                    except:
                        # anything else is re-raised below, after trying
                        # to get a core dump out of the stuck daemon
                        self.log('Failed to stop {label}'.format(label=daemon.id_))

                        try:
                            # try to capture a core dump
                            daemon.signal(signal.SIGABRT)
                        except socket.error:
                            pass
                        raise
                    finally:
                        daemon.reset()

                for daemon in killed_daemons:
                    self.log('reviving {label}'.format(label=daemon.id_))
                    daemon.start()

        for stat in stats:
            self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat]))
+ It can test recovery feature of persistent write log cache. + """ + log.info("thrashes rbd bench on persistent write log cache") + + client, client_config = list(config.items())[0] + (remote,) = ctx.cluster.only(client).remotes.keys() + client_config = client_config if client_config is not None else dict() + image_name = client_config.get('image_name', 'testimage') + num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS) + + for i in range(num_iterations): + log.info("start rbd bench") + # rbd bench could not specify the run time so set a large enough test size. + remote.run( + args=[ + 'rbd', 'bench', + '--io-type', 'write', + '--io-pattern', random.choice(IO_PATTERNS), + '--io-size', random.choice(IO_SIZES), + '--io-total', '100G', + image_name, + ], + wait=False, + ) + # Wait a few seconds for the rbd bench process to run + # and complete the pwl cache initialization + time.sleep(10) + log.info("dump cache state when rbd bench running.") + remote.sh(['rbd', 'status', image_name, '--format=json']) + log.info("sleep...") + time.sleep(random.randint(10, 60)) + log.info("rbd bench crash.") + remote.run( + args=[ + 'killall', '-9', 'rbd', + ], + check_status=False, + ) + log.info("wait for watch timeout.") + time.sleep(40) + log.info("check cache state after crash.") + out = remote.sh(['rbd', 'status', image_name, '--format=json']) + rbd_status = json.loads(out) + assert len(rbd_status['watchers']) == 0 + assert rbd_status['persistent_cache']['present'] == True + assert rbd_status['persistent_cache']['empty'] == False + assert rbd_status['persistent_cache']['clean'] == False + log.info("check dirty cache file.") + remote.run( + args=[ + 'test', '-e', rbd_status['persistent_cache']['path'], + ] + ) + try: + yield + finally: + log.info("cleanup") + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for testing persistent write log cache recovery. 
def _push_directory(path, remote, remote_dir):
    """
    Copy the contents of local directory ``path`` into ``remote_dir`` on
    ``remote`` via a temporary tarball.  Equivalent to:

    local_temp_path=`mktemp`
    tar czf $local_temp_path -C $path .
    remote_temp_path=`mktemp`
    scp $local_temp_path remote:$remote_temp_path
    rm $local_temp_path
    ssh remote sudo tar xzf $remote_temp_path -C $remote_dir
    ssh remote sudo rm $remote_temp_path

    ``remote_dir`` must already exist on the remote.
    """
    fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
                                           prefix='rebuild_mondb-')
    os.close(fd)
    try:
        cmd = ' '.join(['tar', 'cz',
                        '-f', local_temp_path,
                        '-C', path,
                        '--', '.'])
        teuthology.sh(cmd)
        # mkstemp is used only to obtain a unique file name to use on the
        # remote; delete the local placeholder so it does not pile up.
        fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
                                                prefix='rebuild_mondb-')
        os.close(fd)
        os.remove(remote_temp_path)
        remote.put_file(local_temp_path, remote_temp_path)
    finally:
        # remove the local archive even when tar or the upload failed
        os.remove(local_temp_path)
    remote.run(args=['sudo',
                     'tar', 'xz',
                     '-C', remote_dir,
                     '-f', remote_temp_path])
    remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])
def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
    """
    Rebuild the store of monitor ``mon_id`` from the data held by the OSDs.

    A mon store directory is passed through every OSD in turn: it is
    pushed to the OSD's remote, updated there with the objectstore tool's
    ``update-mon-db`` op, and pulled back.  The accumulated store is then
    pushed into the monitor's data directory, the keyring caps are filled
    in and ``ceph-monstore-tool ... rebuild`` is run on it.

    :param ctx: teuthology run context
    :param manager: object providing ``objectstore_tool``
    :param cluster_name: cluster every OSD role is asserted to belong to
    :param mon: remote hosting the monitor being recovered
    :param mon_id: id of the monitor being recovered
    :param keyring_path: keyring file (on ``mon``) to update and rebuild with
    """
    # Starts out as an empty directory; replaced after each OSD pass.
    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    assert osds
    for osd, roles in osds.remotes.items():
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            _push_directory(local_mstore, osd, osd_mstore)
            # the local copy is now stale; a fresh one is pulled back below
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--no-mon-config --op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db
            # NOTE: this message still shows the directory removed above;
            # the new destination is only created on the next line.
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with re-built mon db
    # pull from recovered leveldb from client
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    _push_directory(local_mstore, mon, mon_store_dir)
    # the store was unpacked with sudo; hand it to the ceph user
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)

    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *',
                  '--cap', 'mgr', 'allow *'])
    # rebuild the store as the ceph user, using the updated keyring
    mon.run(args=['sudo', '-u', 'ceph',
                  'CEPH_ARGS=--no-mon-config',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--',
                  '--keyring', keyring_path,
                  '--monmap', '/tmp/monmap',
                  ])
log.info('reviving osd.{0}'.format(osd_id)) + manager.revive_osd(osd_id) + + +def task(ctx, config): + """ + Test monitor recovery from OSD + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + # stash a monmap for later + mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap']) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager')) + + mons = ctx.cluster.only(teuthology.is_type('mon')) + # note down the first cluster_name and mon_id + # we will recover it later on + cluster_name, _, mon_id = teuthology.split_role(first_mon) + _nuke_mons(manager, mons, mon_id) + default_keyring = '/etc/ceph/{cluster}.keyring'.format( + cluster=cluster_name) + keyring_path = config.get('keyring_path', default_keyring) + _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path) + _revive_mons(manager, mons, mon_id, keyring_path) + _revive_mgrs(ctx, manager) + _revive_osds(ctx, manager) diff --git a/qa/tasks/reg11184.py b/qa/tasks/reg11184.py new file mode 100644 index 000000000..86cfbf39a --- /dev/null +++ b/qa/tasks/reg11184.py @@ -0,0 +1,242 @@ +""" +Special regression test for tracker #11184 + +Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid)) + +This is accomplished by moving a pg that wasn't part of split and still include +divergent priors. +""" +import logging +import time + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology import misc as teuthology +from tasks.util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries during export / import + to regression test tracker #11184 + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + osds = [0, 1, 2] + manager.flush_pg_stats(osds) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + manager.raw_cluster_cmd( + 'osd', 'pool', 'application', 'enable', + 'foo', 'rados', run.Raw('||'), 'true') + + # Remove extra pool to simplify log output + manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it') + + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' 
% i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + + # Split pgs for pool foo + manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2') + time.sleep(5) + + 
manager.raw_cluster_cmd('pg','dump') + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + FSPATH = manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + # Kill one of non-divergent OSDs + log.info('killing osd.%d' % non_divergent[0]) + manager.kill_osd(non_divergent[0]) + manager.mark_down_osd(non_divergent[0]) + # manager.mark_out_osd(non_divergent[0]) + + # An empty collection for pg 2.0 might need to be cleaned up + cmd = ((prefix + "--force --op remove --pgid 2.0"). + format(id=non_divergent[0])) + exp_remote.sh(cmd, wait=True, check_status=False) + + cmd = ((prefix + "--op import --file {file}"). 
+ format(id=non_divergent[0], file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + # bring in our divergent friend and other node + log.info("revive divergent %d", divergent) + manager.revive_osd(divergent) + manager.mark_in_osd(divergent) + log.info("revive %d", non_divergent[0]) + manager.revive_osd(non_divergent[0]) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('delay recovery divergent') + manager.set_config(divergent, osd_recovery_delay_start=100000) + log.info('mark divergent in') + manager.mark_in_osd(divergent) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + time.sleep(3) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + (remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + cmd = 'rm {file}'.format(file=expfile) + remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/rep_lost_unfound_delete.py b/qa/tasks/rep_lost_unfound_delete.py new file mode 100644 index 000000000..8e99ade27 --- /dev/null +++ b/qa/tasks/rep_lost_unfound_delete.py @@ -0,0 +1,179 @@ +""" +Lost_unfound +""" +import logging +import time + +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + POOL = 'unfounddel_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... 
+ log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.flush_pg_stats([0, 1]) + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recover fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , POOL, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that I can list them directly from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + 
num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2]) + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.mark_in_osd(1) + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() + diff --git a/qa/tasks/repair_test.py b/qa/tasks/repair_test.py new file mode 100644 index 000000000..cfd6ef791 --- /dev/null +++ b/qa/tasks/repair_test.py @@ -0,0 +1,303 @@ +""" +Test pool repairing after objects are damaged. +""" +import logging +import time + +log = logging.getLogger(__name__) + + +def choose_primary(manager, pool, num): + """ + Return primary to test on. + """ + log.info("Choosing primary") + return manager.get_pg_primary(pool, num) + + +def choose_replica(manager, pool, num): + """ + Return replica to test on. 
+ """ + log.info("Choosing replica") + return manager.get_pg_replica(pool, num) + + +def trunc(manager, osd, pool, obj): + """ + truncate an object + """ + log.info("truncating object") + return manager.osd_admin_socket( + osd, + ['truncobj', pool, obj, '1']) + + +def dataerr(manager, osd, pool, obj): + """ + cause an error in the data + """ + log.info("injecting data err on object") + return manager.osd_admin_socket( + osd, + ['injectdataerr', pool, obj]) + + +def mdataerr(manager, osd, pool, obj): + """ + cause an error in the mdata + """ + log.info("injecting mdata err on object") + return manager.osd_admin_socket( + osd, + ['injectmdataerr', pool, obj]) + + +def omaperr(manager, osd, pool, obj): + """ + Cause an omap error. + """ + log.info("injecting omap err on object") + return manager.osd_admin_socket(osd, ['setomapval', pool, obj, + 'badkey', 'badval']) + + +def repair_test_1(manager, corrupter, chooser, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function (truncate, data-error, or + meta-data error, for example). 
+ :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_1" + manager.wait_for_clean() + with manager.pool(pool, 1): + + log.info("starting repair test type 1") + victim_osd = chooser(manager, pool, 0) + + # create object + log.info("doing put") + manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(manager, victim_osd, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + log.info("done") + + +def repair_test_2(ctx, manager, config, chooser): + """ + First creates a set of objects and + sets the omap value. It then corrupts an object, does both a scrub + and a deep-scrub, and then corrupts more objects. After that, it + repairs the pool and makes sure that the pool is consistent some + time after a deep-scrub. + + :param chooser: primary or replica selection routine. 
+ """ + pool = "repair_pool_2" + manager.wait_for_clean() + with manager.pool(pool, 1): + log.info("starting repair test type 2") + victim_osd = chooser(manager, pool, 0) + + # create object + log.info("doing put and setomapval") + manager.do_put(pool, 'file1', '/etc/hosts') + manager.do_rados(['setomapval', 'file1', 'key', 'val'], pool=pool) + manager.do_put(pool, 'file2', '/etc/hosts') + manager.do_put(pool, 'file3', '/etc/hosts') + manager.do_put(pool, 'file4', '/etc/hosts') + manager.do_put(pool, 'file5', '/etc/hosts') + manager.do_rados(['setomapval', 'file5', 'key', 'val'], pool=pool) + manager.do_put(pool, 'file6', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + omaperr(manager, victim_osd, pool, 'file1') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # Regression test for bug #4778, should still + # be inconsistent after scrub + manager.do_pg_scrub(pool, 0, 'scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # Additional corruptions including 2 types for file1 + log.info("corrupting more objects") + dataerr(manager, victim_osd, pool, 'file1') + mdataerr(manager, victim_osd, pool, 'file2') + trunc(manager, victim_osd, pool, 'file3') + omaperr(manager, victim_osd, pool, 'file6') + + # see still inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + # Let repair clear inconsistent flag + time.sleep(10) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + + # In the future repair might determine state of + # inconsistency itself, verify with a deep-scrub + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + # verify consistent + manager.with_pg_state(pool, 0, lambda 
s: 'inconsistent' not in s) + + log.info("done") + + +def hinfoerr(manager, victim, pool, obj): + """ + cause an error in the hinfo_key + """ + log.info("remove the hinfo_key") + manager.objectstore_tool(pool, + options='', + args='rm-attr hinfo_key', + object_name=obj, + osd=victim) + + +def repair_test_erasure_code(manager, corrupter, victim, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function. + :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_3" + manager.wait_for_clean() + with manager.pool(pool_name=pool, pg_num=1, + erasure_code_profile_name='default'): + + log.info("starting repair test for erasure code") + + # create object + log.info("doing put") + manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(manager, victim, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + log.info("done") + + +def task(ctx, config): + """ + Test [deep] repair in several situations: + Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] + + The config should be as follows: + + Must include the log-ignorelist below + Must enable filestore_debug_inject_read_err config + + example: + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: + - 'candidate had a stat error' + - 'candidate had a read error' + - 'deep-scrub 0 missing, 1 inconsistent objects' + - 
'deep-scrub 0 missing, 4 inconsistent objects' + - 'deep-scrub [0-9]+ errors' + - '!= omap_digest' + - '!= data_digest' + - 'repair 0 missing, 1 inconsistent objects' + - 'repair 0 missing, 4 inconsistent objects' + - 'repair [0-9]+ errors, [0-9]+ fixed' + - 'scrub 0 missing, 1 inconsistent objects' + - 'scrub [0-9]+ errors' + - 'size 1 != size' + - 'attr name mismatch' + - 'Regular scrub request, deep-scrub details will be lost' + - 'candidate size [0-9]+ info size [0-9]+ mismatch' + conf: + osd: + filestore debug inject read err: true + - repair_test: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'repair_test task only accepts a dict for config' + + manager = ctx.managers['ceph'] + manager.wait_for_all_osds_up() + + manager.raw_cluster_cmd('osd', 'set', 'noscrub') + manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + + repair_test_1(manager, mdataerr, choose_primary, "scrub") + repair_test_1(manager, mdataerr, choose_replica, "scrub") + repair_test_1(manager, dataerr, choose_primary, "deep-scrub") + repair_test_1(manager, dataerr, choose_replica, "deep-scrub") + repair_test_1(manager, trunc, choose_primary, "scrub") + repair_test_1(manager, trunc, choose_replica, "scrub") + repair_test_2(ctx, manager, config, choose_primary) + repair_test_2(ctx, manager, config, choose_replica) + + repair_test_erasure_code(manager, hinfoerr, 'primary', "deep-scrub") + + manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') diff --git a/qa/tasks/resolve_stuck_peering.py b/qa/tasks/resolve_stuck_peering.py new file mode 100644 index 000000000..d140544c4 --- /dev/null +++ b/qa/tasks/resolve_stuck_peering.py @@ -0,0 +1,112 @@ +""" +Resolve stuck peering +""" +import logging +import time + +from teuthology import misc as teuthology +from tasks.util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling resolve stuck peering + + requires 3 osds 
on a single test node + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'Resolve stuck peering only accepts a dict for config' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + + manager.wait_for_clean() + + dummyfile = '/etc/fstab' + dummyfile1 = '/etc/resolv.conf' + + #create 1 PG pool + pool='foo' + log.info('creating pool foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1') + + #set min_size of the pool to 1 + #so that we can continue with I/O + #when 2 osds are down + manager.set_pool_property(pool, "min_size", 1) + + osds = [0, 1, 2] + + primary = manager.get_pg_primary('foo', 0) + log.info("primary osd is %d", primary) + + others = list(osds) + others.remove(primary) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + #create few objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + #kill other osds except primary + log.info('killing other osds except primary') + for i in others: + manager.kill_osd(i) + for i in others: + manager.mark_down_osd(i) + + + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1]) + + #kill primary osd + manager.kill_osd(primary) + manager.mark_down_osd(primary) + + #revive other 2 osds + for i in others: + manager.revive_osd(i) + + #make sure that pg is down + #Assuming pg number for single pg pool will start from 0 + pgnum=0 + pgstr = manager.get_pgid(pool, pgnum) + stats = manager.get_single_pg_stats(pgstr) + print(stats['state']) + + timeout=60 + start=time.time() + + while 'down' not in stats['state']: + assert time.time() - start < timeout, \ + 'failed to reach down state before timeout expired' + stats = manager.get_single_pg_stats(pgstr) + + #mark primary as lost + manager.raw_cluster_cmd('osd', 'lost', '%d' % 
primary,\ + '--yes-i-really-mean-it') + + + #expect the pg status to be active+undersized+degraded + #pg should recover and become active+clean within timeout + stats = manager.get_single_pg_stats(pgstr) + print(stats['state']) + + timeout=10 + start=time.time() + + while manager.get_num_down(): + assert time.time() - start < timeout, \ + 'failed to recover before timeout expired' + + manager.revive_osd(primary) diff --git a/qa/tasks/restart.py b/qa/tasks/restart.py new file mode 100644 index 000000000..4053bd2cb --- /dev/null +++ b/qa/tasks/restart.py @@ -0,0 +1,161 @@ +""" +Daemon restart +""" +import logging +import pipes +import os + +from teuthology import misc as teuthology +from teuthology.orchestra import run as tor + +from teuthology.orchestra import run +log = logging.getLogger(__name__) + +def restart_daemon(ctx, config, role, id_, *args): + """ + Handle restart (including the execution of the command parameters passed) + """ + log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_)) + daemon = ctx.daemons.get_daemon(role, id_) + log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_)) + try: + daemon.wait_for_exit() + except tor.CommandFailedError as e: + log.debug('Command Failed: {e}'.format(e=e)) + if len(args) > 0: + confargs = ['--{k}={v}'.format(k=k, v=v) for k,v in zip(args[0::2], args[1::2])] + log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs)) + daemon.restart_with_args(confargs) + else: + log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_)) + daemon.restart() + +def get_tests(ctx, config, role, remote, testdir): + """Download restart tests""" + srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role) + + refspec = config.get('branch') + if refspec is None: + refspec = config.get('sha1') + if refspec is None: + refspec = config.get('tag') + if refspec is None: + refspec = 'HEAD' + log.info('Pulling restart qa/workunits from ref %s', refspec) + + 
remote.run( + logger=log.getChild(role), + args=[ + 'mkdir', '--', srcdir, + run.Raw('&&'), + 'git', + 'clone', + 'https://git.ceph.com/ceph.git', + srcdir, + run.Raw('&&'), + 'cd', '--', srcdir, + run.Raw('&&'), + 'git', 'checkout', '-b', 'restart_test', str(refspec), + run.Raw('&&'), + 'cd', '--', 'qa/workunits', + run.Raw('&&'), + 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', + run.Raw('&&'), + 'find', '-executable', '-type', 'f', '-printf', r'%P\0', + run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)), + ], + ) + restarts = sorted(remote.read_file(f'{testdir}/restarts.list').decode().split('\0')) + return (os.path.join(srcdir, 'qa/workunits'), restarts) + +def task(ctx, config): + """ + Execute commands and allow daemon restart with config options. + Each process executed can output to stdout restart commands of the form: + restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2> + This will restart the daemon <role>.<id> with the specified config values once + by modifying the conf file with those values, and then replacing the old conf file + once the daemon is restarted. + This task does not kill a running daemon, it assumes the daemon will abort on an + assert specified in the config. 
+ + tasks: + - install: + - ceph: + - restart: + exec: + client.0: + - test_backtraces.py + + """ + assert isinstance(config, dict), "task kill got invalid config" + + testdir = teuthology.get_testdir(ctx) + + try: + assert 'exec' in config, "config requires exec key with <role>: <command> entries" + for role, task in config['exec'].items(): + log.info('restart for role {r}'.format(r=role)) + (remote,) = ctx.cluster.only(role).remotes.keys() + srcdir, restarts = get_tests(ctx, config, role, remote, testdir) + log.info('Running command on role %s host %s', role, remote.name) + spec = '{spec}'.format(spec=task[0]) + log.info('Restarts list: %s', restarts) + log.info('Spec is %s', spec) + to_run = [w for w in restarts if w == task or w.find(spec) != -1] + log.info('To run: %s', to_run) + for c in to_run: + log.info('Running restart script %s...', c) + args = [ + run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), + ] + env = config.get('env') + if env is not None: + for var, val in env.items(): + quoted_val = pipes.quote(val) + env_arg = '{var}={val}'.format(var=var, val=quoted_val) + args.append(run.Raw(env_arg)) + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '{srcdir}/{c}'.format( + srcdir=srcdir, + c=c, + ), + ]) + proc = remote.run( + args=args, + stdout=tor.PIPE, + stdin=tor.PIPE, + stderr=log, + wait=False, + ) + log.info('waiting for a command from script') + while True: + l = proc.stdout.readline() + if not l or l == '': + break + log.debug('script command: {c}'.format(c=l)) + ll = l.strip() + cmd = ll.split(' ') + if cmd[0] == "done": + break + assert cmd[0] == 'restart', "script sent invalid command request to kill task" + # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2> + # or to clear, just: restart <role> <id> + restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:]) + proc.stdin.writelines(['restarted\n']) + proc.stdin.flush() + try: + proc.wait() + except 
tor.CommandFailedError: + raise Exception('restart task got non-zero exit status from script: {s}'.format(s=c)) + finally: + log.info('Finishing %s on %s...', task, role) + remote.run( + logger=log.getChild(role), + args=[ + 'rm', '-rf', '--', '{tdir}/restarts.list'.format(tdir=testdir), srcdir, + ], + ) diff --git a/qa/tasks/rgw.py b/qa/tasks/rgw.py new file mode 100644 index 000000000..36627f682 --- /dev/null +++ b/qa/tasks/rgw.py @@ -0,0 +1,431 @@ +""" +rgw routines +""" +import argparse +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.exceptions import ConfigError +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from tasks.util.rados import (create_ec_pool, + create_replicated_pool, + create_cache_pool) + +log = logging.getLogger(__name__) + +class RGWEndpoint: + def __init__(self, hostname=None, port=None, cert=None, dns_name=None, website_dns_name=None): + self.hostname = hostname + self.port = port + self.cert = cert + self.dns_name = dns_name + self.website_dns_name = website_dns_name + + def url(self): + proto = 'https' if self.cert else 'http' + return '{proto}://{hostname}:{port}/'.format(proto=proto, hostname=self.hostname, port=self.port) + +@contextlib.contextmanager +def start_rgw(ctx, config, clients): + """ + Start rgw on remote sites. + """ + log.info('Starting rgw...') + testdir = teuthology.get_testdir(ctx) + for client in clients: + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + client_with_cluster = cluster_name + '.' 
+ client_with_id + + client_config = config.get(client) + if client_config is None: + client_config = {} + log.info("rgw %s config is %s", client, client_config) + cmd_prefix = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + ] + + rgw_cmd = ['radosgw'] + + log.info("Using %s as radosgw frontend", ctx.rgw.frontend) + + endpoint = ctx.rgw.role_endpoints[client] + frontends = ctx.rgw.frontend + frontend_prefix = client_config.get('frontend_prefix', None) + if frontend_prefix: + frontends += ' prefix={pfx}'.format(pfx=frontend_prefix) + + if endpoint.cert: + # add the ssl certificate path + frontends += ' ssl_certificate={}'.format(endpoint.cert.certificate) + if ctx.rgw.frontend == 'civetweb': + frontends += ' port={}s'.format(endpoint.port) + else: + frontends += ' ssl_port={}'.format(endpoint.port) + else: + frontends += ' port={}'.format(endpoint.port) + + rgw_cmd.extend([ + '--rgw-frontends', frontends, + '-n', client_with_id, + '--cluster', cluster_name, + '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster), + '--log-file', + '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster), + '--rgw_ops_log_socket_path', + '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir, + client_with_cluster=client_with_cluster), + ]) + + keystone_role = client_config.get('use-keystone-role', None) + if keystone_role is not None: + if not ctx.keystone: + raise ConfigError('rgw must run after the keystone task') + url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(host=endpoint.hostname, + port=endpoint.port) + ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url) + + keystone_host, keystone_port = \ + ctx.keystone.public_endpoints[keystone_role] + rgw_cmd.extend([ + '--rgw_keystone_url', + 'http://{khost}:{kport}'.format(khost=keystone_host, + kport=keystone_port), + ]) + + + if 
client_config.get('dns-name') is not None: + rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name]) + if client_config.get('dns-s3website-name') is not None: + rgw_cmd.extend(['--rgw-dns-s3website-name', endpoint.website_dns_name]) + + + vault_role = client_config.get('use-vault-role', None) + barbican_role = client_config.get('use-barbican-role', None) + pykmip_role = client_config.get('use-pykmip-role', None) + + token_path = '/etc/ceph/vault-root-token' + if barbican_role is not None: + if not hasattr(ctx, 'barbican'): + raise ConfigError('rgw must run after the barbican task') + + barbican_host, barbican_port = \ + ctx.barbican.endpoints[barbican_role] + log.info("Use barbican url=%s:%s", barbican_host, barbican_port) + + rgw_cmd.extend([ + '--rgw_barbican_url', + 'http://{bhost}:{bport}'.format(bhost=barbican_host, + bport=barbican_port), + ]) + elif vault_role is not None: + if not ctx.vault.root_token: + raise ConfigError('vault: no "root_token" specified') + # create token on file + ctx.rgw.vault_role = vault_role + ctx.cluster.only(client).run(args=['sudo', 'echo', '-n', ctx.vault.root_token, run.Raw('|'), 'sudo', 'tee', token_path]) + log.info("Token file content") + ctx.cluster.only(client).run(args=['cat', token_path]) + log.info("Restrict access to token file") + ctx.cluster.only(client).run(args=['sudo', 'chmod', '600', token_path]) + ctx.cluster.only(client).run(args=['sudo', 'chown', 'ceph', token_path]) + + rgw_cmd.extend([ + '--rgw_crypt_vault_addr', "{}:{}".format(*ctx.vault.endpoints[vault_role]), + '--rgw_crypt_vault_token_file', token_path + ]) + elif pykmip_role is not None: + if not hasattr(ctx, 'pykmip'): + raise ConfigError('rgw must run after the pykmip task') + ctx.rgw.pykmip_role = pykmip_role + rgw_cmd.extend([ + '--rgw_crypt_kmip_addr', "{}:{}".format(*ctx.pykmip.endpoints[pykmip_role]), + ]) + + clientcert = ctx.ssl_certificates.get('kmip-client') + servercert = ctx.ssl_certificates.get('kmip-server') + clientca = 
ctx.ssl_certificates.get('kmiproot') + + clientkey = clientcert.key + clientcert = clientcert.certificate + serverkey = servercert.key + servercert = servercert.certificate + rootkey = clientca.key + rootcert = clientca.certificate + + cert_path = '/etc/ceph/' + ctx.cluster.only(client).run(args=['sudo', 'cp', clientcert, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', clientkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', servercert, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', serverkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', rootkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', rootcert, cert_path]) + + clientcert = cert_path + 'kmip-client.crt' + clientkey = cert_path + 'kmip-client.key' + servercert = cert_path + 'kmip-server.crt' + serverkey = cert_path + 'kmip-server.key' + rootkey = cert_path + 'kmiproot.key' + rootcert = cert_path + 'kmiproot.crt' + + ctx.cluster.only(client).run(args=['sudo', 'chmod', '600', clientcert, clientkey, servercert, serverkey, rootkey, rootcert]) + ctx.cluster.only(client).run(args=['sudo', 'chown', 'ceph', clientcert, clientkey, servercert, serverkey, rootkey, rootcert]) + + rgw_cmd.extend([ + '--foreground', + run.Raw('|'), + 'sudo', + 'tee', + '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(client_with_cluster=client_with_cluster), + run.Raw('2>&1'), + ]) + + if client_config.get('valgrind'): + cmd_prefix = get_valgrind_args( + testdir, + client_with_cluster, + cmd_prefix, + client_config.get('valgrind') + ) + + run_cmd = list(cmd_prefix) + run_cmd.extend(rgw_cmd) + + ctx.daemons.add_daemon( + remote, 'rgw', client_with_id, + cluster=cluster_name, + fsid=ctx.ceph[cluster_name].fsid, + args=run_cmd, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + + # XXX: add_daemon() doesn't let us wait until radosgw finishes startup + for client in clients: + endpoint = ctx.rgw.role_endpoints[client] + url = 
endpoint.url() + log.info('Polling {client} until it starts accepting connections on {url}'.format(client=client, url=url)) + (remote,) = ctx.cluster.only(client).remotes.keys() + wait_for_radosgw(url, remote) + + try: + yield + finally: + for client in clients: + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + client_with_cluster = cluster_name + '.' + client_with_id + ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() + ctx.cluster.only(client).run( + args=[ + 'rm', + '-f', + '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, + client=client_with_cluster), + ], + ) + ctx.cluster.only(client).run(args=['sudo', 'rm', '-f', token_path]) + +def assign_endpoints(ctx, config, default_cert): + role_endpoints = {} + for role, client_config in config.items(): + client_config = client_config or {} + remote = get_remote_for_role(ctx, role) + + cert = client_config.get('ssl certificate', default_cert) + if cert: + # find the certificate created by the ssl task + if not hasattr(ctx, 'ssl_certificates'): + raise ConfigError('rgw: no ssl task found for option "ssl certificate"') + ssl_certificate = ctx.ssl_certificates.get(cert, None) + if not ssl_certificate: + raise ConfigError('rgw: missing ssl certificate "{}"'.format(cert)) + else: + ssl_certificate = None + + port = client_config.get('port', 443 if ssl_certificate else 80) + + # if dns-name is given, use it as the hostname (or as a prefix) + dns_name = client_config.get('dns-name', '') + if len(dns_name) == 0 or dns_name.endswith('.'): + dns_name += remote.hostname + + website_dns_name = client_config.get('dns-s3website-name') + if website_dns_name is not None and (len(website_dns_name) == 0 or website_dns_name.endswith('.')): + website_dns_name += remote.hostname + + role_endpoints[role] = RGWEndpoint(remote.hostname, port, ssl_certificate, dns_name, website_dns_name) + + return role_endpoints + +@contextlib.contextmanager +def 
create_pools(ctx, clients): + """Create replicated or erasure coded data pools for rgw.""" + + log.info('Creating data pools') + for client in clients: + log.debug("Obtaining remote for client {}".format(client)) + (remote,) = ctx.cluster.only(client).remotes.keys() + data_pool = 'default.rgw.buckets.data' + cluster_name, daemon_type, client_id = teuthology.split_role(client) + + if ctx.rgw.ec_data_pool: + create_ec_pool(remote, data_pool, client, ctx.rgw.data_pool_pg_size, + ctx.rgw.erasure_code_profile, cluster_name, 'rgw') + else: + create_replicated_pool(remote, data_pool, ctx.rgw.data_pool_pg_size, cluster_name, 'rgw') + + index_pool = 'default.rgw.buckets.index' + create_replicated_pool(remote, index_pool, ctx.rgw.index_pool_pg_size, cluster_name, 'rgw') + + if ctx.rgw.cache_pools: + create_cache_pool(remote, data_pool, data_pool + '.cache', 64, + 64*1024*1024, cluster_name) + log.debug('Pools created') + yield + +@contextlib.contextmanager +def configure_compression(ctx, clients, compression): + """ set a compression type in the default zone placement """ + log.info('Configuring compression type = %s', compression) + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). + # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'modify', '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--compression', compression], + check_status=True) + yield + +@contextlib.contextmanager +def configure_storage_classes(ctx, clients, storage_classes): + """ set a compression type in the default zone placement """ + + sc = [s.strip() for s in storage_classes.split(',')] + + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). 
+ # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + for storage_class in sc: + log.info('Configuring storage class type = %s', storage_class) + rgwadmin(ctx, client, + cmd=['zonegroup', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', storage_class], + check_status=True) + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', storage_class, + '--data-pool', 'default.rgw.buckets.data.' + storage_class.lower()], + check_status=True) + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + For example, to run rgw on all clients:: + + tasks: + - ceph: + - rgw: + + To only run on certain clients:: + + tasks: + - ceph: + - rgw: [client.0, client.3] + + or + + tasks: + - ceph: + - rgw: + client.0: + client.3: + + To run radosgw through valgrind: + + tasks: + - ceph: + - rgw: + client.0: + valgrind: [--tool=memcheck] + client.3: + valgrind: [--tool=memcheck] + + To configure data or index pool pg_size: + + overrides: + rgw: + data_pool_pg_size: 256 + index_pool_pg_size: 128 + """ + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type( + ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = config.keys() # http://tracker.ceph.com/issues/20417 + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('rgw', {})) + + ctx.rgw = argparse.Namespace() + + ctx.rgw.ec_data_pool = bool(config.pop('ec-data-pool', False)) + ctx.rgw.erasure_code_profile = config.pop('erasure_code_profile', {}) + ctx.rgw.cache_pools = bool(config.pop('cache-pools', False)) + ctx.rgw.frontend = config.pop('frontend', 'civetweb') + ctx.rgw.compression_type = config.pop('compression type', None) + 
ctx.rgw.storage_classes = config.pop('storage classes', None) + default_cert = config.pop('ssl certificate', None) + ctx.rgw.data_pool_pg_size = config.pop('data_pool_pg_size', 64) + ctx.rgw.index_pool_pg_size = config.pop('index_pool_pg_size', 64) + ctx.rgw.config = config + + log.debug("config is {}".format(config)) + log.debug("client list is {}".format(clients)) + + ctx.rgw.role_endpoints = assign_endpoints(ctx, config, default_cert) + + subtasks = [ + lambda: create_pools(ctx=ctx, clients=clients), + ] + if ctx.rgw.compression_type: + subtasks.extend([ + lambda: configure_compression(ctx=ctx, clients=clients, + compression=ctx.rgw.compression_type), + ]) + if ctx.rgw.storage_classes: + subtasks.extend([ + lambda: configure_storage_classes(ctx=ctx, clients=clients, + storage_classes=ctx.rgw.storage_classes), + ]) + subtasks.extend([ + lambda: start_rgw(ctx=ctx, config=config, clients=clients), + ]) + + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/rgw_logsocket.py b/qa/tasks/rgw_logsocket.py new file mode 100644 index 000000000..d76e59d7f --- /dev/null +++ b/qa/tasks/rgw_logsocket.py @@ -0,0 +1,165 @@ +""" +rgw s3tests logging wrappers +""" +from io import BytesIO +from configobj import ConfigObj +import contextlib +import logging +from tasks import s3tests + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Run s3tests download function + """ + return s3tests.download(ctx, config) + +def _config_user(s3tests_conf, section, user): + """ + Run s3tests user config function + """ + return s3tests._config_user(s3tests_conf, section, user) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Run s3tests user create function + """ + return s3tests.create_users(ctx, config) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Run s3tests user configure function + """ + return 
s3tests.configure(ctx, config) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run remote netcat tests + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + client_config['extra_args'] = [ + 's3tests.functional.test_s3:test_bucket_list_return_data', + ] +# args = [ +# 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), +# '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), +# '-w', +# '{tdir}/s3-tests'.format(tdir=testdir), +# '-v', +# 's3tests.functional.test_s3:test_bucket_list_return_data', +# ] +# if client_config is not None and 'extra_args' in client_config: +# args.extend(client_config['extra_args']) +# +# ctx.cluster.only(client).run( +# args=args, +# ) + + s3tests.run_tests(ctx, config) + + netcat_out = BytesIO() + + for client, client_config in config.items(): + ctx.cluster.only(client).run( + args = [ + 'netcat', + '-w', '5', + '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir), + ], + stdout = netcat_out, + ) + + out = netcat_out.getvalue() + + assert len(out) > 100 + + log.info('Received', out) + + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run some s3-tests suite against rgw, verify opslog socket returns data + + Must restrict testing to a particular client:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert hasattr(ctx, 'rgw'), 'rgw-logsocket must run after the rgw task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task rgw-logsocket only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for (client, cconf) in config.items(): + teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {})) + + log.debug('config is %s', config) + + s3tests_conf = {} + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'rgw-logsocket: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/rgw_multi b/qa/tasks/rgw_multi new file mode 120000 index 000000000..abfc703b9 --- /dev/null +++ b/qa/tasks/rgw_multi @@ -0,0 +1 @@ +../../src/test/rgw/rgw_multi
\ No newline at end of file diff --git a/qa/tasks/rgw_multisite.py b/qa/tasks/rgw_multisite.py new file mode 100644 index 000000000..266d0fb69 --- /dev/null +++ b/qa/tasks/rgw_multisite.py @@ -0,0 +1,436 @@ +""" +rgw multisite configuration routines +""" +import argparse +import logging +import random +import string +from copy import deepcopy +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from tasks.util.rados import create_ec_pool, create_replicated_pool +from tasks.rgw_multi import multisite +from tasks.rgw_multi.zone_rados import RadosZone as RadosZone +from tasks.rgw_multi.zone_ps import PSZone as PSZone + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class RGWMultisite(Task): + """ + Performs rgw multisite configuration to match the given realm definition. + + - rgw-multisite: + realm: + name: test-realm + is_default: true + + List one or more zonegroup definitions. These are provided as json + input to `radosgw-admin zonegroup set`, with the exception of these keys: + + * 'is_master' is passed on the command line as --master + * 'is_default' is passed on the command line as --default + * 'is_pubsub' is used to create a zone with tier-type=pubsub + * 'endpoints' given as client names are replaced with actual endpoints + + zonegroups: + - name: test-zonegroup + api_name: test-api + is_master: true + is_default: true + endpoints: [c1.client.0] + + List each of the zones to be created in this zonegroup. 
+ + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + + A complete example: + + tasks: + - install: + - ceph: {cluster: c1} + - ceph: {cluster: c2} + - rgw: + c1.client.0: + c2.client.0: + - rgw-multisite: + realm: + name: test-realm + is_default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + - name: test-zone3 + is_pubsub: true + endpoints: [c1.client.1] + + """ + def __init__(self, ctx, config): + super(RGWMultisite, self).__init__(ctx, config) + + def setup(self): + super(RGWMultisite, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('rgw-multisite', {})) + + if not self.ctx.rgw: + raise ConfigError('rgw-multisite must run after the rgw task') + role_endpoints = self.ctx.rgw.role_endpoints + + # construct Clusters and Gateways for each client in the rgw task + clusters, gateways = extract_clusters_and_gateways(self.ctx, + role_endpoints) + + # get the master zone and zonegroup configuration + mz, mzg = extract_master_zone_zonegroup(self.config['zonegroups']) + cluster1 = cluster_for_zone(clusters, mz) + + # create the realm and period on the master zone's cluster + log.info('creating realm..') + realm = create_realm(cluster1, self.config['realm']) + period = realm.current_period + + creds = gen_credentials() + + # create the master zonegroup and its master zone + log.info('creating master zonegroup..') + master_zonegroup = create_zonegroup(cluster1, gateways, period, + deepcopy(mzg)) + period.master_zonegroup = master_zonegroup + + log.info('creating master zone..') + master_zone = create_zone(self.ctx, cluster1, gateways, creds, + master_zonegroup, deepcopy(mz)) + master_zonegroup.master_zone = 
master_zone + + period.update(master_zone, commit=True) + restart_zone_gateways(master_zone) # restart with --rgw-zone + + # create the admin user on the master zone + log.info('creating admin user..') + user_args = ['--display-name', 'Realm Admin', '--system'] + user_args += creds.credential_args() + admin_user = multisite.User('realm-admin') + admin_user.create(master_zone, user_args) + + # process 'zonegroups' + for zg_config in self.config['zonegroups']: + zones_config = zg_config.pop('zones') + + zonegroup = None + for zone_config in zones_config: + # get the cluster for this zone + cluster = cluster_for_zone(clusters, zone_config) + + if cluster != cluster1: # already created on master cluster + log.info('pulling realm configuration to %s', cluster.name) + realm.pull(cluster, master_zone.gateways[0], creds) + + # use the first zone's cluster to create the zonegroup + if not zonegroup: + if zg_config['name'] == master_zonegroup.name: + zonegroup = master_zonegroup + else: + log.info('creating zonegroup..') + zonegroup = create_zonegroup(cluster, gateways, + period, zg_config) + + if zone_config['name'] == master_zone.name: + # master zone was already created + zone = master_zone + else: + # create the zone and commit the period + log.info('creating zone..') + zone = create_zone(self.ctx, cluster, gateways, creds, + zonegroup, zone_config) + period.update(zone, commit=True) + + restart_zone_gateways(zone) # restart with --rgw-zone + + # attach configuration to the ctx for other tasks + self.ctx.rgw_multisite = argparse.Namespace() + self.ctx.rgw_multisite.clusters = clusters + self.ctx.rgw_multisite.gateways = gateways + self.ctx.rgw_multisite.realm = realm + self.ctx.rgw_multisite.admin_user = admin_user + + log.info('rgw multisite configuration completed') + + def end(self): + del self.ctx.rgw_multisite + +class Cluster(multisite.Cluster): + """ Issues 'radosgw-admin' commands with the rgwadmin() helper """ + def __init__(self, ctx, name, client): + 
super(Cluster, self).__init__() + self.ctx = ctx + self.name = name + self.client = client + + def admin(self, args = None, **kwargs): + """ radosgw-admin command """ + args = args or [] + args += ['--cluster', self.name] + args += ['--debug-rgw', str(kwargs.pop('debug_rgw', 0))] + args += ['--debug-ms', str(kwargs.pop('debug_ms', 0))] + if kwargs.pop('read_only', False): + args += ['--rgw-cache-enabled', 'false'] + kwargs['decode'] = False + check_retcode = kwargs.pop('check_retcode', True) + r, s = rgwadmin(self.ctx, self.client, args, **kwargs) + if check_retcode: + assert r == 0 + return s, r + +class Gateway(multisite.Gateway): + """ Controls a radosgw instance using its daemon """ + def __init__(self, role, remote, daemon, *args, **kwargs): + super(Gateway, self).__init__(*args, **kwargs) + self.role = role + self.remote = remote + self.daemon = daemon + + def set_zone(self, zone): + """ set the zone and add its args to the daemon's command line """ + assert self.zone is None, 'zone can only be set once' + self.zone = zone + # daemon.restart_with_args() would be perfect for this, except that + # radosgw args likely include a pipe and redirect. 
zone arguments at + # the end won't actually apply to radosgw + args = self.daemon.command_kwargs.get('args', []) + try: + # insert zone args before the first | + pipe = args.index(run.Raw('|')) + args = args[0:pipe] + zone.zone_args() + args[pipe:] + except ValueError: + args += zone.zone_args() + self.daemon.command_kwargs['args'] = args + + def start(self, args = None): + """ (re)start the daemon """ + self.daemon.restart() + # wait until startup completes + wait_for_radosgw(self.endpoint(), self.remote) + + def stop(self): + """ stop the daemon """ + self.daemon.stop() + +def extract_clusters_and_gateways(ctx, role_endpoints): + """ create cluster and gateway instances for all of the radosgw roles """ + clusters = {} + gateways = {} + for role, endpoint in role_endpoints.items(): + cluster_name, daemon_type, client_id = misc.split_role(role) + # find or create the cluster by name + cluster = clusters.get(cluster_name) + if not cluster: + clusters[cluster_name] = cluster = Cluster(ctx, cluster_name, role) + # create a gateway for this daemon + client_with_id = daemon_type + '.' 
+ client_id # match format from rgw.py + daemon = ctx.daemons.get_daemon('rgw', client_with_id, cluster_name) + if not daemon: + raise ConfigError('no daemon for role=%s cluster=%s type=rgw id=%s' % \ + (role, cluster_name, client_id)) + (remote,) = ctx.cluster.only(role).remotes.keys() + gateways[role] = Gateway(role, remote, daemon, endpoint.hostname, + endpoint.port, cluster) + return clusters, gateways + +def create_realm(cluster, config): + """ create a realm from configuration and initialize its first period """ + realm = multisite.Realm(config['name']) + args = [] + if config.get('is_default', False): + args += ['--default'] + realm.create(cluster, args) + realm.current_period = multisite.Period(realm) + return realm + +def extract_user_credentials(config): + """ extract keys from configuration """ + return multisite.Credentials(config['access_key'], config['secret_key']) + +def extract_master_zone(zonegroup_config): + """ find and return the master zone definition """ + master = None + for zone in zonegroup_config['zones']: + if not zone.get('is_master', False): + continue + if master: + raise ConfigError('zones %s and %s cannot both set \'is_master\'' % \ + (master['name'], zone['name'])) + master = zone + # continue the loop so we can detect duplicates + if not master: + raise ConfigError('one zone must set \'is_master\' in zonegroup %s' % \ + zonegroup_config['name']) + return master + +def extract_master_zone_zonegroup(zonegroups_config): + """ find and return the master zone and zonegroup definitions """ + master_zone, master_zonegroup = (None, None) + for zonegroup in zonegroups_config: + # verify that all zonegroups have a master zone set, even if they + # aren't in the master zonegroup + zone = extract_master_zone(zonegroup) + if not zonegroup.get('is_master', False): + continue + if master_zonegroup: + raise ConfigError('zonegroups %s and %s cannot both set \'is_master\'' % \ + (master_zonegroup['name'], zonegroup['name'])) + master_zonegroup = 
zonegroup + master_zone = zone + # continue the loop so we can detect duplicates + if not master_zonegroup: + raise ConfigError('one zonegroup must set \'is_master\'') + return master_zone, master_zonegroup + +def extract_zone_cluster_name(zone_config): + """ return the cluster (must be common to all zone endpoints) """ + cluster_name = None + endpoints = zone_config.get('endpoints') + if not endpoints: + raise ConfigError('zone %s missing \'endpoints\' list' % \ + zone_config['name']) + for role in endpoints: + name, _, _ = misc.split_role(role) + if not cluster_name: + cluster_name = name + elif cluster_name != name: + raise ConfigError('all zone %s endpoints must be in the same cluster' % \ + zone_config['name']) + return cluster_name + +def cluster_for_zone(clusters, zone_config): + """ return the cluster entry for the given zone """ + name = extract_zone_cluster_name(zone_config) + try: + return clusters[name] + except KeyError: + raise ConfigError('no cluster %s found' % name) + +def gen_access_key(): + return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(16)) + +def gen_secret(): + return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(32)) + +def gen_credentials(): + return multisite.Credentials(gen_access_key(), gen_secret()) + +def extract_gateway_endpoints(gateways, endpoints_config): + """ return a list of gateway endpoints associated with the given roles """ + endpoints = [] + for role in endpoints_config: + try: + # replace role names with their gateway's endpoint + endpoints.append(gateways[role].endpoint()) + except KeyError: + raise ConfigError('no radosgw endpoint found for role %s' % role) + return endpoints + +def is_default_arg(config): + return ['--default'] if config.pop('is_default', False) else [] + +def is_master_arg(config): + return ['--master'] if config.pop('is_master', False) else [] + +def create_zonegroup(cluster, gateways, period, config): + """ 
pass the zonegroup configuration to `zonegroup set` """ + config.pop('zones', None) # remove 'zones' from input to `zonegroup set` + endpoints = config.get('endpoints') + if endpoints: + # replace client names with their gateway endpoints + config['endpoints'] = extract_gateway_endpoints(gateways, endpoints) + zonegroup = multisite.ZoneGroup(config['name'], period) + # `zonegroup set` needs --default on command line, and 'is_master' in json + args = is_default_arg(config) + zonegroup.set(cluster, config, args) + period.zonegroups.append(zonegroup) + return zonegroup + +def create_zone(ctx, cluster, gateways, creds, zonegroup, config): + """ create a zone with the given configuration """ + zone = multisite.Zone(config['name'], zonegroup, cluster) + if config.pop('is_pubsub', False): + zone = PSZone(config['name'], zonegroup, cluster) + else: + zone = RadosZone(config['name'], zonegroup, cluster) + + # collect Gateways for the zone's endpoints + endpoints = config.get('endpoints') + if not endpoints: + raise ConfigError('no \'endpoints\' for zone %s' % config['name']) + zone.gateways = [gateways[role] for role in endpoints] + for gateway in zone.gateways: + gateway.set_zone(zone) + + # format the gateway endpoints + endpoints = [g.endpoint() for g in zone.gateways] + + args = is_default_arg(config) + args += is_master_arg(config) + args += creds.credential_args() + if len(endpoints): + args += ['--endpoints', ','.join(endpoints)] + zone.create(cluster, args) + zonegroup.zones.append(zone) + + create_zone_pools(ctx, zone) + if ctx.rgw.compression_type: + configure_zone_compression(zone, ctx.rgw.compression_type) + + zonegroup.zones_by_type.setdefault(zone.tier_type(), []).append(zone) + + if zone.is_read_only(): + zonegroup.ro_zones.append(zone) + else: + zonegroup.rw_zones.append(zone) + + return zone + +def create_zone_pools(ctx, zone): + """ Create the data_pool for each placement type """ + gateway = zone.gateways[0] + cluster = zone.cluster + for pool_config in 
zone.data.get('placement_pools', []): + pool_name = pool_config['val']['storage_classes']['STANDARD']['data_pool'] + if ctx.rgw.ec_data_pool: + create_ec_pool(gateway.remote, pool_name, zone.name, 64, + ctx.rgw.erasure_code_profile, cluster.name, 'rgw') + else: + create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw') + +def configure_zone_compression(zone, compression): + """ Set compression type in the zone's default-placement """ + zone.json_command(zone.cluster, 'placement', ['modify', + '--placement-id', 'default-placement', + '--compression', compression + ]) + +def restart_zone_gateways(zone): + zone.stop() + zone.start() + +task = RGWMultisite diff --git a/qa/tasks/rgw_multisite_tests.py b/qa/tasks/rgw_multisite_tests.py new file mode 100644 index 000000000..53aedf792 --- /dev/null +++ b/qa/tasks/rgw_multisite_tests.py @@ -0,0 +1,99 @@ +""" +rgw multisite testing +""" +import logging +import nose.core +import nose.config + +from teuthology.exceptions import ConfigError +from teuthology.task import Task +from teuthology import misc + +from tasks.rgw_multi import multisite, tests, tests_ps + +log = logging.getLogger(__name__) + + +class RGWMultisiteTests(Task): + """ + Runs the rgw_multi tests against a multisite configuration created by the + rgw-multisite task. Tests are run with nose, using any additional 'args' + provided. Overrides for tests.Config can be set in 'config'. 
+ + - rgw-multisite-tests: + args: + - tasks.rgw_multi.tests:test_object_sync + config: + reconfigure_delay: 60 + + """ + def __init__(self, ctx, config): + super(RGWMultisiteTests, self).__init__(ctx, config) + + def setup(self): + super(RGWMultisiteTests, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('rgw-multisite-tests', {})) + + if not self.ctx.rgw_multisite: + raise ConfigError('rgw-multisite-tests must run after the rgw-multisite task') + realm = self.ctx.rgw_multisite.realm + master_zone = realm.meta_master_zone() + + # create the test user + log.info('creating test user..') + user = multisite.User('rgw-multisite-test-user') + user.create(master_zone, ['--display-name', 'Multisite Test User', + '--gen-access-key', '--gen-secret']) + + config = self.config.get('config', {}) + tests.init_multi(realm, user, tests.Config(**config)) + tests.realm_meta_checkpoint(realm) + + def begin(self): + # extra arguments for nose can be passed as a string or list + extra_args = self.config.get('args', []) + if not isinstance(extra_args, list): + extra_args = [extra_args] + argv = [__name__] + extra_args + + log.info("running rgw multisite tests on '%s' with args=%r", + tests.__name__, extra_args) + + # run nose tests in the rgw_multi.tests module + conf = nose.config.Config(stream=get_log_stream(), verbosity=2) + error_msg = '' + result = nose.run(defaultTest=tests.__name__, argv=argv, config=conf) + if not result: + error_msg += 'rgw multisite, ' + result = nose.run(defaultTest=tests_ps.__name__, argv=argv, config=conf) + if not result: + error_msg += 'rgw multisite pubsub, ' + if error_msg: + raise RuntimeError(error_msg + 'test failures') + + +def get_log_stream(): + """ return a log stream for nose output """ + # XXX: this is a workaround for IOErrors when nose writes to stderr, + # copied from vstart_runner.py + class LogStream(object): + def __init__(self): + self.buffer = "" + + def write(self, data): 
+ self.buffer += data + if "\n" in self.buffer: + lines = self.buffer.split("\n") + for line in lines[:-1]: + log.info(line) + self.buffer = lines[-1] + + def flush(self): + pass + + return LogStream() + + +task = RGWMultisiteTests diff --git a/qa/tasks/rook-ceph.conf b/qa/tasks/rook-ceph.conf new file mode 100644 index 000000000..38ac11e41 --- /dev/null +++ b/qa/tasks/rook-ceph.conf @@ -0,0 +1,41 @@ +[global] + +log to file = true + +mon clock drift allowed = 1.000 + +# replicate across OSDs, not hosts +osd crush chooseleaf type = 0 + +# enable some debugging +auth debug = true +ms die on old message = true +ms die on bug = true +debug asserts on shutdown = true + + +[osd] +# debugging +osd debug shutdown = true +osd debug op order = true +osd debug verify stray on activate = true +osd debug pg log writeout = true +osd debug verify cached snaps = true +osd debug verify missing on start = true +osd debug misdirected ops = true +osd op queue = debug_random +osd op queue cut off = debug_random +osd shutdown pgref assert = true +bdev debug aio = true +osd sloppy crc = true + + +[mon] +# rotate auth tickets quickly to exercise renewal paths +auth mon ticket ttl = 660 # 11m +auth service ticket ttl = 240 # 4m + +# don't complain about global id reclaim +mon_warn_on_insecure_global_id_reclaim = false +mon_warn_on_insecure_global_id_reclaim_allowed = false + diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py new file mode 100644 index 000000000..bdd9e58dc --- /dev/null +++ b/qa/tasks/rook.py @@ -0,0 +1,638 @@ +""" +Rook cluster task +""" +import argparse +import configobj +import contextlib +import json +import logging +import os +import yaml +from io import BytesIO + +from tarfile import ReadError +from tasks.ceph_manager import CephManager +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config +from teuthology.contextutil import safe_while +from teuthology.orchestra import run +from teuthology import contextutil +from tasks.ceph 
import healthy +from tasks.cephadm import update_archive_setting + +log = logging.getLogger(__name__) + + +def _kubectl(ctx, config, args, **kwargs): + cluster_name = config.get('cluster', 'ceph') + return ctx.rook[cluster_name].remote.run( + args=['kubectl'] + args, + **kwargs + ) + + +def shell(ctx, config): + """ + Run command(s) inside the rook tools container. + + tasks: + - kubeadm: + - rook: + - rook.shell: + - ceph -s + + or + + tasks: + - kubeadm: + - rook: + - rook.shell: + commands: + - ceph -s + + """ + if isinstance(config, list): + config = {'commands': config} + for cmd in config.get('commands', []): + if isinstance(cmd, str): + _shell(ctx, config, cmd.split(' ')) + else: + _shell(ctx, config, cmd) + + +def _shell(ctx, config, args, **kwargs): + cluster_name = config.get('cluster', 'ceph') + return _kubectl( + ctx, config, + [ + '-n', 'rook-ceph', + 'exec', + ctx.rook[cluster_name].toolbox, '--' + ] + args, + **kwargs + ) + + +@contextlib.contextmanager +def rook_operator(ctx, config): + cluster_name = config['cluster'] + rook_branch = config.get('rook_branch', 'master') + rook_git_url = config.get('rook_git_url', 'https://github.com/rook/rook') + + log.info(f'Cloning {rook_git_url} branch {rook_branch}') + ctx.rook[cluster_name].remote.run( + args=[ + 'rm', '-rf', 'rook', + run.Raw('&&'), + 'git', + 'clone', + '--single-branch', + '--branch', rook_branch, + rook_git_url, + 'rook', + ] + ) + + # operator.yaml + operator_yaml = ctx.rook[cluster_name].remote.read_file( + 'rook/cluster/examples/kubernetes/ceph/operator.yaml' + ) + rook_image = config.get('rook_image') + if rook_image: + log.info(f'Patching operator to use image {rook_image}') + crs = list(yaml.load_all(operator_yaml, Loader=yaml.FullLoader)) + assert len(crs) == 2 + crs[1]['spec']['template']['spec']['containers'][0]['image'] = rook_image + operator_yaml = yaml.dump_all(crs) + ctx.rook[cluster_name].remote.write_file('operator.yaml', operator_yaml) + + op_job = None + try: + 
log.info('Deploying operator') + _kubectl(ctx, config, [ + 'create', + '-f', 'rook/cluster/examples/kubernetes/ceph/crds.yaml', + '-f', 'rook/cluster/examples/kubernetes/ceph/common.yaml', + '-f', 'operator.yaml', + ]) + + # on centos: + if teuthology.get_distro(ctx) == 'centos': + _kubectl(ctx, config, [ + '-n', 'rook-ceph', + 'set', 'env', 'deploy/rook-ceph-operator', + 'ROOK_HOSTPATH_REQUIRES_PRIVILEGED=true' + ]) + + # wait for operator + op_name = None + with safe_while(sleep=10, tries=90, action="wait for operator") as proceed: + while not op_name and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-operator'], + stdout=BytesIO(), + ) + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if status == 'Running': + op_name = name + break + + # log operator output + op_job = _kubectl( + ctx, + config, + ['-n', 'rook-ceph', 'logs', '-f', op_name], + wait=False, + logger=log.getChild('operator'), + ) + + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up rook operator') + _kubectl(ctx, config, [ + 'delete', + '-f', 'operator.yaml', + ]) + if False: + # don't bother since we'll tear down k8s anyway (and this mysteriously + # fails sometimes when deleting some of the CRDs... not sure why!) 
+ _kubectl(ctx, config, [ + 'delete', + '-f', 'rook/cluster/examples/kubernetes/ceph/common.yaml', + ]) + _kubectl(ctx, config, [ + 'delete', + '-f', 'rook/cluster/examples/kubernetes/ceph/crds.yaml', + ]) + ctx.rook[cluster_name].remote.run(args=['rm', '-rf', 'rook', 'operator.yaml']) + if op_job: + op_job.wait() + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-rf', '/var/lib/rook' + ] + ) + ) + + +@contextlib.contextmanager +def ceph_log(ctx, config): + cluster_name = config['cluster'] + + log_dir = '/var/lib/rook/rook-ceph/log' + update_archive_setting(ctx, 'log', log_dir) + + try: + yield + + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + + finally: + log.info('Checking cluster log for badness...') + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. + :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + f'{log_dir}/ceph.log', + ] + if excludes: + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + r = ctx.rook[cluster_name].remote.run( + stdout=BytesIO(), + args=args, + ) + stdout = r.stdout.getvalue().decode() + if stdout: + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config.get('log-ignorelist')) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log-ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + if ctx.archive is 
not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + log_dir, + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.name) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, log_dir, + os.path.join(sub, 'log')) + except ReadError: + pass + + +def build_initial_config(ctx, config): + path = os.path.join(os.path.dirname(__file__), 'rook-ceph.conf') + conf = configobj.ConfigObj(path, file_error=True) + + # overrides + for section, keys in config.get('conf',{}).items(): + for key, value in keys.items(): + log.info(" override: [%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + return conf + + +@contextlib.contextmanager +def rook_cluster(ctx, config): + cluster_name = config['cluster'] + + # count how many OSDs we'll create + num_devs = 0 + num_hosts = 0 + for remote in ctx.cluster.remotes.keys(): + ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines() + num_devs += len(ls) + num_hosts += 1 + ctx.rook[cluster_name].num_osds = num_devs + + # config + config = build_initial_config(ctx, config) + config_fp = BytesIO() + config.write(config_fp) + log.info(f'Config:\n{config_fp.getvalue()}') + _kubectl(ctx, config, ['create', '-f', '-'], stdin=yaml.dump({ + 'apiVersion': 'v1', + 'kind': 'ConfigMap', + 'metadata': { + 'name': 'rook-config-override', + 'namespace': 'rook-ceph'}, + 'data': { + 'config': config_fp.getvalue() + } + })) + + # cluster + cluster = { + 'apiVersion': 'ceph.rook.io/v1', + 
'kind': 'CephCluster', + 'metadata': {'name': 'rook-ceph', 'namespace': 'rook-ceph'}, + 'spec': { + 'cephVersion': { + 'image': ctx.rook[cluster_name].image, + 'allowUnsupported': True, + }, + 'dataDirHostPath': '/var/lib/rook', + 'skipUpgradeChecks': True, + 'mgr': { + 'count': 1, + 'modules': [ + { 'name': 'rook', 'enabled': True }, + ], + }, + 'mon': { + 'count': num_hosts, + 'allowMultiplePerNode': True, + }, + 'storage': { + 'storageClassDeviceSets': [ + { + 'name': 'scratch', + 'count': num_devs, + 'portable': False, + 'volumeClaimTemplates': [ + { + 'metadata': {'name': 'data'}, + 'spec': { + 'resources': { + 'requests': { + 'storage': '10Gi' # <= (lte) the actual PV size + } + }, + 'storageClassName': 'scratch', + 'volumeMode': 'Block', + 'accessModes': ['ReadWriteOnce'], + }, + }, + ], + } + ], + }, + } + } + teuthology.deep_merge(cluster['spec'], config.get('spec', {})) + + cluster_yaml = yaml.dump(cluster) + log.info(f'Cluster:\n{cluster_yaml}') + try: + ctx.rook[cluster_name].remote.write_file('cluster.yaml', cluster_yaml) + _kubectl(ctx, config, ['create', '-f', 'cluster.yaml']) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + _kubectl(ctx, config, ['delete', '-f', 'cluster.yaml'], check_status=False) + + # wait for cluster to shut down + log.info('Waiting for cluster to stop') + running = True + with safe_while(sleep=5, tries=100, action="wait for teardown") as proceed: + while running and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods'], + stdout=BytesIO(), + ) + running = False + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if ( + name != 'NAME' + and not name.startswith('csi-') + and not name.startswith('rook-ceph-operator-') + and not name.startswith('rook-ceph-tools-') + ): + running = True + break + + _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'delete', 'configmap', 'rook-config-override'], + check_status=False, 
+ ) + ctx.rook[cluster_name].remote.run(args=['rm', '-f', 'cluster.yaml']) + + +@contextlib.contextmanager +def rook_toolbox(ctx, config): + cluster_name = config['cluster'] + try: + _kubectl(ctx, config, [ + 'create', + '-f', 'rook/cluster/examples/kubernetes/ceph/toolbox.yaml', + ]) + + log.info('Waiting for tools container to start') + toolbox = None + with safe_while(sleep=5, tries=100, action="wait for toolbox") as proceed: + while not toolbox and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-tools'], + stdout=BytesIO(), + ) + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if status == 'Running': + toolbox = name + break + ctx.rook[cluster_name].toolbox = toolbox + yield + + except Exception as e: + log.exception(e) + raise + + finally: + _kubectl(ctx, config, [ + 'delete', + '-f', 'rook/cluster/examples/kubernetes/ceph/toolbox.yaml', + ], check_status=False) + + +@contextlib.contextmanager +def wait_for_osds(ctx, config): + cluster_name = config.get('cluster', 'ceph') + + want = ctx.rook[cluster_name].num_osds + log.info(f'Waiting for {want} OSDs') + with safe_while(sleep=10, tries=90, action="check osd count") as proceed: + while proceed(): + p = _shell(ctx, config, ['ceph', 'osd', 'stat', '-f', 'json'], + stdout=BytesIO(), + check_status=False) + if p.exitstatus == 0: + r = json.loads(p.stdout.getvalue().decode('utf-8')) + have = r.get('num_up_osds', 0) + if have == want: + break + log.info(f' have {have}/{want} OSDs') + + yield + + +@contextlib.contextmanager +def ceph_config_keyring(ctx, config): + # get config and push to hosts + log.info('Distributing ceph config and client.admin keyring') + p = _shell(ctx, config, ['cat', '/etc/ceph/ceph.conf'], stdout=BytesIO()) + conf = p.stdout.getvalue() + p = _shell(ctx, config, ['cat', '/etc/ceph/keyring'], stdout=BytesIO()) + keyring = p.stdout.getvalue() + ctx.cluster.run(args=['sudo', 
'mkdir', '-p', '/etc/ceph']) + for remote in ctx.cluster.remotes.keys(): + remote.write_file( + '/etc/ceph/ceph.conf', + conf, + sudo=True, + ) + remote.write_file( + '/etc/ceph/keyring', + keyring, + sudo=True, + ) + + try: + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up config and client.admin keyring') + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + '/etc/ceph/ceph.conf', + '/etc/ceph/ceph.client.admin.keyring' + ]) + + +@contextlib.contextmanager +def ceph_clients(ctx, config): + cluster_name = config['cluster'] + + log.info('Setting up client nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, + name) + r = _shell(ctx, config, + args=[ + 'ceph', 'auth', + 'get-or-create', name, + 'mon', 'allow *', + 'osd', 'allow *', + 'mds', 'allow *', + 'mgr', 'allow *', + ], + stdout=BytesIO(), + ) + keyring = r.stdout.getvalue() + remote.write_file(client_keyring, keyring, sudo=True, mode='0644') + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy rook-ceph cluster + + tasks: + - kubeadm: + - rook: + branch: wip-foo + spec: + mon: + count: 1 + + The spec item is deep-merged against the cluster.yaml. The branch, sha1, or + image items are used to determine the Ceph container image. 
+ """ + if not config: + config = {} + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + log.info('Rook start') + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('rook', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'rook'): + ctx.rook = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.rook: + ctx.rook[cluster_name] = argparse.Namespace() + + ctx.rook[cluster_name].remote = list(ctx.cluster.remotes.keys())[0] + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + if 'image' in config: + ctx.rook[cluster_name].image = config.get('image') + else: + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + if sha1: + if flavor == "crimson": + ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + else: + # hmm, fall back to branch? + branch = config.get('branch', 'master') + ctx.rook[cluster_name].image = container_image_name + ':' + branch + log.info('Ceph image is %s' % ctx.rook[cluster_name].image) + + with contextutil.nested( + lambda: rook_operator(ctx, config), + lambda: ceph_log(ctx, config), + lambda: rook_cluster(ctx, config), + lambda: rook_toolbox(ctx, config), + lambda: wait_for_osds(ctx, config), + lambda: ceph_config_keyring(ctx, config), + lambda: ceph_clients(ctx, config), + ): + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.rook[cluster_name].remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' 
+ cluster_name), + cluster=cluster_name, + rook=True, + ) + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + log.info('Rook complete, yielding') + yield + + finally: + log.info('Tearing down rook') diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py new file mode 100644 index 000000000..7b77359fc --- /dev/null +++ b/qa/tasks/s3a_hadoop.py @@ -0,0 +1,285 @@ +import contextlib +import logging +from teuthology import misc +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run Hadoop S3A tests using Ceph + usage: + -tasks: + ceph-ansible: + s3a-hadoop: + maven-version: '3.6.3' (default) + hadoop-version: '2.9.2' + bucket-name: 's3atest' (default) + access-key: 'anykey' (uses a default value) + secret-key: 'secretkey' ( uses a default value) + role: client.0 + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task' + + overrides = ctx.config.get('overrides', {}) + misc.deep_merge(config, overrides.get('s3a-hadoop', {})) + testdir = misc.get_testdir(ctx) + + role = config.get('role') + (remote,) = ctx.cluster.only(role).remotes.keys() + endpoint = ctx.rgw.role_endpoints.get(role) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(role) + + # get versions + maven_major = config.get('maven-major', 'maven-3') + maven_version = config.get('maven-version', '3.6.3') + hadoop_ver = config.get('hadoop-version', '2.9.2') + bucket_name = config.get('bucket-name', 's3atest') + access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F') + secret_key = config.get( + 'secret-key', + 'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb') + + # set versions for cloning the repo + apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format( + maven_version=maven_version) + maven_link = 
'http://archive.apache.org/dist/maven/' + \ + '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + apache_maven + hadoop_git = 'https://github.com/apache/hadoop' + hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver) + if hadoop_ver == 'trunk': + # just checkout a new branch out of trunk + hadoop_rel = 'hadoop-ceph-trunk' + install_prereq(remote) + remote.run( + args=[ + 'cd', + testdir, + run.Raw('&&'), + 'wget', + maven_link, + run.Raw('&&'), + 'tar', + '-xvf', + apache_maven, + run.Raw('&&'), + 'git', + 'clone', + run.Raw(hadoop_git), + run.Raw('&&'), + 'cd', + 'hadoop', + run.Raw('&&'), + 'git', + 'checkout', + '-b', + run.Raw(hadoop_rel) + ] + ) + configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir) + setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir) + if hadoop_ver.startswith('2.8'): + # test all ITtests but skip AWS test using public bucket landsat-pds + # which is not available from within this test + test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \ + -Dfs.s3a.scale.test.timeout=1200 \ + -Dfs.s3a.scale.test.huge.filesize=256M verify' + else: + test_options = 'test -Dtest=S3a*,TestS3A*' + try: + run_s3atest(remote, maven_version, testdir, test_options) + yield + finally: + log.info("Done s3a testing, Cleaning up") + for fil in ['apache*', 'hadoop*', 'venv*', 'create*']: + remote.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))]) + + +def install_prereq(client): + """ + Install pre requisites for RHEL and CentOS + TBD: Ubuntu + """ + if client.os.name == 'rhel' or client.os.name == 'centos': + client.run( + args=[ + 'sudo', + 'yum', + 'install', + '-y', + 'protobuf-c.x86_64', + 'java', + 'java-1.8.0-openjdk-devel', + 'dnsmasq' + ] + ) + + +def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir): + """ + Create user with access_key and 
secret_key that will be + used for the s3a testdir + """ + client.run( + args=[ + 'sudo', + 'radosgw-admin', + 'user', + 'create', + run.Raw('--uid'), + 's3a', + run.Raw('--display-name="s3a cephtests"'), + run.Raw('--access-key={access_key}'.format(access_key=access_key)), + run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)), + run.Raw('--email=s3a@ceph.com'), + ] + ) + client.run( + args=[ + 'python3', + '-m', + 'venv', + '{testdir}/venv'.format(testdir=testdir), + run.Raw('&&'), + run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)), + 'install', + 'boto' + ] + ) + create_bucket = """ +#!/usr/bin/env python +import boto +import boto.s3.connection +access_key = '{access_key}' +secret_key = '{secret_key}' + +conn = boto.connect_s3( + aws_access_key_id = access_key, + aws_secret_access_key = secret_key, + host = '{dns_name}', + is_secure=False, + calling_format = boto.s3.connection.OrdinaryCallingFormat(), + ) +bucket = conn.create_bucket('{bucket_name}') +for bucket in conn.get_all_buckets(): + print(bucket.name + "\t" + bucket.creation_date) +""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name) + py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir) + client.sudo_write_file(py_bucket_file, create_bucket, mode='0744') + client.run( + args=[ + 'cat', + '{testdir}/create_bucket.py'.format(testdir=testdir), + ] + ) + client.run( + args=[ + '{testdir}/venv/bin/python'.format(testdir=testdir), + '{testdir}/create_bucket.py'.format(testdir=testdir), + ] + ) + + +def run_s3atest(client, maven_version, testdir, test_options): + """ + Finally run the s3a test + """ + aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir) + run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version) + # Remove AWS CredentialsProvider tests as it hits public bucket from AWS + # better solution is to create the public bucket on 
local server and test + rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java' + client.run( + args=[ + 'cd', + run.Raw(aws_testdir), + run.Raw('&&'), + run.Raw(rm_test), + run.Raw('&&'), + run.Raw(run_test), + run.Raw(test_options) + ] + ) + + +def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir): + """ + Use the template to configure s3a test, Fill in access_key, secret_key + and other details required for test. + """ + config_template = """<configuration> +<property> +<name>fs.s3a.endpoint</name> +<value>{name}</value> +</property> + +<property> +<name>fs.contract.test.fs.s3a</name> +<value>s3a://{bucket_name}/</value> +</property> + +<property> +<name>fs.s3a.connection.ssl.enabled</name> +<value>false</value> +</property> + +<property> +<name>test.fs.s3n.name</name> +<value>s3n://{bucket_name}/</value> +</property> + +<property> +<name>test.fs.s3a.name</name> +<value>s3a://{bucket_name}/</value> +</property> + +<property> +<name>test.fs.s3.name</name> +<value>s3://{bucket_name}/</value> +</property> + +<property> +<name>fs.s3.awsAccessKeyId</name> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3.awsSecretAccessKey</name> +<value>{secret_key}</value> +</property> + +<property> +<name>fs.s3n.awsAccessKeyId</name> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3n.awsSecretAccessKey</name> +<value>{secret_key}</value> +</property> + +<property> +<name>fs.s3a.access.key</name> +<description>AWS access key ID. Omit for Role-based authentication.</description> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3a.secret.key</name> +<description>AWS secret key. 
Omit for Role-based authentication.</description> +<value>{secret_key}</value> +</property> +</configuration> +""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key) + config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml' + client.write_file(config_path, config_template) + # output for debug + client.run(args=['cat', config_path]) diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py new file mode 100644 index 000000000..8b40499e4 --- /dev/null +++ b/qa/tasks/s3tests.py @@ -0,0 +1,653 @@ +""" +Run a set of s3 tests on rgw. +""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading s3-tests...') + testdir = teuthology.get_testdir(ctx) + for (client, client_config) in config.items(): + s3tests_branch = client_config.get('force-branch', None) + if not s3tests_branch: + raise ValueError( + "Could not determine what branch to use for s3-tests. 
Please add 'force-branch: {s3-tests branch name}' to the .yaml config for this s3tests task.") + + log.info("Using branch '%s' for s3tests", s3tests_branch) + sha1 = client_config.get('sha1') + git_remote = client_config.get('git_remote', teuth_config.ceph_git_base_url) + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', s3tests_branch, + git_remote + 's3-tests.git', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests'.format(tdir=testdir), + ], + ) + + +def _config_user(s3tests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + s3tests_conf[section].setdefault('user_id', user) + s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + s3tests_conf[section].setdefault('access_key', + ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + s3tests_conf[section].setdefault('secret_key', + base64.b64encode(os.urandom(40)).decode()) + s3tests_conf[section].setdefault('totp_serial', + ''.join(random.choice(string.digits) for i in range(10))) + s3tests_conf[section].setdefault('totp_seed', + base64.b32encode(os.urandom(40)).decode()) + s3tests_conf[section].setdefault('totp_seconds', '5') + + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + + if ctx.sts_variable: + users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser', 'iam': 'foobar'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.items(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + if section=='iam': + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'user-policy=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'roles=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 
'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'oidc-provider=*', + '--cluster', cluster_name, + ], + ) + + else: + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + '--caps', 'user-policy=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'mfa', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--totp-serial', s3tests_conf[section]['totp_serial'], + '--totp-seed', s3tests_conf[section]['totp_seed'], + '--totp-seconds', s3tests_conf[section]['totp_seconds'], + '--totp-window', '8', + '--totp-seed-type', 'base32', + '--cluster', cluster_name, + ], + ) + + else: + users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.items(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--email', s3tests_conf[section]['email'], + '--caps', 'user-policy=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'mfa', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--totp-serial', s3tests_conf[section]['totp_serial'], + '--totp-seed', s3tests_conf[section]['totp_seed'], + '--totp-seconds', s3tests_conf[section]['totp_seconds'], + '--totp-window', '8', + '--totp-seed-type', 'base32', + '--cluster', cluster_name, + ], + ) + + if "TOKEN" in os.environ: + s3tests_conf.setdefault('webidentity', {}) + s3tests_conf['webidentity'].setdefault('token',os.environ['TOKEN']) + s3tests_conf['webidentity'].setdefault('aud',os.environ['AUD']) + s3tests_conf['webidentity'].setdefault('sub',os.environ['SUB']) + s3tests_conf['webidentity'].setdefault('azp',os.environ['AZP']) + s3tests_conf['webidentity'].setdefault('user_token',os.environ['USER_TOKEN']) + s3tests_conf['webidentity'].setdefault('thumbprint',os.environ['THUMBPRINT']) + s3tests_conf['webidentity'].setdefault('KC_REALM',os.environ['KC_REALM']) + + try: + yield + finally: + for client in config['clients']: + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
@contextlib.contextmanager
def configure(ctx, config):
    """
    Configure the s3-tests.  This includes the running of the
    bootstrap code and the updating of local conf files.

    For every client this fills in the ``DEFAULT`` section of its s3tests
    configuration (calling format, host, optional website domain and the
    two KMS key ids), runs ``./bootstrap`` inside the s3-tests checkout and
    writes the configuration to ``{testdir}/archive/s3-tests.{client}.conf``.
    A ``boto.cfg`` is then written for every client and removed again when
    the context exits.

    :param ctx: Context passed to task
    :param config: dict with ``clients`` (client role -> properties, may be
                   None) and ``s3tests_conf`` (client role -> configuration)
    :raises ConfigError: if a barbican ``kms_key``/``kms_key2`` names a key
                         that is not present in ``ctx.barbican.keys``
    """
    assert isinstance(config, dict)
    log.info('Configuring s3-tests...')
    testdir = teuthology.get_testdir(ctx)
    for client, properties in config['clients'].items():
        properties = properties or {}
        s3tests_conf = config['s3tests_conf'][client]
        defaults = s3tests_conf['DEFAULT']
        defaults['calling_format'] = properties.get('calling-format', 'ordinary')

        # use rgw_server if given, or default to local client
        role = properties.get('rgw_server', client)

        endpoint = ctx.rgw.role_endpoints.get(role)
        assert endpoint, 's3tests: no rgw endpoint for {}'.format(role)

        defaults['host'] = endpoint.dns_name

        website_role = properties.get('rgw_website_server')
        if website_role:
            website_endpoint = ctx.rgw.role_endpoints.get(website_role)
            assert website_endpoint, \
                's3tests: no rgw endpoint for rgw_website_server {}'.format(website_role)
            assert website_endpoint.website_dns_name, \
                's3tests: no dns-s3website-name for rgw_website_server {}'.format(website_role)
            defaults['s3website_domain'] = website_endpoint.website_dns_name

        if hasattr(ctx, 'barbican'):
            # The sub-section gets its own name.  Rebinding ``properties``
            # here used to make the slow_backend lookup below read the
            # barbican section, and crashed when that section was missing
            # (KeyError) or None (AttributeError).
            barbican = properties.get('barbican')
            if barbican is not None and 'kms_key' in barbican:
                # validate both keys before touching the configuration
                for prop in ('kms_key', 'kms_key2'):
                    if barbican[prop] not in ctx.barbican.keys:
                        raise ConfigError('Key '+barbican[prop]+' not defined')

                defaults['kms_keyid'] = ctx.barbican.keys[barbican['kms_key']]['id']
                defaults['kms_keyid2'] = ctx.barbican.keys[barbican['kms_key2']]['id']

        elif hasattr(ctx, 'vault'):
            engine_or_flavor = vars(ctx.vault).get('flavor', ctx.vault.engine)
            # ``or []``: the role may have no key list at all
            role_keys = vars(ctx.vault).get('keys', {}).get(ctx.rgw.vault_role) or []
            keys = [entry['Path'] for entry in role_keys]
            keys.extend(['testkey-1', 'testkey-2'])
            if engine_or_flavor == "old":
                keys = [name + "/1" for name in keys]

            vault = properties.get('vault_%s' % engine_or_flavor) or {}
            defaults['kms_keyid'] = vault.get('key_path', keys[0])
            defaults['kms_keyid2'] = vault.get('key_path2', keys[1])
        elif hasattr(ctx.rgw, 'pykmip_role'):
            keys = []
            for name in (x['Name'] for x in ctx.pykmip.keys[ctx.rgw.pykmip_role]):
                # use the part after the first '-', or the whole name if
                # there is nothing after it
                parts = name.partition('-')
                keys.append(parts[2] if parts[2] else parts[0])
            keys.extend(['testkey-1', 'testkey-2'])
            defaults['kms_keyid'] = properties.get('kms_key', keys[0])
            defaults['kms_keyid2'] = properties.get('kms_key2', keys[1])
        else:
            # Fallback scenario where it's the local (ceph.conf) kms being tested
            defaults['kms_keyid'] = 'testkey-1'
            defaults['kms_keyid2'] = 'testkey-2'

        # read from the client's own properties, whichever KMS is in use
        slow_backend = properties.get('slow_backend')
        if slow_backend:
            s3tests_conf['fixtures']['slow backend'] = slow_backend

        (remote,) = ctx.cluster.only(client).remotes.keys()
        remote.run(
            args=[
                'cd',
                '{tdir}/s3-tests'.format(tdir=testdir),
                run.Raw('&&'),
                './bootstrap',
            ],
        )
        conf_fp = BytesIO()
        s3tests_conf.write(conf_fp)
        remote.write_file(
            path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
            data=conf_fp.getvalue(),
        )

    log.info('Configuring boto...')
    boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template')
    for client, properties in config['clients'].items():
        with open(boto_src) as f:
            template = f.read()
        # ``config`` here is the {clients, s3tests_conf} wrapper, so the
        # timeout has to come from the client's properties; the wrapper is
        # still consulted as a fallback before defaulting to 30.
        idle_timeout = (properties or {}).get(
            'idle_timeout', config.get('idle_timeout', 30))
        (remote,) = ctx.cluster.only(client).remotes.keys()
        remote.write_file(
            '{tdir}/boto.cfg'.format(tdir=testdir),
            template.format(idle_timeout=idle_timeout),
        )

    try:
        yield

    finally:
        log.info('Cleaning up boto...')
        for client in config['clients']:
            (remote,) = ctx.cluster.only(client).remotes.keys()
            remote.run(
                args=[
                    'rm',
                    '{tdir}/boto.cfg'.format(tdir=testdir),
                ],
            )
@contextlib.contextmanager
def task(ctx, config):
    """
    Run the s3-tests suite against rgw.

    To run all tests on all clients::

        tasks:
        - ceph:
        - rgw:
        - s3tests:

    To restrict testing to particular clients::

        tasks:
        - ceph:
        - rgw: [client.0]
        - s3tests: [client.0]

    To run against a server on client.1 and increase the boto timeout to 10m::

        tasks:
        - ceph:
        - rgw: [client.1]
        - s3tests:
            client.0:
              rgw_server: client.1
              idle_timeout: 600

    To pass extra arguments to nose (e.g. to run a certain test)::

        tasks:
        - ceph:
        - rgw: [client.0]
        - s3tests:
            client.0:
              extra_args: ['test_s3:test_object_acl_grand_public_read']
            client.1:
              extra_args: ['--exclude', 'test_100_continue']

    To run any sts-tests don't forget to set a config variable named 'sts_tests' to 'True' as follows::

        tasks:
        - ceph:
        - rgw: [client.0]
        - s3tests:
            client.0:
              sts_tests: True
              rgw_server: client.0

    :param ctx: Context passed to task; must already carry ``rgw``
    :param config: None (all clients), a list of client roles, or a dict
                   mapping client roles to their options
    """
    assert hasattr(ctx, 'rgw'), 's3tests must run after the rgw task'
    assert config is None or isinstance(config, list) \
        or isinstance(config, dict), \
        "task s3tests only supports a list or dictionary for configuration"
    all_clients = ['client.{id}'.format(id=id_)
                   for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
    if config is None:
        config = all_clients
    if isinstance(config, list):
        config = dict.fromkeys(config)
    clients = config.keys()

    overrides = ctx.config.get('overrides', {})
    # merge each client section, not the top level.
    for client in config.keys():
        if not config[client]:
            config[client] = {}
        teuthology.deep_merge(config[client], overrides.get('s3tests', {}))

    log.debug('s3tests config is %s', config)

    # The flag lives on ctx and is consulted once for all clients by
    # create_users(), so turn it on when any client asks for sts tests.
    # Previously each client overwrote it, so only the last client counted,
    # and it was never set when there were no clients.
    ctx.sts_variable = any('sts_tests' in client_config
                           for client_config in config.values())

    # Sections every configuration carries, plus the sts-only ones:
    #  - 'iam' for assume_role_test / get_session_token_test
    #  - 'webidentity' additionally for webidentity_test (needs TOKEN)
    sections = ['fixtures', 's3 main', 's3 alt', 's3 tenant']
    if ctx.sts_variable:
        sections.append('iam')
        if "TOKEN" in os.environ:
            sections.append('webidentity')

    s3tests_conf = {}
    for client in clients:
        endpoint = ctx.rgw.role_endpoints.get(client)
        assert endpoint, 's3tests: no rgw endpoint for {}'.format(client)

        infile = {
            'DEFAULT': {
                'port': endpoint.port,
                'is_secure': endpoint.cert is not None,
                'api_name': 'default',
            },
        }
        for section in sections:
            infile[section] = {}
        s3tests_conf[client] = ConfigObj(indent_type='', infile=infile)

    # The whole run (including teardown of every stage) happens inside this
    # ``with``; control is only yielded to the caller afterwards.
    with contextutil.nested(
        lambda: download(ctx=ctx, config=config),
        lambda: create_users(ctx=ctx, config=dict(
                clients=clients,
                s3tests_conf=s3tests_conf,
                )),
        lambda: configure(ctx=ctx, config=dict(
                clients=config,
                s3tests_conf=s3tests_conf,
                )),
        lambda: run_tests(ctx=ctx, config=config),
        lambda: scan_for_leaked_encryption_keys(ctx=ctx, config=config),
        ):
        pass
    yield
class S3tests_java(Task):
    """
    Download and install S3 tests in Java
    This will require openjdk and gradle
    """

    def __init__(self, ctx, config):
        """
        Select the clients to test and set up the per-task state.

        The task runs on every 'client.N' role that appears in the task
        config; when none does, it falls back to 'client.0'.
        """
        super(S3tests_java, self).__init__(ctx, config)
        self.log = log
        log.debug('S3 Tests Java: __INIT__ ')
        assert hasattr(ctx, 'rgw'), 'S3tests_java must run after the rgw task'
        clients = ['client.{id}'.format(id=id_)
                   for id_ in teuthology.all_roles_of_type(self.ctx.cluster, 'client')]
        self.all_clients = [client for client in clients if client in self.config]
        # The list is never None, so test for emptiness; the fallback has to
        # be a list because every consumer iterates over all_clients.
        if not self.all_clients:
            self.all_clients = ['client.0']
        self.users = {'s3main': 'tester',
                      's3alt': 'johndoe', 'tenanted': 'testx$tenanteduser'}
        # Set here as well as in run_tests() so that end() -> remove_tests()
        # works even when begin() failed before run_tests() was reached.
        self.log_fwd = False
        self.log_name = ''

    def _client_config(self, client):
        """Return the config section of ``client``, or {} if absent/None."""
        if client in self.config and self.config[client] is not None:
            return self.config[client]
        return {}

    def _gc_process(self, client):
        """Run ``radosgw-admin gc process --include-all`` on ``client``."""
        self.ctx.cluster.only(client).run(
            args=['radosgw-admin', 'gc',
                  'process', '--include-all'],
            stdout=BytesIO()
        )

    def setup(self):
        """Download the test suite and install its prerequisites."""
        super(S3tests_java, self).setup()
        log.debug('S3 Tests Java: SETUP')
        for client in self.all_clients:
            self.download_test_suite(client)
            self.install_required_packages(client)

    def begin(self):
        """Create the S3 users and run the tests."""
        super(S3tests_java, self).begin()
        log.debug('S3 Tests Java: BEGIN')
        for (host, roles) in self.ctx.cluster.remotes.items():
            log.debug(
                'S3 Tests Java: Cluster config is: {cfg}'.format(cfg=roles))
            log.debug('S3 Tests Java: Host is: {host}'.format(host=host))
        self.create_users()
        self.run_tests()

    def end(self):
        """Remove the checkout and delete the S3 users of every client."""
        super(S3tests_java, self).end()
        log.debug('S3 Tests Java: END')
        for client in self.all_clients:
            self.remove_tests(client)
            self.delete_users(client)

    def download_test_suite(self, client):
        """
        Clone the java test suite into {testdir}/s3-tests-java on ``client``.

        Honours the client's 'force-branch', 'force-repo', 'sha1' and
        'log-level' (info/debug) options.
        """
        log.info("S3 Tests Java: Downloading test suite...")
        testdir = teuthology.get_testdir(self.ctx)
        client_config = self._client_config(client)
        branch = 'master'
        repo = 'https://github.com/ceph/java_s3tests.git'
        if client_config.get('force-branch') is not None:
            branch = client_config['force-branch']
        if client_config.get('force-repo') is not None:
            repo = client_config['force-repo']
        self.ctx.cluster.only(client).run(
            args=[
                'git', 'clone',
                '-b', branch,
                repo,
                '{tdir}/s3-tests-java'.format(tdir=testdir),
            ],
            stdout=BytesIO()
        )
        if client_config.get('sha1') is not None:
            self.ctx.cluster.only(client).run(
                args=[
                    'cd', '{tdir}/s3-tests-java'.format(tdir=testdir),
                    run.Raw('&&'),
                    'git', 'reset', '--hard', client_config['sha1'],
                ],
            )

        if 'log-level' in client_config:
            # rewrite the log4j root logger level of the checkout
            for level, log4j_level in (('info', 'INFO'), ('debug', 'DEBUG')):
                if client_config['log-level'] == level:
                    self.ctx.cluster.only(client).run(
                        args=[
                            'sed', '-i',
                            '\'s/log4j.rootLogger=WARN/log4j.rootLogger={lvl}/g\''.format(
                                lvl=log4j_level),
                            '{tdir}/s3-tests-java/src/main/resources/log4j.properties'.format(
                                tdir=testdir)
                        ]
                    )

    def install_required_packages(self, client):
        """
        Run bootstrap script to install openjdk and gradle.
        Add certificates to java keystore
        """
        log.info("S3 Tests Java: Installing required packages...")
        testdir = teuthology.get_testdir(self.ctx)
        self.ctx.cluster.only(client).run(
            args=['{tdir}/s3-tests-java/bootstrap.sh'.format(tdir=testdir)],
            stdout=BytesIO()
        )

        endpoint = self.ctx.rgw.role_endpoints[client]
        if endpoint.cert:
            path = 'lib/security/cacerts'
            self.ctx.cluster.only(client).run(
                args=['sudo',
                      'keytool',
                      '-import', '-alias', '{alias}'.format(
                          alias=endpoint.hostname),
                      '-keystore',
                      run.Raw(
                          '$(readlink -e $(dirname $(readlink -e $(which keytool)))/../{path})'.format(path=path)),
                      '-file', endpoint.cert.certificate,
                      '-storepass', 'changeit',
                      ],
                stdout=BytesIO()
            )

    def create_users(self):
        """
        Create a main and an alternative s3 user.
        Configuration is read from a skeleton config file
        s3tests.teuth.config.yaml in the java-s3tests repository
        and missing information is added from the task.
        Existing values are NOT overridden unless they are empty!
        """
        log.info("S3 Tests Java: Creating S3 users...")
        testdir = teuthology.get_testdir(self.ctx)
        for client in self.all_clients:
            endpoint = self.ctx.rgw.role_endpoints.get(client)
            local_user = getpass.getuser()
            remote_user = teuthology.get_test_user()
            # fetch the skeleton config from the test node into the local
            # user's home directory, parse it, and remove it again below
            os.system("scp {remote}@{host}:{tdir}/s3-tests-java/s3tests.teuth.config.yaml /home/{local}/".format(
                host=endpoint.hostname, tdir=testdir, remote=remote_user, local=local_user))
            s3tests_conf = teuthology.config_file(
                '/home/{local}/s3tests.teuth.config.yaml'.format(local=local_user))
            log.debug("S3 Tests Java: s3tests_conf is {s3cfg}".format(
                s3cfg=s3tests_conf))
            # iterate over a copy: sections missing from the skeleton are
            # dropped from self.users inside the loop
            for section, user in list(self.users.items()):
                if section in s3tests_conf:
                    s3_user_id = '{user}.{client}'.format(
                        user=user, client=client)
                    log.debug(
                        'S3 Tests Java: Creating user {s3_user_id}'.format(s3_user_id=s3_user_id))
                    self._config_user(s3tests_conf=s3tests_conf,
                                      section=section, user=s3_user_id, client=client)
                    cluster_name, daemon_type, client_id = teuthology.split_role(
                        client)
                    client_with_id = daemon_type + '.' + client_id
                    args = [
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir),
                        'radosgw-admin',
                        '-n', client_with_id,
                        'user', 'create',
                        '--uid', s3tests_conf[section]['user_id'],
                        '--display-name', s3tests_conf[section]['display_name'],
                        '--access-key', s3tests_conf[section]['access_key'],
                        '--secret', s3tests_conf[section]['access_secret'],
                        '--email', s3tests_conf[section]['email'],
                        '--cluster', cluster_name,
                    ]
                    log.info('{args}'.format(args=args))
                    self.ctx.cluster.only(client).run(
                        args=args,
                        stdout=BytesIO()
                    )
                else:
                    self.users.pop(section)
            self._write_cfg_file(s3tests_conf, client)
            os.system(
                "rm -rf /home/{local}/s3tests.teuth.config.yaml".format(local=local_user))

    def _config_user(self, s3tests_conf, section, user, client):
        """
        Generate missing users data for this section by stashing away keys, ids, and
        email addresses.
        """
        access_key = ''.join(random.choice(string.ascii_uppercase)
                             for i in range(20))
        access_secret = base64.b64encode(os.urandom(40)).decode('ascii')
        endpoint = self.ctx.rgw.role_endpoints.get(client)

        self._set_cfg_entry(
            s3tests_conf[section], 'user_id', '{user}'.format(user=user))
        self._set_cfg_entry(
            s3tests_conf[section], 'email', '{user}_test@test.test'.format(user=user))
        self._set_cfg_entry(
            s3tests_conf[section], 'display_name', 'Ms. {user}'.format(user=user))
        self._set_cfg_entry(
            s3tests_conf[section], 'access_key', '{ak}'.format(ak=access_key))
        self._set_cfg_entry(
            s3tests_conf[section], 'access_secret', '{asc}'.format(asc=access_secret))
        self._set_cfg_entry(
            s3tests_conf[section], 'region', 'us-east-1')
        self._set_cfg_entry(
            s3tests_conf[section], 'endpoint', '{ip}:{port}'.format(
                ip=endpoint.hostname, port=endpoint.port))
        self._set_cfg_entry(
            s3tests_conf[section], 'host', endpoint.hostname)
        self._set_cfg_entry(
            s3tests_conf[section], 'port', endpoint.port)
        self._set_cfg_entry(
            s3tests_conf[section], 'is_secure', True if endpoint.cert else False)

        log.debug("S3 Tests Java: s3tests_conf[{sect}] is {s3cfg}".format(
            sect=section, s3cfg=s3tests_conf[section]))
        log.debug('S3 Tests Java: Setion, User = {sect}, {user}'.format(
            sect=section, user=user))

    def _write_cfg_file(self, cfg_dict, client):
        """
        Write s3 tests java config file on the remote node.
        """
        testdir = teuthology.get_testdir(self.ctx)
        (remote,) = self.ctx.cluster.only(client).remotes.keys()
        data = yaml.safe_dump(cfg_dict, default_flow_style=False)
        path = testdir + '/archive/s3-tests-java.' + client + '.conf'
        remote.write_file(path, data)

    def _set_cfg_entry(self, cfg_dict, key, value):
        """Set ``cfg_dict[key]`` to ``value`` if the key is missing or None."""
        if key not in cfg_dict or cfg_dict[key] is None:
            cfg_dict[key] = value

    def run_tests(self):
        """
        Run the gradle test suite on every client.

        Without 'extra-args' the AWS4Test, BucketTest and ObjectTest groups
        are run one after another; with 'extra-args' gradle is invoked once
        with those arguments.  Garbage collection is processed twice before
        and twice after every gradle run.
        """
        log.info("S3 Tests Java: Running tests...")
        testdir = teuthology.get_testdir(self.ctx)
        for client in self.all_clients:
            self.ctx.cluster.only(client).run(
                args=['cp',
                      '{tdir}/archive/s3-tests-java.{client}.conf'.format(
                          tdir=testdir, client=client),
                      '{tdir}/s3-tests-java/config.properties'.format(
                          tdir=testdir)
                      ],
                stdout=BytesIO()
            )
            args = ['cd',
                    '{tdir}/s3-tests-java'.format(tdir=testdir),
                    run.Raw('&&'),
                    '/opt/gradle/gradle/bin/gradle', 'clean', 'test',
                    '--rerun-tasks', '--no-build-cache',
                    ]
            extra_args = []
            suppress_groups = False
            self.log_fwd = False
            self.log_name = ''
            client_config = self._client_config(client)
            if 'extra-args' in client_config:
                extra_args.extend(client_config['extra-args'])
                suppress_groups = True
            if 'log-level' in client_config and client_config['log-level'] == 'debug':
                extra_args += ['--debug']
            if 'log-fwd' in client_config:
                self.log_fwd = True
                self.log_name = '{tdir}/s3tests_log.txt'.format(
                    tdir=testdir)
                if client_config['log-fwd'] is not None:
                    self.log_name = client_config['log-fwd']
                extra_args += [run.Raw('>>'),
                               self.log_name]

            if not suppress_groups:
                test_groups = ['AWS4Test', 'BucketTest', 'ObjectTest']
            else:
                test_groups = ['All']

            for gr in test_groups:
                for _ in range(2):
                    self._gc_process(client)

                if gr != 'All':
                    self.ctx.cluster.only(client).run(
                        args=args + ['--tests'] + [gr] + extra_args,
                        stdout=BytesIO()
                    )
                else:
                    self.ctx.cluster.only(client).run(
                        args=args + extra_args,
                        stdout=BytesIO()
                    )

                for _ in range(2):
                    self._gc_process(client)

    def remove_tests(self, client):
        """Print and remove the forwarded log (if any), then the checkout."""
        log.info('S3 Tests Java: Cleaning up s3-tests-java...')
        testdir = teuthology.get_testdir(self.ctx)

        if self.log_fwd:
            self.ctx.cluster.only(client).run(
                args=['cd',
                      '{tdir}/s3-tests-java'.format(tdir=testdir),
                      run.Raw('&&'),
                      'cat', self.log_name,
                      run.Raw('&&'),
                      'rm', self.log_name],
                stdout=BytesIO()
            )

        self.ctx.cluster.only(client).run(
            args=[
                'rm',
                '-rf',
                '{tdir}/s3-tests-java'.format(tdir=testdir),
            ],
            stdout=BytesIO()
        )

    def delete_users(self, client):
        """Remove the users that create_users() created for ``client``."""
        log.info("S3 Tests Java: Deleting S3 users...")
        testdir = teuthology.get_testdir(self.ctx)
        # Address radosgw-admin the same way create_users() does instead of
        # hard-coding the 'ceph' cluster and passing the full role as name.
        cluster_name, daemon_type, client_id = teuthology.split_role(client)
        client_with_id = daemon_type + '.' + client_id
        for user in self.users.values():
            s3_user_id = '{user}.{client}'.format(user=user, client=client)
            self.ctx.cluster.only(client).run(
                args=[
                    'adjust-ulimits',
                    'ceph-coverage',
                    '{tdir}/archive/coverage'.format(tdir=testdir),
                    'radosgw-admin',
                    '-n', client_with_id,
                    'user', 'rm',
                    '--uid', s3_user_id,
                    '--purge-data',
                    '--cluster', cluster_name,
                ],
                stdout=BytesIO()
            )
@contextlib.contextmanager
def task(ctx, config):
    """
    Setup samba smbd with ceph vfs module.  This task assumes the samba
    package has already been installed via the install task.

    The config is optional and defaults to starting samba on all nodes.
    If a config is given, it is expected to be a list of
    samba nodes to start smbd servers on.

    Example that starts smbd on all samba nodes::

        tasks:
        - install:
        - install:
            project: samba
            extra_packages: ['samba']
        - ceph:
        - samba:
        - interactive:

    Example that starts smbd on just one of the samba nodes and cifs on the other::

        tasks:
        - samba: [samba.0]
        - cifs: [samba.1]

    An optional backend can be specified, and requires a path which smbd will
    use as the backend storage location:

        roles:
            - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a]
            - [client.0, samba.0]

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - samba:
            samba.0:
              cephfuse: "{testdir}/mnt.0"

    This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with
    a UNC of //localhost/cephfuse.  Access through that UNC will be on
    the ceph fuse mount point.

    If no arguments are specified in the samba
    role, the default behavior is to enable the ceph UNC //localhost/ceph
    and use the ceph vfs module as the smbd backend.

    :param ctx: Context
    :param config: Configuration
    """
    log.info("Setting up smbd with ceph vfs...")
    assert config is None or isinstance(config, list) or isinstance(config, dict), \
        "task samba got invalid config"

    if config is None:
        config = dict(('samba.{id}'.format(id=id_), None)
                      for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba'))
    elif isinstance(config, list):
        config = dict((name, None) for name in config)

    samba_servers = list(get_sambas(ctx=ctx, roles=config.keys()))

    testdir = teuthology.get_testdir(ctx)

    if not hasattr(ctx, 'daemons'):
        ctx.daemons = DaemonGroup()

    # backend path of every samba id, so that cleanup inspects the path
    # belonging to each server instead of whatever the setup loop saw last
    backends = {}

    for id_, remote in samba_servers:

        rolestr = "samba.{id_}".format(id_=id_)

        confextras = """vfs objects = ceph
  ceph:config_file = /etc/ceph/ceph.conf"""

        unc = "ceph"
        backend = "/"

        if config[rolestr] is not None:
            # verify that there's just one parameter in role
            if len(config[rolestr]) != 1:
                log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_))
                raise Exception('invalid config')
            confextras = ""
            # dict views cannot be indexed on Python 3; take the single item
            (unc, backendstr) = next(iter(config[rolestr].items()))
            backend = backendstr.format(testdir=testdir)

        backends[id_] = backend

        # on first samba role, set ownership and permissions of ceph root
        # so that samba tests succeed
        if config[rolestr] is None and id_ == samba_servers[0][0]:
            remote.run(
                args=[
                    'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'),
                    'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'),
                    'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'),
                    'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'),
                    'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'),
                    'rm', '-rf', '/tmp/cmnt',
                ],
            )
        else:
            remote.run(
                args=[
                    'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'),
                    'sudo', 'chmod', '1777', backend,
                ],
            )

        remote.sudo_write_file("/usr/local/samba/etc/smb.conf", """
[global]
  workgroup = WORKGROUP
  netbios name = DOMAIN

[{unc}]
  path = {backend}
  {extras}
  writeable = yes
  valid users = ubuntu
""".format(extras=confextras, unc=unc, backend=backend))

        # create ubuntu user
        remote.run(
            args=[
                'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu',
                run.Raw('||'),
                'printf', run.Raw('"ubuntu\nubuntu\n"'),
                run.Raw('|'),
                'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu'
            ])

        smbd_cmd = [
            'sudo',
            'daemon-helper',
            'term',
            'nostdin',
            '/usr/local/samba/sbin/smbd',
            '-F',
        ]
        ctx.daemons.add_daemon(remote, 'smbd', id_,
                               args=smbd_cmd,
                               logger=log.getChild("smbd.{id_}".format(id_=id_)),
                               stdin=run.PIPE,
                               wait=False,
                               )

        # let smbd initialize, probably a better way...
        seconds_to_sleep = 100
        log.info('Sleeping for %s seconds...' % seconds_to_sleep)
        time.sleep(seconds_to_sleep)
        log.info('Sleeping stopped...')

    try:
        yield
    finally:
        log.info('Stopping smbd processes...')
        exc = None
        for d in ctx.daemons.iter_daemons_of_role('smbd'):
            try:
                d.stop()
            except (run.CommandFailedError,
                    run.CommandCrashedError,
                    run.ConnectionLostError) as e:
                exc = e
                log.exception('Saw exception from %s.%s', d.role, d.id_)
        if exc is not None:
            raise exc

        for id_, remote in samba_servers:
            remote.run(
                args=[
                    'sudo',
                    'rm', '-rf',
                    '/usr/local/samba/etc/smb.conf',
                    '/usr/local/samba/private/*',
                    '/usr/local/samba/var/run/',
                    '/usr/local/samba/var/locks',
                    '/usr/local/samba/var/lock',
                ],
            )
            # make sure daemons are gone
            try:
                remote.run(
                    args=[
                        'while',
                        'sudo', 'killall', '-9', 'smbd',
                        run.Raw(';'),
                        'do', 'sleep', '1',
                        run.Raw(';'),
                        'done',
                    ],
                )

                # show what, if anything, still holds this server's backend
                remote.run(
                    args=[
                        'sudo',
                        'lsof',
                        backends[id_],
                    ],
                    check_status=False
                )
                remote.run(
                    args=[
                        'sudo',
                        'fuser',
                        '-M',
                        backends[id_],
                    ],
                    check_status=False
                )
            except Exception:
                # best-effort diagnostics only; never fail teardown on them
                log.exception("Saw exception")
+import contextlib +import gevent +import logging +import random +import time + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run scrub periodically. Randomly chooses an OSD to scrub. + + The config should be as follows: + + scrub: + frequency: <seconds between scrubs> + deep: <bool for deepness> + + example: + + tasks: + - ceph: + - scrub: + frequency: 30 + deep: 0 + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub task only accepts a dict for configuration' + + log.info('Beginning scrub...') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + scrub_proc = Scrubber( + manager, + config, + ) + try: + yield + finally: + log.info('joining scrub') + scrub_proc.do_join() + +class Scrubber: + """ + Scrubbing is actually performed during initialization + """ + def __init__(self, manager, config): + """ + Spawn scrubbing thread upon completion. 
+ """ + self.ceph_manager = manager + self.ceph_manager.wait_for_clean() + + osd_status = self.ceph_manager.get_osd_status() + self.osds = osd_status['up'] + + self.config = config + if self.config is None: + self.config = dict() + + else: + def tmp(x): + """Local display""" + print(x) + self.log = tmp + + self.stopping = False + + log.info("spawning thread") + + self.thread = gevent.spawn(self.do_scrub) + + def do_join(self): + """Scrubbing thread finished""" + self.stopping = True + self.thread.get() + + def do_scrub(self): + """Perform the scrub operation""" + frequency = self.config.get("frequency", 30) + deep = self.config.get("deep", 0) + + log.info("stopping %s" % self.stopping) + + while not self.stopping: + osd = str(random.choice(self.osds)) + + if deep: + cmd = 'deep-scrub' + else: + cmd = 'scrub' + + log.info('%sbing %s' % (cmd, osd)) + self.ceph_manager.raw_cluster_cmd('osd', cmd, osd) + + time.sleep(frequency) diff --git a/qa/tasks/scrub_test.py b/qa/tasks/scrub_test.py new file mode 100644 index 000000000..3d629e9d7 --- /dev/null +++ b/qa/tasks/scrub_test.py @@ -0,0 +1,401 @@ +"""Scrub testing""" + +import contextlib +import json +import logging +import os +import time +import tempfile + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + + +def wait_for_victim_pg(manager): + """Return a PG with some data and its acting set""" + # wait for some PG to have data that we can mess with + victim = None + while victim is None: + stats = manager.get_pg_stats() + for pg in stats: + size = pg['stat_sum']['num_bytes'] + if size > 0: + victim = pg['pgid'] + acting = pg['acting'] + return victim, acting + time.sleep(3) + + +def find_victim_object(ctx, pg, osd): + """Return a file to be fuzzed""" + (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.keys() + data_path = os.path.join( + '/var/lib/ceph/osd', + 'ceph-{id}'.format(id=osd), + 'fuse', + '{pg}_head'.format(pg=pg), + 'all', + ) + + # fuzz 
time + ls_out = osd_remote.sh('sudo ls %s' % data_path) + + # find an object file we can mess with (and not the pg info object) + osdfilename = next(line for line in ls_out.split('\n') + if not line.endswith('::::head#')) + assert osdfilename is not None + + # Get actual object name from osd stored filename + objname = osdfilename.split(':')[4] + return osd_remote, os.path.join(data_path, osdfilename), objname + + +def corrupt_file(osd_remote, path): + # put a single \0 at the beginning of the file + osd_remote.run( + args=['sudo', 'dd', + 'if=/dev/zero', + 'of=%s/data' % path, + 'bs=1', 'count=1', 'conv=notrunc'] + ) + + +def get_pgnum(pgid): + pos = pgid.find('.') + assert pos != -1 + return pgid[pos+1:] + + +def deep_scrub(manager, victim, pool): + # scrub, verify inconsistent + pgnum = get_pgnum(victim) + manager.do_pg_scrub(pool, pgnum, 'deep-scrub') + + stats = manager.get_single_pg_stats(victim) + inconsistent = stats['state'].find('+inconsistent') != -1 + assert inconsistent + + +def repair(manager, victim, pool): + # repair, verify no longer inconsistent + pgnum = get_pgnum(victim) + manager.do_pg_scrub(pool, pgnum, 'repair') + + stats = manager.get_single_pg_stats(victim) + inconsistent = stats['state'].find('+inconsistent') != -1 + assert not inconsistent + + +def test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, pool): + corrupt_file(osd_remote, obj_path) + deep_scrub(manager, pg, pool) + repair(manager, pg, pool) + + +def test_repair_bad_omap(ctx, manager, pg, osd, objname): + # Test deep-scrub with various omap modifications + # Modify omap on specific osd + log.info('fuzzing omap of %s' % objname) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key']) + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, + 'badkey', 'badval']) + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr']) + + deep_scrub(manager, pg, 'rbd') + # please note, the repair here is errnomous, it rewrites the correct omap 
+ # digest and data digest on the replicas with the corresponding digests + # from the primary osd which is hosting the victim object, see + # find_victim_object(). + # so we need to either put this test and the end of this task or + # undo the mess-up manually before the "repair()" that just ensures + # the cleanup is sane, otherwise the succeeding tests will fail. if they + # try set "badkey" in hope to get an "inconsistent" pg with a deep-scrub. + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'hdr']) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'badkey']) + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, + 'key', 'val']) + repair(manager, pg, 'rbd') + + +class MessUp: + def __init__(self, manager, osd_remote, pool, osd_id, + obj_name, obj_path, omap_key, omap_val): + self.manager = manager + self.osd = osd_remote + self.pool = pool + self.osd_id = osd_id + self.obj = obj_name + self.path = obj_path + self.omap_key = omap_key + self.omap_val = omap_val + + @contextlib.contextmanager + def _test_with_file(self, messup_cmd, *checks): + temp = tempfile.mktemp() + backup_cmd = ['sudo', 'cp', os.path.join(self.path, 'data'), temp] + self.osd.run(args=backup_cmd) + self.osd.run(args=messup_cmd.split()) + yield checks + create_cmd = ['sudo', 'mkdir', self.path] + self.osd.run(args=create_cmd, check_status=False) + restore_cmd = ['sudo', 'cp', temp, os.path.join(self.path, 'data')] + self.osd.run(args=restore_cmd) + + def remove(self): + cmd = 'sudo rmdir {path}'.format(path=self.path) + return self._test_with_file(cmd, 'missing') + + def append(self): + cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \ + 'conv=notrunc oflag=append'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch', + 'size_mismatch') + + def truncate(self): + cmd = 'sudo dd if=/dev/null of={path}/data'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch', + 'size_mismatch') + + def 
change_obj(self): + cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \ + 'conv=notrunc'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch') + + @contextlib.contextmanager + def rm_omap(self): + cmd = ['rmomapkey', self.pool, self.obj, self.omap_key] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['setomapval', self.pool, self.obj, + self.omap_key, self.omap_val] + self.manager.osd_admin_socket(self.osd_id, cmd) + + @contextlib.contextmanager + def add_omap(self): + cmd = ['setomapval', self.pool, self.obj, 'badkey', 'badval'] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['rmomapkey', self.pool, self.obj, 'badkey'] + self.manager.osd_admin_socket(self.osd_id, cmd) + + @contextlib.contextmanager + def change_omap(self): + cmd = ['setomapval', self.pool, self.obj, self.omap_key, 'badval'] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['setomapval', self.pool, self.obj, self.omap_key, self.omap_val] + self.manager.osd_admin_socket(self.osd_id, cmd) + + +class InconsistentObjChecker: + """Check the returned inconsistents/inconsistent info""" + + def __init__(self, osd, acting, obj_name): + self.osd = osd + self.acting = acting + self.obj = obj_name + assert self.osd in self.acting + + def basic_checks(self, inc): + assert inc['object']['name'] == self.obj + assert inc['object']['snap'] == "head" + assert len(inc['shards']) == len(self.acting), \ + "the number of returned shard does not match with the acting set" + + def run(self, check, inc): + func = getattr(self, check) + func(inc) + + def _check_errors(self, inc, err_name): + bad_found = False + good_found = False + for shard in inc['shards']: + log.info('shard = %r' % shard) + log.info('err = %s' % err_name) + assert 'osd' in shard + osd = shard['osd'] + err = err_name in shard['errors'] + if osd == self.osd: + assert bad_found is False, \ 
+ "multiple entries found for the given OSD" + assert err is True, \ + "Didn't find '{err}' in errors".format(err=err_name) + bad_found = True + else: + assert osd in self.acting, "shard not in acting set" + assert err is False, \ + "Expected '{err}' in errors".format(err=err_name) + good_found = True + assert bad_found is True, \ + "Shard for osd.{osd} not found".format(osd=self.osd) + assert good_found is True, \ + "No other acting shards found" + + def _check_attrs(self, inc, attr_name): + bad_attr = None + good_attr = None + for shard in inc['shards']: + log.info('shard = %r' % shard) + log.info('attr = %s' % attr_name) + assert 'osd' in shard + osd = shard['osd'] + attr = shard.get(attr_name, False) + if osd == self.osd: + assert bad_attr is None, \ + "multiple entries found for the given OSD" + bad_attr = attr + else: + assert osd in self.acting, "shard not in acting set" + assert good_attr is None or good_attr == attr, \ + "multiple good attrs found" + good_attr = attr + assert bad_attr is not None, \ + "bad {attr} not found".format(attr=attr_name) + assert good_attr is not None, \ + "good {attr} not found".format(attr=attr_name) + assert good_attr != bad_attr, \ + "bad attr is identical to the good ones: " \ + "{0} == {1}".format(good_attr, bad_attr) + + def data_digest_mismatch(self, inc): + assert 'data_digest_mismatch' in inc['errors'] + self._check_attrs(inc, 'data_digest') + + def missing(self, inc): + assert 'missing' in inc['union_shard_errors'] + self._check_errors(inc, 'missing') + + def size_mismatch(self, inc): + assert 'size_mismatch' in inc['errors'] + self._check_attrs(inc, 'size') + + def omap_digest_mismatch(self, inc): + assert 'omap_digest_mismatch' in inc['errors'] + self._check_attrs(inc, 'omap_digest') + + +def test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd_id, + obj_name, obj_path): + mon = manager.controller + pool = 'rbd' + omap_key = 'key' + omap_val = 'val' + manager.do_rados(['setomapval', obj_name, omap_key, 
omap_val], pool=pool) + # Update missing digests, requires "osd deep scrub update digest min age: 0" + pgnum = get_pgnum(pg) + manager.do_pg_scrub(pool, pgnum, 'deep-scrub') + + messup = MessUp(manager, osd_remote, pool, osd_id, obj_name, obj_path, + omap_key, omap_val) + for test in [messup.rm_omap, messup.add_omap, messup.change_omap, + messup.append, messup.truncate, messup.change_obj, + messup.remove]: + with test() as checks: + deep_scrub(manager, pg, pool) + cmd = 'rados list-inconsistent-pg {pool} ' \ + '--format=json'.format(pool=pool) + pgs = json.loads(mon.sh(cmd)) + assert pgs == [pg] + + cmd = 'rados list-inconsistent-obj {pg} ' \ + '--format=json'.format(pg=pg) + objs = json.loads(mon.sh(cmd)) + assert len(objs['inconsistents']) == 1 + + checker = InconsistentObjChecker(osd_id, acting, obj_name) + inc_obj = objs['inconsistents'][0] + log.info('inc = %r', inc_obj) + checker.basic_checks(inc_obj) + for check in checks: + checker.run(check, inc_obj) + + +def task(ctx, config): + """ + Test [deep] scrub + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: + - '!= data_digest' + - '!= omap_digest' + - '!= size' + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub [0-9]+ errors + - repair 0 missing, 1 inconsistent objects + - repair [0-9]+ errors, [0-9]+ fixed + - shard [0-9]+ .* : missing + - deep-scrub 1 missing, 1 inconsistent objects + - does not match object info size + - attr name mistmatch + - deep-scrub 1 missing, 0 inconsistent objects + - failed to pick suitable auth object + - candidate size [0-9]+ info size [0-9]+ mismatch + conf: + osd: + osd deep scrub update digest min age: 0 + - scrub_test: + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub_test task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' 
% num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', + '--', '--osd-objectstore-fuse') + manager.flush_pg_stats(range(num_osds)) + manager.wait_for_clean() + + # write some data + p = manager.do_rados(['bench', '--no-cleanup', '1', 'write', '-b', '4096'], pool='rbd') + log.info('err is %d' % p.exitstatus) + + # wait for some PG to have data that we can mess with + pg, acting = wait_for_victim_pg(manager) + osd = acting[0] + + osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd) + manager.do_rados(['setomapval', obj_name, 'key', 'val'], pool='rbd') + log.info('err is %d' % p.exitstatus) + manager.do_rados(['setomapheader', obj_name, 'hdr'], pool='rbd') + log.info('err is %d' % p.exitstatus) + + # Update missing digests, requires "osd deep scrub update digest min age: 0" + pgnum = get_pgnum(pg) + manager.do_pg_scrub('rbd', pgnum, 'deep-scrub') + + log.info('messing with PG %s on osd %d' % (pg, osd)) + test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd') + test_repair_bad_omap(ctx, manager, pg, osd, obj_name) + test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd, + obj_name, obj_path) + log.info('test successful!') + + # shut down fuse mount + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', + '--', '--no-osd-objectstore-fuse') + time.sleep(5) + log.info('done') diff --git a/qa/tasks/systemd.py b/qa/tasks/systemd.py new file mode 100644 index 000000000..1728b920f --- /dev/null +++ b/qa/tasks/systemd.py @@ -0,0 +1,135 @@ +""" +Systemd test +""" +import contextlib +import logging +import re +import time + +from teuthology.orchestra import run +from teuthology.misc import reconnect, get_first_mon, wait_until_healthy + +log = logging.getLogger(__name__) + +def 
_remote_service_status(remote, service): + status = remote.sh('sudo systemctl status %s' % service, + check_status=False) + return status + +@contextlib.contextmanager +def task(ctx, config): + """ + - tasks: + ceph-deploy: + systemd: + + Test ceph systemd services can start, stop and restart and + check for any failed services and report back errors + """ + for remote, roles in ctx.cluster.remotes.items(): + remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + units = remote.sh('sudo systemctl list-units | grep ceph', + check_status=False) + log.info(units) + if units.find('failed'): + log.info("Ceph services in failed state") + + # test overall service stop and start using ceph.target + # ceph.target tests are meant for ceph systemd tests + # and not actual process testing using 'ps' + log.info("Stopping all Ceph services") + remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + status = _remote_service_status(remote, 'ceph.target') + log.info(status) + log.info("Checking process status") + ps_eaf = remote.sh('sudo ps -eaf | grep ceph') + if ps_eaf.find('Active: inactive'): + log.info("Successfully stopped all ceph services") + else: + log.info("Failed to stop ceph services") + + log.info("Starting all Ceph services") + remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target']) + status = _remote_service_status(remote, 'ceph.target') + log.info(status) + if status.find('Active: active'): + log.info("Successfully started all Ceph services") + else: + log.info("info", "Failed to start Ceph services") + ps_eaf = remote.sh('sudo ps -eaf | grep ceph') + log.info(ps_eaf) + time.sleep(4) + + # test individual services start stop + name = remote.shortname + mon_name = 'ceph-mon@' + name + '.service' + mds_name = 'ceph-mds@' + name + '.service' + mgr_name = 'ceph-mgr@' + name + '.service' + mon_role_name = 'mon.' + name + mds_role_name = 'mds.' + name + mgr_role_name = 'mgr.' 
+ name + m_osd = re.search('--id (\d+) --setuser ceph', ps_eaf) + if m_osd: + osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1)) + remote.run(args=['sudo', 'systemctl', 'status', + osd_service]) + remote.run(args=['sudo', 'systemctl', 'stop', + osd_service]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, osd_service) + log.info(status) + if status.find('Active: inactive'): + log.info("Successfully stopped single osd ceph service") + else: + log.info("Failed to stop ceph osd services") + remote.sh(['sudo', 'systemctl', 'start', osd_service]) + time.sleep(4) + if mon_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mon_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mon_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mon_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single mon ceph service") + else: + log.info("Failed to stop ceph mon service") + remote.run(args=['sudo', 'systemctl', 'start', mon_name]) + time.sleep(4) + if mgr_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mgr_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mgr_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mgr_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single ceph mgr service") + else: + log.info("Failed to stop ceph mgr service") + remote.run(args=['sudo', 'systemctl', 'start', mgr_name]) + time.sleep(4) + if mds_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mds_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mds_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mds_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single ceph mds service") + 
else: + log.info("Failed to stop ceph mds service") + remote.run(args=['sudo', 'systemctl', 'start', mds_name]) + time.sleep(4) + + # reboot all nodes and verify the systemd units restart + # workunit that runs would fail if any of the systemd unit doesnt start + ctx.cluster.run(args='sudo reboot', wait=False, check_status=False) + # avoid immediate reconnect + time.sleep(120) + reconnect(ctx, 480) # reconnect all nodes + # for debug info + ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + # wait for HEALTH_OK + mon = get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(mon).remotes.keys() + wait_until_healthy(ctx, mon_remote, use_sudo=True) + yield diff --git a/qa/tasks/tempest.py b/qa/tasks/tempest.py new file mode 100644 index 000000000..cee942e37 --- /dev/null +++ b/qa/tasks/tempest.py @@ -0,0 +1,263 @@ +""" +Deploy and configure Tempest for Teuthology +""" +import configparser +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.exceptions import ConfigError +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_tempest_dir(ctx): + return '{tdir}/tempest'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_tempest_dir(ctx, client, cmdargs, **kwargs): + ctx.cluster.only(client).run( + args=[ 'cd', get_tempest_dir(ctx), run.Raw('&&'), ] + cmdargs, + **kwargs + ) + +def run_in_tempest_rgw_dir(ctx, client, cmdargs, **kwargs): + ctx.cluster.only(client).run( + args=[ 'cd', get_tempest_dir(ctx) + '/rgw', run.Raw('&&'), ] + cmdargs, + **kwargs + ) + +def run_in_tempest_venv(ctx, client, cmdargs, **kwargs): + run_in_tempest_dir(ctx, client, + [ 'source', + '.tox/venv/bin/activate', + run.Raw('&&') + ] + cmdargs, **kwargs) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Tempest from github. + Remove downloaded file upon exit. 
+ + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading Tempest...') + for (client, cconf) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', cconf.get('force-branch', 'master'), + 'https://github.com/openstack/tempest.git', + get_tempest_dir(ctx) + ], + ) + + sha1 = cconf.get('sha1') + if sha1 is not None: + run_in_tempest_dir(ctx, client, [ 'git', 'reset', '--hard', sha1 ]) + try: + yield + finally: + log.info('Removing Tempest...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', get_tempest_dir(ctx) ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Tempest using tox. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for Tempest') + for (client, _) in config.items(): + run_in_tempest_dir(ctx, client, + [ '{tvdir}/bin/tox'.format(tvdir=get_toxvenv_dir(ctx)), + '-e', 'venv', '--notest' + ]) + yield + +def setup_logging(ctx, cpar): + cpar.set('DEFAULT', 'log_dir', teuthology.get_archive_dir(ctx)) + cpar.set('DEFAULT', 'log_file', 'tempest.log') + +def to_config(config, params, section, cpar): + for (k, v) in config[section].items(): + if isinstance(v, str): + v = v.format(**params) + elif isinstance(v, bool): + v = 'true' if v else 'false' + else: + v = str(v) + cpar.set(section, k, v) + +@contextlib.contextmanager +def configure_instance(ctx, config): + assert isinstance(config, dict) + log.info('Configuring Tempest') + + for (client, cconfig) in config.items(): + run_in_tempest_venv(ctx, client, + [ + 'tempest', + 'init', + '--workspace-path', + get_tempest_dir(ctx) + '/workspace.yaml', + 'rgw' + ]) + + # prepare the config file + tetcdir = '{tdir}/rgw/etc'.format(tdir=get_tempest_dir(ctx)) + (remote,) = ctx.cluster.only(client).remotes.keys() + local_conf = 
remote.get_file(tetcdir + '/tempest.conf.sample') + + # fill the params dictionary which allows to use templatized configs + keystone_role = cconfig.get('use-keystone-role', None) + if keystone_role is None \ + or keystone_role not in ctx.keystone.public_endpoints: + raise ConfigError('the use-keystone-role is misconfigured') + public_host, public_port = ctx.keystone.public_endpoints[keystone_role] + params = { + 'keystone_public_host': public_host, + 'keystone_public_port': str(public_port), + } + + cpar = configparser.ConfigParser() + cpar.read(local_conf) + setup_logging(ctx, cpar) + to_config(cconfig, params, 'auth', cpar) + to_config(cconfig, params, 'identity', cpar) + to_config(cconfig, params, 'object-storage', cpar) + to_config(cconfig, params, 'object-storage-feature-enabled', cpar) + cpar.write(open(local_conf, 'w+')) + + remote.put_file(local_conf, tetcdir + '/tempest.conf') + yield + +@contextlib.contextmanager +def run_tempest(ctx, config): + assert isinstance(config, dict) + log.info('Configuring Tempest') + + for (client, cconf) in config.items(): + blocklist = cconf.get('blocklist', []) + assert isinstance(blocklist, list) + run_in_tempest_venv(ctx, client, + [ + 'tempest', + 'run', + '--workspace-path', + get_tempest_dir(ctx) + '/workspace.yaml', + '--workspace', + 'rgw', + '--regex', '^tempest.api.object_storage', + '--black-regex', '|'.join(blocklist) + ]) + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and run Tempest's object storage campaign + + Example of configuration: + + overrides: + ceph: + conf: + client: + rgw keystone api version: 3 + rgw keystone accepted roles: admin,Member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw 
keystone admin project: admin + tasks: + # typically, the task should be preceded with install, ceph, tox, + # keystone and rgw. Tox and Keystone are specific requirements + # of tempest.py. + - rgw: + # it's important to match the prefix with the endpoint's URL + # in Keystone. Additionally, if we want to test /info and its + # accompanying stuff, the whole Swift API must be put in root + # of the whole URL hierarchy (read: frontend_prefix == /swift). + frontend_prefix: /swift + client.0: + use-keystone-role: client.0 + - tempest: + client.0: + force-branch: master + use-keystone-role: client.0 + auth: + admin_username: admin + admin_project_name: admin + admin_password: ADMIN + admin_domain_name: Default + identity: + uri: http://{keystone_public_host}:{keystone_public_port}/v2.0/ + uri_v3: http://{keystone_public_host}:{keystone_public_port}/v3/ + admin_role: admin + object-storage: + reseller_admin_role: admin + object-storage-feature-enabled: + container_sync: false + discoverability: false + blocklist: + # please strip half of these items after merging PRs #15369 + # and #12704 + - .*test_list_containers_reverse_order.* + - .*test_list_container_contents_with_end_marker.* + - .*test_delete_non_empty_container.* + - .*test_container_synchronization.* + - .*test_get_object_after_expiration_time.* + - .*test_create_object_with_transfer_encoding.* + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + 'task tempest only supports a list or dictionary for configuration' + + if not ctx.tox: + raise ConfigError('tempest must run after the tox task') + if not ctx.keystone: + raise ConfigError('tempest must run after the keystone task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client 
section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('keystone', {})) + + log.debug('Tempest config is %s', config) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_instance(ctx=ctx, config=config), + lambda: run_tempest(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/tests/__init__.py b/qa/tasks/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/tests/__init__.py diff --git a/qa/tasks/tests/test_cephadm.py b/qa/tasks/tests/test_cephadm.py new file mode 100644 index 000000000..403d1915e --- /dev/null +++ b/qa/tasks/tests/test_cephadm.py @@ -0,0 +1,70 @@ +from tasks import cephadm + +v1 = """ +[registries.search] +registries = ['registry.access.redhat.com', 'registry.redhat.io', 'docker.io', 'quay.io'] + +[registries.insecure] +registries = [] +""" + +v2 = """ +unqualified-search-registries = ["registry.access.redhat.com", "registry.redhat.io", "docker.io", 'quay.io'] + +[[registry]] +prefix = "registry.access.redhat.com" +location = "registry.access.redhat.com" +insecure = false +blocked = false + +[[registry]] +prefix = "registry.redhat.io" +location = "registry.redhat.io" +insecure = false +blocked = false + +[[registry]] +prefix = "docker.io" +location = "docker.io" +insecure = false +blocked = false + +[[registry.mirror]] +location = "vossi04.front.sepia.ceph.com:5000" +insecure = true + +[[registry]] +prefix = "quay.io" +location = "quay.io" +insecure = false +blocked = false +""" + +expected = { + 'unqualified-search-registries': ['registry.access.redhat.com', 'registry.redhat.io', + 'docker.io', 'quay.io'], + 'registry': [ + {'prefix': 'registry.access.redhat.com', + 'location': 'registry.access.redhat.com', + 'insecure': False, + 'blocked': False}, + {'prefix': 'registry.redhat.io', + 'location': 
'registry.redhat.io', + 'insecure': False, + 'blocked': False}, + {'prefix': 'docker.io', + 'location': 'docker.io', + 'insecure': False, + 'blocked': False, + 'mirror': [{'location': 'vossi04.front.sepia.ceph.com:5000', + 'insecure': True}]}, + {'prefix': 'quay.io', + 'location': 'quay.io', + 'insecure': False, + 'blocked': False} + ] +} + +def test_add_mirror(): + assert cephadm.registries_add_mirror_to_docker_io(v1, 'vossi04.front.sepia.ceph.com:5000') == expected + assert cephadm.registries_add_mirror_to_docker_io(v2, 'vossi04.front.sepia.ceph.com:5000') == expected diff --git a/qa/tasks/tests/test_devstack.py b/qa/tasks/tests/test_devstack.py new file mode 100644 index 000000000..39b94a64c --- /dev/null +++ b/qa/tasks/tests/test_devstack.py @@ -0,0 +1,48 @@ +from textwrap import dedent + +from tasks import devstack + + +class TestDevstack(object): + def test_parse_os_table(self): + table_str = dedent(""" + +---------------------+--------------------------------------+ + | Property | Value | + +---------------------+--------------------------------------+ + | attachments | [] | + | availability_zone | nova | + | bootable | false | + | created_at | 2014-02-21T17:14:47.548361 | + | display_description | None | + | display_name | NAME | + | id | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e | + | metadata | {} | + | size | 1 | + | snapshot_id | None | + | source_volid | None | + | status | creating | + | volume_type | None | + +---------------------+--------------------------------------+ + """).strip() + expected = { + 'Property': 'Value', + 'attachments': '[]', + 'availability_zone': 'nova', + 'bootable': 'false', + 'created_at': '2014-02-21T17:14:47.548361', + 'display_description': 'None', + 'display_name': 'NAME', + 'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e', + 'metadata': '{}', + 'size': '1', + 'snapshot_id': 'None', + 'source_volid': 'None', + 'status': 'creating', + 'volume_type': 'None'} + + vol_info = devstack.parse_os_table(table_str) + assert vol_info == 
expected + + + + diff --git a/qa/tasks/tests/test_radosgw_admin.py b/qa/tasks/tests/test_radosgw_admin.py new file mode 100644 index 000000000..4e86d5c2b --- /dev/null +++ b/qa/tasks/tests/test_radosgw_admin.py @@ -0,0 +1,31 @@ +from unittest.mock import Mock + +from tasks import radosgw_admin + +acl_with_version = """<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy> +""" # noqa + + +acl_without_version = """<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy> +""" # noqa + + +class TestGetAcl(object): + + def setup(self): + self.key = Mock() + + def test_removes_xml_version(self): + self.key.get_xml_acl = Mock(return_value=acl_with_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith('<AccessControlPolicy') + + def test_xml_version_is_already_removed(self): + self.key.get_xml_acl = Mock(return_value=acl_without_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith('<AccessControlPolicy') + + def test_newline_gets_trimmed(self): + self.key.get_xml_acl = Mock(return_value=acl_without_version) + result = radosgw_admin.get_acl(self.key) + assert result.endswith('\n') is False diff --git a/qa/tasks/teuthology_integration.py b/qa/tasks/teuthology_integration.py new file mode 100644 index 000000000..b5a2278eb --- /dev/null +++ 
b/qa/tasks/teuthology_integration.py @@ -0,0 +1,19 @@ +import logging +from teuthology import misc +from teuthology.task import Task + +log = logging.getLogger(__name__) + + +class TeuthologyIntegration(Task): + + def begin(self): + misc.sh(""" + set -x + pip install tox + tox + # tox -e py27-integration + tox -e openstack-integration + """) + +task = TeuthologyIntegration diff --git a/qa/tasks/tgt.py b/qa/tasks/tgt.py new file mode 100644 index 000000000..a0758f472 --- /dev/null +++ b/qa/tasks/tgt.py @@ -0,0 +1,177 @@ +""" +Task to handle tgt + +Assumptions made: + The ceph-extras tgt package may need to get installed. + The open-iscsi package needs to get installed. +""" +import logging +import contextlib + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def start_tgt_remotes(ctx, start_tgtd): + """ + This subtask starts up a tgtd on the clients specified + """ + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + tgtd_list = [] + for rem, roles in remotes.items(): + for _id in roles: + if _id in start_tgtd: + if not rem in tgtd_list: + tgtd_list.append(rem) + size = ctx.config.get('image_size', 10240) + rem.run( + args=[ + 'rbd', + 'create', + 'iscsi-image', + '--size', + str(size), + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'target', + '--op', + 'new', + '--tid', + '1', + '--targetname', + 'rbd', + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'logicalunit', + '--op', + 'new', + '--tid', + '1', + '--lun', + '1', + '--backing-store', + 'iscsi-image', + '--bstype', + 'rbd', + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--op', + 'bind', + '--mode', + 'target', + '--tid', + '1', + '-I', + 'ALL', + ]) + try: + yield + + finally: + for rem in tgtd_list: + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'target', + '--op', + 
'delete', + '--force', + '--tid', + '1', + ]) + rem.run( + args=[ + 'rbd', + 'snap', + 'purge', + 'iscsi-image', + ]) + rem.run( + args=[ + 'sudo', + 'rbd', + 'rm', + 'iscsi-image', + ]) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Start up tgt. + + To start on on all clients:: + + tasks: + - ceph: + - tgt: + + To start on certain clients:: + + tasks: + - ceph: + - tgt: [client.0, client.3] + + or + + tasks: + - ceph: + - tgt: + client.0: + client.3: + + An image blocksize size can also be specified:: + + tasks: + - ceph: + - tgt: + image_size = 20480 + + The general flow of things here is: + 1. Find clients on which tgt is supposed to run (start_tgtd) + 2. Remotely start up tgt daemon + On cleanup: + 3. Stop tgt daemon + + The iscsi administration is handled by the iscsi task. + """ + if config: + config = {key : val for key, val in config.items() + if key.startswith('client')} + # config at this point should only contain keys starting with 'client' + start_tgtd = [] + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + log.info(remotes) + if not config: + start_tgtd = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + else: + start_tgtd = config + log.info(start_tgtd) + with contextutil.nested( + lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),): + yield diff --git a/qa/tasks/thrash_pool_snaps.py b/qa/tasks/thrash_pool_snaps.py new file mode 100644 index 000000000..c71c9ce8d --- /dev/null +++ b/qa/tasks/thrash_pool_snaps.py @@ -0,0 +1,61 @@ +""" +Thrash -- Simulate random osd failures. 
+""" +import contextlib +import logging +import gevent +import time +import random + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + "Thrash" snap creation and removal on the listed pools + + Example: + + thrash_pool_snaps: + pools: [.rgw.buckets, .rgw.buckets.index] + max_snaps: 10 + min_snaps: 5 + period: 10 + """ + stopping = False + def do_thrash(): + pools = config.get('pools', []) + max_snaps = config.get('max_snaps', 10) + min_snaps = config.get('min_snaps', 5) + period = config.get('period', 30) + snaps = [] + manager = ctx.managers['ceph'] + def remove_snap(): + assert len(snaps) > 0 + snap = random.choice(snaps) + log.info("Removing snap %s" % (snap,)) + for pool in pools: + manager.remove_pool_snap(pool, str(snap)) + snaps.remove(snap) + def add_snap(snap): + log.info("Adding snap %s" % (snap,)) + for pool in pools: + manager.add_pool_snap(pool, str(snap)) + snaps.append(snap) + index = 0 + while not stopping: + index += 1 + time.sleep(period) + if len(snaps) <= min_snaps: + add_snap(index) + elif len(snaps) >= max_snaps: + remove_snap() + else: + random.choice([lambda: add_snap(index), remove_snap])() + log.info("Stopping") + thread = gevent.spawn(do_thrash) + yield + stopping = True + thread.join() + diff --git a/qa/tasks/thrasher.py b/qa/tasks/thrasher.py new file mode 100644 index 000000000..0ea1bf0ee --- /dev/null +++ b/qa/tasks/thrasher.py @@ -0,0 +1,15 @@ +""" +Thrasher base class +""" +class Thrasher(object): + + def __init__(self): + super(Thrasher, self).__init__() + self._exception = None + + @property + def exception(self): + return self._exception + + def set_thrasher_exception(self, e): + self._exception = e diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml new file mode 100644 index 000000000..1b2560d4e --- /dev/null +++ b/qa/tasks/thrashosds-health.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + osd: + osd max markdown count: 1000 + log-ignorelist: + - 
overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request diff --git a/qa/tasks/thrashosds.py b/qa/tasks/thrashosds.py new file mode 100644 index 000000000..aa7ec437a --- /dev/null +++ b/qa/tasks/thrashosds.py @@ -0,0 +1,221 @@ +""" +Thrash -- Simulate random osd failures. +""" +import contextlib +import logging +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + "Thrash" the OSDs by randomly marking them out/down (and then back + in) until the task is ended. This loops, and every op_delay + seconds it randomly chooses to add or remove an OSD (even odds) + unless there are fewer than min_out OSDs out of the cluster, or + more than min_in OSDs in the cluster. + + All commands are run on mon0 and it stops when __exit__ is called. + + The config is optional, and is a dict containing some or all of: + + cluster: (default 'ceph') the name of the cluster to thrash + + min_in: (default 4) the minimum number of OSDs to keep in the + cluster + + min_out: (default 0) the minimum number of OSDs to keep out of the + cluster + + op_delay: (5) the length of time to sleep between changing an + OSD's status + + min_dead: (0) minimum number of osds to leave down/dead. + + max_dead: (0) maximum number of osds to leave down/dead before waiting + for clean. This should probably be num_replicas - 1. + + clean_interval: (60) the approximate length of time to loop before + waiting until the cluster goes clean. (In reality this is used + to probabilistically choose when to wait, and the method used + makes it closer to -- but not identical to -- the half-life.) + + scrub_interval: (-1) the approximate length of time to loop before + waiting until a scrub is performed while cleaning. 
(In reality + this is used to probabilistically choose when to wait, and it + only applies to the cases where cleaning is being performed). + -1 is used to indicate that no scrubbing will be done. + + chance_down: (0.4) the probability that the thrasher will mark an + OSD down rather than marking it out. (The thrasher will not + consider that OSD out of the cluster, since presently an OSD + wrongly marked down will mark itself back up again.) This value + can be either an integer (eg, 75) or a float probability (eg + 0.75). + + chance_test_min_size: (0) chance to run test_pool_min_size, + which: + - kills all but one osd + - waits + - kills that osd + - revives all other osds + - verifies that the osds fully recover + + timeout: (360) the number of seconds to wait for the cluster + to become clean after each cluster change. If this doesn't + happen within the timeout, an exception will be raised. + + revive_timeout: (150) number of seconds to wait for an osd asok to + appear after attempting to revive the osd + + thrash_primary_affinity: (true) randomly adjust primary-affinity + + chance_pgnum_grow: (0) chance to increase a pool's size + chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool + pool_grow_by: (10) amount to increase pgnum by + chance_pgnum_shrink: (0) chance to decrease a pool's size + pool_shrink_by: (10) amount to decrease pgnum by + max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd + + pause_short: (3) duration of short pause + pause_long: (80) duration of long pause + pause_check_after: (50) assert osd down after this long + chance_inject_pause_short: (1) chance of injecting short stall + chance_inject_pause_long: (0) chance of injecting long stall + + clean_wait: (0) duration to wait before resuming thrashing once clean + + sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a + random live osd + + powercycle: (false) whether to power cycle the node instead + of just the osd process. 
Note that this assumes that a single + osd is the only important process on the node. + + bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash. + the delay lets the BlockDevice "accept" more aio operations but blocks + any flush, and then eventually crashes (losing some or all ios). If 0, + no bdev failure injection is enabled. + + bdev_inject_crash_probability: (.5) probability of doing a bdev failure + injection crash vs a normal OSD kill. + + chance_test_backfill_full: (0) chance to simulate full disks stopping + backfill + + chance_test_map_discontinuity: (0) chance to test map discontinuity + map_discontinuity_sleep_time: (40) time to wait for map trims + + ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down + chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%) + + optrack_toggle_delay: (2.0) duration to delay between toggling op tracker + enablement to all osds + + dump_ops_enable: (true) continuously dump ops on all live osds + + noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub + + disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based + tests + + chance_thrash_cluster_full: .05 + + chance_thrash_pg_upmap: 1.0 + chance_thrash_pg_upmap_items: 1.0 + + aggressive_pg_num_changes: (true) whether we should bypass the careful throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller + + example: + + tasks: + - ceph: + - thrashosds: + cluster: ceph + chance_down: 10 + op_delay: 3 + min_in: 1 + timeout: 600 + - interactive: + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'thrashosds task only accepts a dict for configuration' + # add default value for sighup_delay + config['sighup_delay'] = config.get('sighup_delay', 0.1) + # add default value for optrack_toggle_delay + config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0) + # add default value for dump_ops_enable + 
config['dump_ops_enable'] = config.get('dump_ops_enable', "true") + # add default value for noscrub_toggle_delay + config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0) + # add default value for random_eio + config['random_eio'] = config.get('random_eio', 0.0) + aggro = config.get('aggressive_pg_num_changes', True) + + log.info("config is {config}".format(config=str(config))) + + overrides = ctx.config.get('overrides', {}) + log.info("overrides is {overrides}".format(overrides=str(overrides))) + teuthology.deep_merge(config, overrides.get('thrashosds', {})) + cluster = config.get('cluster', 'ceph') + + log.info("config is {config}".format(config=str(config))) + + if 'powercycle' in config: + + # sync everyone first to avoid collateral damage to / etc. + log.info('Doing preliminary sync to avoid collateral damage...') + ctx.cluster.run(args=['sync']) + + if 'ipmi_user' in ctx.teuthology_config: + for remote in ctx.cluster.remotes.keys(): + log.debug('checking console status of %s' % remote.shortname) + if not remote.console.check_status(): + log.warning('Failed to get console status for %s', + remote.shortname) + + # check that all osd remotes have a valid console + osds = ctx.cluster.only(teuthology.is_type('osd', cluster)) + for remote in osds.remotes.keys(): + if not remote.console.has_ipmi_credentials: + raise Exception( + 'IPMI console required for powercycling, ' + 'but not available on osd role: {r}'.format( + r=remote.name)) + + cluster_manager = ctx.managers[cluster] + for f in ['powercycle', 'bdev_inject_crash']: + if config.get(f): + cluster_manager.config[f] = config.get(f) + + if aggro: + cluster_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr_debug_aggressive_pg_num_changes', + 'true') + + log.info('Beginning thrashosds...') + thrash_proc = ceph_manager.OSDThrasher( + cluster_manager, + config, + "OSDThrasher", + logger=log.getChild('thrasher') + ) + ctx.ceph[cluster].thrashers.append(thrash_proc) + try: + yield + finally: 
+ log.info('joining thrashosds') + thrash_proc.do_join() + cluster_manager.wait_for_all_osds_up() + cluster_manager.flush_all_pg_stats() + cluster_manager.wait_for_recovery(config.get('timeout', 360)) + if aggro: + cluster_manager.raw_cluster_cmd( + 'config', 'rm', 'mgr', + 'mgr_debug_aggressive_pg_num_changes') diff --git a/qa/tasks/tox.py b/qa/tasks/tox.py new file mode 100644 index 000000000..61c5b7411 --- /dev/null +++ b/qa/tasks/tox.py @@ -0,0 +1,50 @@ +import argparse +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_toxvenv_dir(ctx): + return '{tdir}/tox-venv'.format(tdir=teuthology.get_testdir(ctx)) + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy tox from pip. It's a dependency for both Keystone and Tempest. + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task tox only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.info('Deploying tox from pip...') + for (client, _) in config.items(): + # yup, we have to deploy tox first. The packaged one, available + # on Sepia's Ubuntu machines, is outdated for Keystone/Tempest. 
+ tvdir = get_toxvenv_dir(ctx) + ctx.cluster.only(client).run(args=['python3', '-m', 'venv', tvdir]) + ctx.cluster.only(client).run(args=[ + 'source', '{tvdir}/bin/activate'.format(tvdir=tvdir), + run.Raw('&&'), + 'pip', 'install', 'tox==3.15.0' + ]) + + # export the path Keystone and Tempest + ctx.tox = argparse.Namespace() + ctx.tox.venv_path = get_toxvenv_dir(ctx) + + try: + yield + finally: + for (client, _) in config.items(): + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', get_toxvenv_dir(ctx) ]) diff --git a/qa/tasks/userdata_setup.yaml b/qa/tasks/userdata_setup.yaml new file mode 100644 index 000000000..afcc08e22 --- /dev/null +++ b/qa/tasks/userdata_setup.yaml @@ -0,0 +1,36 @@ +#cloud-config-archive + +- type: text/cloud-config + content: | + output: + all: '| tee -a /var/log/cloud-init-output.log' + +# allow passwordless access for debugging +- | + #!/usr/bin/env bash + exec passwd -d ubuntu + +- | + #!/usr/bin/env bash + + # mount a NFS share for storing logs + sed -i 's/archive.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list + sed -i 's/security.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list + apt-get update + + # DST Root CA X3 certificate expired on Sep 30, 2021. It was used by + # Let's Encrypt, which is what git.ceph.com relies on for HTTPS. Get the + # new Let's Encrypt root certificate in place and deactivate the old one + # (lines that begin with "!" are deselected). 
+ apt-get install --only-upgrade ca-certificates libssl1.0.0 + sed -i 's/mozilla\/DST_Root_CA_X3\.crt/!mozilla\/DST_Root_CA_X3\.crt/' /etc/ca-certificates.conf + update-ca-certificates + + apt-get -y install nfs-common + mkdir /mnt/log + # 10.0.2.2 is the host + mount -v -t nfs -o proto=tcp 10.0.2.2:{mnt_dir} /mnt/log + + # mount the iso image that has the test script + mkdir /mnt/cdrom + mount -t auto /dev/cdrom /mnt/cdrom diff --git a/qa/tasks/userdata_teardown.yaml b/qa/tasks/userdata_teardown.yaml new file mode 100644 index 000000000..731d769f0 --- /dev/null +++ b/qa/tasks/userdata_teardown.yaml @@ -0,0 +1,11 @@ +- | + #!/usr/bin/env bash + cp /var/log/cloud-init-output.log /mnt/log + +- | + #!/usr/bin/env bash + umount /mnt/log + +- | + #!/usr/bin/env bash + shutdown -h -P now diff --git a/qa/tasks/util/__init__.py b/qa/tasks/util/__init__.py new file mode 100644 index 000000000..5b8575ed9 --- /dev/null +++ b/qa/tasks/util/__init__.py @@ -0,0 +1,26 @@ +from teuthology import misc + +def get_remote(ctx, cluster, service_type, service_id): + """ + Get the Remote for the host where a particular role runs. + + :param cluster: name of the cluster the service is part of + :param service_type: e.g. 'mds', 'osd', 'client' + :param service_id: The third part of a role, e.g. 
'0' for + the role 'ceph.client.0' + :return: a Remote instance for the host where the + requested role is placed + """ + def _is_instance(role): + role_tuple = misc.split_role(role) + return role_tuple == (cluster, service_type, str(service_id)) + try: + (remote,) = ctx.cluster.only(_is_instance).remotes.keys() + except ValueError: + raise KeyError("Service {0}.{1}.{2} not found".format(cluster, + service_type, + service_id)) + return remote + +def get_remote_for_role(ctx, role): + return get_remote(ctx, *misc.split_role(role)) diff --git a/qa/tasks/util/rados.py b/qa/tasks/util/rados.py new file mode 100644 index 000000000..a0c54ce4e --- /dev/null +++ b/qa/tasks/util/rados.py @@ -0,0 +1,87 @@ +import logging + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def rados(ctx, remote, cmd, wait=True, check_status=False): + testdir = teuthology.get_testdir(ctx) + log.info("rados %s" % ' '.join(cmd)) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + check_status=check_status, + wait=wait, + ) + if wait: + return proc.exitstatus + else: + return proc + +def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None): + remote.run(args=['sudo', 'ceph'] + + cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name]) + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', name, + str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name + ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ], check_status=False) # may fail as EINVAL when run in jewel upgrade test + +def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None): + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), 
'--cluster', cluster_name + ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ], check_status=False) + +def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"): + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name + ]) + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name, + str(size), '--cluster', cluster_name + ]) + +def cmd_erasure_code_profile(profile_name, profile): + """ + Return the shell command to run to create the erasure code profile + described by the profile parameter. + + :param profile_name: a string matching [A-Za-z0-9-_.]+ + :param profile: a map whose semantic depends on the erasure code plugin + :returns: a shell command as an array suitable for Remote.run + + If profile is {}, it is replaced with + + { 'k': '2', 'm': '1', 'crush-failure-domain': 'osd'} + + for backward compatibility. In previous versions of teuthology, + these values were hardcoded as function arguments and some yaml + files were designed with these implicit values. The teuthology + code should not know anything about the erasure code profile + content or semantic. The valid values and parameters are outside + its scope. 
+ """ + + if profile == {}: + profile = { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + } + return [ + 'osd', 'erasure-code-profile', 'set', + profile_name + ] + [ str(key) + '=' + str(value) for key, value in profile.items() ] diff --git a/qa/tasks/util/rgw.py b/qa/tasks/util/rgw.py new file mode 100644 index 000000000..c955f3150 --- /dev/null +++ b/qa/tasks/util/rgw.py @@ -0,0 +1,94 @@ +import logging +import json +import time + +from io import StringIO + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False, + format='json', decode=True, log_level=logging.DEBUG): + log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd)) + testdir = teuthology.get_testdir(ctx) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '--log-to-stderr', + '--format', format, + '-n', client_with_id, + '--cluster', cluster_name, + ] + pre.extend(cmd) + log.log(log_level, 'rgwadmin: cmd=%s' % pre) + (remote,) = ctx.cluster.only(client).remotes.keys() + proc = remote.run( + args=pre, + check_status=check_status, + stdout=StringIO(), + stderr=StringIO(), + stdin=stdin, + ) + r = proc.exitstatus + out = proc.stdout.getvalue() + if not decode: + return (r, out) + j = None + if not r and out != '': + try: + j = json.loads(out) + log.log(log_level, ' json result: %s' % j) + except ValueError: + j = out + log.log(log_level, ' raw result: %s' % j) + return (r, j) + +def get_user_summary(out, user): + """Extract the summary for a given user""" + user_summary = None + for summary in out['summary']: + if summary.get('user') == user: + user_summary = summary + + if not user_summary: + raise AssertionError('No summary info found for user: %s' % user) + + return user_summary + +def 
get_user_successful_ops(out, user): + summary = out['summary'] + if len(summary) == 0: + return 0 + return get_user_summary(out, user)['total']['successful_ops'] + +def wait_for_radosgw(url, remote): + """ poll the given url until it starts accepting connections + + add_daemon() doesn't wait until radosgw finishes startup, so this is used + to avoid racing with later tasks that expect radosgw to be up and listening + """ + # TODO: use '--retry-connrefused --retry 8' when teuthology is running on + # Centos 8 and other OS's with an updated version of curl + curl_cmd = ['curl', + url] + exit_status = 0 + num_retries = 8 + for seconds in range(num_retries): + proc = remote.run( + args=curl_cmd, + check_status=False, + stdout=StringIO(), + stderr=StringIO(), + stdin=StringIO(), + ) + exit_status = proc.exitstatus + if exit_status == 0: + break + time.sleep(2**seconds) + + assert exit_status == 0 diff --git a/qa/tasks/util/test/__init__.py b/qa/tasks/util/test/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/util/test/__init__.py diff --git a/qa/tasks/util/test/test_rados.py b/qa/tasks/util/test/test_rados.py new file mode 100644 index 000000000..a8f4cb02d --- /dev/null +++ b/qa/tasks/util/test/test_rados.py @@ -0,0 +1,40 @@ +# +# The MIT License +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of 
the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +from tasks.util import rados + +class TestRados(object): + + def test_cmd_erasure_code_profile(self): + name = 'NAME' + cmd = rados.cmd_erasure_code_profile(name, {}) + assert 'k=2' in cmd + assert name in cmd + cmd = rados.cmd_erasure_code_profile(name, { 'k': '88' }) + assert 'k=88' in cmd + assert name in cmd diff --git a/qa/tasks/util/workunit.py b/qa/tasks/util/workunit.py new file mode 100644 index 000000000..1f5623af8 --- /dev/null +++ b/qa/tasks/util/workunit.py @@ -0,0 +1,78 @@ +import copy + +from teuthology import misc +from teuthology.orchestra import run + +class Refspec: + def __init__(self, refspec): + self.refspec = refspec + + def __str__(self): + return self.refspec + + def _clone(self, git_url, clonedir, opts=None): + if opts is None: + opts = [] + return (['rm', '-rf', clonedir] + + [run.Raw('&&')] + + ['git', 'clone'] + opts + + [git_url, clonedir]) + + def _cd(self, clonedir): + return ['cd', clonedir] + + def _checkout(self): + return ['git', 'checkout', self.refspec] + + def clone(self, git_url, clonedir): + return (self._clone(git_url, clonedir) + + [run.Raw('&&')] + + self._cd(clonedir) + + [run.Raw('&&')] + + self._checkout()) + + +class Branch(Refspec): + def __init__(self, tag): + Refspec.__init__(self, tag) + + def clone(self, git_url, clonedir): + opts = ['--depth', '1', + '--branch', self.refspec] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +class Head(Refspec): + def __init__(self): + 
Refspec.__init__(self, 'HEAD') + + def clone(self, git_url, clonedir): + opts = ['--depth', '1'] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +def get_refspec_after_overrides(config, overrides): + # mimic the behavior of the "install" task, where the "overrides" are + # actually the defaults of that task. in other words, if none of "sha1", + # "tag", or "branch" is specified by a "workunit" tasks, we will update + # it with the information in the "workunit" sub-task nested in "overrides". + overrides = copy.deepcopy(overrides.get('workunit', {})) + refspecs = {'suite_sha1': Refspec, 'suite_branch': Branch, + 'sha1': Refspec, 'tag': Refspec, 'branch': Branch} + if any(map(lambda i: i in config, refspecs.keys())): + for i in refspecs.keys(): + overrides.pop(i, None) + misc.deep_merge(config, overrides) + + for spec, cls in refspecs.items(): + refspec = config.get(spec) + if refspec: + refspec = cls(refspec) + break + if refspec is None: + refspec = Head() + return refspec diff --git a/qa/tasks/vault.py b/qa/tasks/vault.py new file mode 100644 index 000000000..2ff008c4d --- /dev/null +++ b/qa/tasks/vault.py @@ -0,0 +1,288 @@ +""" +Deploy and configure Vault for Teuthology +""" + +import argparse +import contextlib +import logging +import time +import json +from os import path +from http import client as http_client +from urllib.parse import urljoin + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError, CommandFailedError + + +log = logging.getLogger(__name__) + + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return 
role_endpoints + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download Vault Release from Hashicopr website. + Remove downloaded file upon exit. + """ + assert isinstance(config, dict) + log.info('Downloading Vault...') + testdir = teuthology.get_testdir(ctx) + + for (client, cconf) in config.items(): + install_url = cconf.get('install_url') + install_sha256 = cconf.get('install_sha256') + if not install_url or not install_sha256: + raise ConfigError("Missing Vault install_url and/or install_sha256") + install_zip = path.join(testdir, 'vault.zip') + install_dir = path.join(testdir, 'vault') + + log.info('Downloading Vault...') + ctx.cluster.only(client).run( + args=['curl', '-L', install_url, '-o', install_zip]) + + log.info('Verifying SHA256 signature...') + ctx.cluster.only(client).run( + args=['echo', ' '.join([install_sha256, install_zip]), run.Raw('|'), + 'sha256sum', '--check', '--status']) + + log.info('Extracting vault...') + ctx.cluster.only(client).run(args=['mkdir', '-p', install_dir]) + # Using python in case unzip is not installed on hosts + # Using python3 in case python is not installed on hosts + failed=True + for f in [ + lambda z,d: ['unzip', z, '-d', d], + lambda z,d: ['python3', '-m', 'zipfile', '-e', z, d], + lambda z,d: ['python', '-m', 'zipfile', '-e', z, d]]: + try: + ctx.cluster.only(client).run(args=f(install_zip, install_dir)) + failed = False + break + except CommandFailedError as e: + failed = e + if failed: + raise failed + + try: + yield + finally: + log.info('Removing Vault...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=['rm', '-rf', install_dir, install_zip]) + + +def get_vault_dir(ctx): + return '{tdir}/vault'.format(tdir=teuthology.get_testdir(ctx)) + + +@contextlib.contextmanager +def run_vault(ctx, config): + assert isinstance(config, dict) + + for (client, cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + 
def send_req(ctx, cconfig, client, path, body, method='POST'):
    """
    Send one authenticated HTTP request to the Vault instance of ``client``.

    :param ctx: teuthology run context; ``ctx.vault.endpoints[client]`` must
                hold the ``(host, port)`` pair of the Vault server
    :param cconfig: per-client vault config; ``root_token`` is used as the
                    ``X-Vault-Token`` header
    :param client: client role whose Vault endpoint is addressed
    :param path: request path, e.g. ``/v1/sys/mounts/kv``
    :param body: request body (already serialized)
    :param method: HTTP method, defaults to ``POST``
    :returns: the ``HTTPResponse``; its body has already been read and logged
    :raises Exception: if the response status is not 2xx
    """
    host, port = ctx.vault.endpoints[client]
    req = http_client.HTTPConnection(host, port, timeout=30)
    # Must match the default run_vault() starts the server with ('root');
    # otherwise a config without root_token is rejected by Vault.
    token = cconfig.get('root_token', 'root')
    log.info("Send request to Vault: %s:%s at %s with token: %s", host, port, path, token)
    headers = {'X-Vault-Token': token}
    try:
        req.request(method, path, headers=headers, body=body)
        resp = req.getresponse()
        log.info(resp.read())
    finally:
        # The body is fully consumed above, so the socket is no longer needed.
        req.close()
    if not 200 <= resp.status < 300:
        raise Exception("Request to Vault server failed with status %d" % resp.status)
    return resp
otherwise omit. + for kv only: 256-bit key value should be specified via secret, + otherwise should omit. + for transit: exportable may be used to make individual keys exportable. + flavor may be set to 'old' to make all keys exportable by default, + which is required by the original transit logic. + """ + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('vault', {})) + + log.debug('Vault config is %s', config) + + ctx.vault = argparse.Namespace() + ctx.vault.endpoints = assign_ports(ctx, config, 8200) + ctx.vault.root_token = None + ctx.vault.prefix = config[client].get('prefix') + ctx.vault.engine = config[client].get('engine') + ctx.vault.keys = {} + q=config[client].get('flavor') + if q: + ctx.vault.flavor = q + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: run_vault(ctx=ctx, config=config), + lambda: setup_vault(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config) + ): + yield + diff --git a/qa/tasks/vip.py b/qa/tasks/vip.py new file mode 100644 index 000000000..52114b104 --- /dev/null +++ b/qa/tasks/vip.py @@ -0,0 +1,205 @@ +import contextlib +import ipaddress +import logging +import re + +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + + +def subst_vip(ctx, cmd): + p = re.compile(r'({{VIP(\d+)}})') + for m in p.findall(cmd): + n = int(m[1]) + if n >= len(ctx.vip["vips"]): + log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})') + else: + cmd = cmd.replace(m[0], str(ctx.vip["vips"][n])) + + if '{{VIPPREFIXLEN}}' in cmd: + 
@contextlib.contextmanager
def task(ctx, config):
    """
    Set up a virtual network and allocate virtual IP(s) for each machine.

    The strategy here is to set up a private virtual subnet that is larger than
    the subnet the machine(s) exist in, and allocate virtual IPs from that pool.

    - The teuthology.yaml must include a section like::

        vip:
          - machine_subnet: 172.21.0.0/20
            virtual_subnet: 10.0.0.0/16

      At least one item's machine_subnet should map the subnet the test machine's
      primary IP lives in (the one DNS resolves to).  The virtual_subnet must have a
      shorter prefix (i.e., be larger than the machine_subnet).  If there are multiple
      machine_subnets, they cannot map into the same virtual_subnet.

    - Each machine gets an IP in the virtual_subnet statically configured by the vip
      task.  This lets all test machines reach each other and (most importantly) any
      virtual IPs.

    - 1 or more virtual IPs are then mapped for the task.  These IPs are chosen based
      on one of the remotes.  This uses a lot of network space but it avoids any
      conflicts between tests.

    To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used.

    {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example).

    {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example).

    These substitutions work for vip.echo, and (at the time of writing) cephadm.apply
    and cephadm.shell.

    :param ctx: teuthology run context; ``ctx.vip`` and ``ctx.vip_static``
                are populated by this task
    :param config: optional dict; ``count`` is the number of VIPs (default 1)
    :raises RuntimeError: if no configured ``vip`` mapping covers a remote
    """
    if config is None:
        config = {}
    count = config.get('count', 1)

    ctx.vip_static = {}
    ctx.vip = {}

    # Matches "<dest> dev <iface> ... scope link ... src <ip>" route lines.
    route_re = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)')

    # Setup lives inside the try so that addresses already configured on
    # earlier remotes are removed again if a later remote fails.
    try:
        log.info("Allocating static IPs for each host...")
        for remote in ctx.cluster.remotes.keys():
            ip = remote.ssh.get_transport().getpeername()[0]
            log.info(f'peername {ip}')
            mip = ipaddress.ip_address(ip)
            mapping = map_vips(mip, count + 1)
            if mapping is None:
                # map_vips() returns None when no (valid) mapping matches.
                raise RuntimeError(
                    f"No usable 'vip' mapping in the teuthology config for "
                    f"{remote.hostname} ({ip})")
            vnet, vips = mapping
            # The first address is the host's static IP; the rest are VIPs.
            static = vips.pop(0)
            log.info(f"{remote.hostname} static {static}, vnet {vnet}")

            if not ctx.vip:
                # do this only once (use the first remote we see), since we only need 1
                # set of virtual IPs, regardless of how many remotes we have.
                log.info(f"VIPs are {', '.join(map(str, vips))}")
                ctx.vip = {
                    'vnet': vnet,
                    'vips': vips,
                }
            else:
                # all remotes must be in the same virtual network...
                assert vnet == ctx.vip['vnet']

            # pick the interface whose link-scope route uses our primary IP
            iface = None
            for line in remote.sh(['sudo', 'ip', 'route', 'ls']).splitlines():
                m = route_re.search(line)
                if m and m.group(5) == ip:
                    iface = m.group(2)
                    break

            if not iface:
                log.error(f"Unable to find {remote.hostname} interface for {ip}")
                continue

            # configure
            log.info(f"Configuring {static} on {remote.hostname} iface {iface}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'add',
                       str(static) + '/' + str(vnet.prefixlen),
                       'dev', iface])

            ctx.vip_static[remote] = {
                "iface": iface,
                "static": static,
            }

        yield

    finally:
        for remote, m in ctx.vip_static.items():
            log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...")
            remote.sh(['sudo',
                       'ip', 'addr', 'del',
                       str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen),
                       'dev', m['iface']])

            # VIPs may or may not be present on this host; ignore failures.
            for vip in ctx.vip['vips']:
                remote.sh(
                    [
                        'sudo',
                        'ip', 'addr', 'del',
                        str(vip) + '/' + str(ctx.vip['vnet'].prefixlen),
                        'dev', m['iface']
                    ],
                    check_status=False,
                )
+ +Simple usage (assuming teuthology and ceph checked out in ~/git): + + # Activate the teuthology virtualenv + source ~/git/teuthology/virtualenv/bin/activate + # Go into your ceph build directory + cd ~/git/ceph/build + # Invoke a test using this script + python ~/git/ceph/qa/tasks/vstart_runner.py --create tasks.cephfs.test_data_scan + +Alternative usage: + + # Alternatively, if you use different paths, specify them as follows: + LD_LIBRARY_PATH=`pwd`/lib PYTHONPATH=~/git/teuthology:~/git/ceph/qa:`pwd`/../src/pybind:`pwd`/lib/cython_modules/lib.3 python ~/git/ceph/qa/tasks/vstart_runner.py + + # If you wish to drop to a python shell on failures, use --interactive: + python ~/git/ceph/qa/tasks/vstart_runner.py --interactive + + # If you wish to run a named test case, pass it as an argument: + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.cephfs.test_data_scan + + # Also, you can create the cluster once and then run named test cases against it: + python ~/git/ceph/qa/tasks/vstart_runner.py --create-cluster-only + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_health + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_rgw + +""" + +from io import StringIO +from collections import defaultdict +import getpass +import signal +import tempfile +import threading +import datetime +import shutil +import re +import os +import time +import sys +import errno +from IPy import IP +import unittest +import platform +import logging + +from unittest import suite, loader + +from teuthology.orchestra.run import Raw, quote +from teuthology.orchestra.daemon import DaemonGroup +from teuthology.orchestra.remote import Remote +from teuthology.config import config as teuth_config +from teuthology.contextutil import safe_while +from teuthology.contextutil import MaxWhileTries +from teuthology.orchestra.run import CommandFailedError +try: + import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +except: + pass + +def 
def respawn_in_path(lib_path, python_paths):
    """
    Make sure the dynamic loader can find the built Ceph libraries.

    If ``lib_path`` is not yet an entry of the platform's library search
    variable (``DYLD_LIBRARY_PATH`` on Darwin, ``LD_LIBRARY_PATH`` elsewhere),
    it is added and the interpreter is re-executed with the same arguments,
    because the loader only reads that variable at process start.  Otherwise
    ``python_paths`` are pushed onto the front of ``sys.path``.

    :param lib_path: directory holding the built shared libraries
    :param python_paths: directories to prepend to ``sys.path``
    """
    execv_cmd = ['python']
    if platform.system() == "Darwin":
        lib_path_var = "DYLD_LIBRARY_PATH"
    else:
        lib_path_var = "LD_LIBRARY_PATH"

    py_binary = os.environ.get("PYTHON", sys.executable)

    def _norm(entry):
        # normpath('') would yield '.', so leave empty entries alone.
        return os.path.normpath(entry) if entry else entry

    current = os.environ.get(lib_path_var)
    # Compare whole path entries: a plain substring test would treat
    # "/x/lib" as present when only "/x/lib64" is listed.
    entries = [] if current is None else [_norm(e) for e in current.split(':')]
    if _norm(lib_path) not in entries:
        if current is None:
            os.environ[lib_path_var] = lib_path
        else:
            os.environ[lib_path_var] = current + ':' + lib_path
        os.execvp(py_binary, execv_cmd + sys.argv)

    for p in python_paths:
        sys.path.insert(0, p)
def rm_nonascii_chars(var):
    """
    Replace UTF-8 encoded curly single quotes in ``var`` with ASCII quotes.

    :param var: bytes to clean up
    :returns: bytes with the left and right single quotation marks
              (``\\xe2\\x80\\x98`` / ``\\xe2\\x80\\x99``) replaced by ``'``
    """
    for curly_quote in (b'\xe2\x80\x98', b'\xe2\x80\x99'):
        var = var.replace(curly_quote, b'\'')
    return var
def wait(self):
    """
    Wait for the subprocess to exit and collect its output.

    Output is forwarded to the ``stdout``/``stderr`` sinks given at
    construction time (decoded for ``StringIO`` sinks, raw bytes otherwise).
    If the command failed, both streams are also echoed to ``sys.stderr``.

    :raises CommandFailedError: if ``check_status`` is set and the exit
                                status is non-zero
    """
    if not self.finished:
        # Only call communicate() on a live process: on a dead one it
        # complains about std* already being closed.
        out, err = self.subproc.communicate()
        out, err = rm_nonascii_chars(out), rm_nonascii_chars(err)
        for sink, data in ((self.stdout, out), (self.stderr, err)):
            if sink is None:
                continue
            if isinstance(sink, StringIO):
                sink.write(data.decode(errors='ignore'))
            else:
                sink.write(data)

        self.exitstatus = self.returncode = self.subproc.returncode

        if self.exitstatus != 0:
            # Decode leniently: a strict decode of arbitrary command output
            # could raise UnicodeDecodeError and hide the real failure.
            sys.stderr.write(out.decode(errors='ignore'))
            sys.stderr.write(err.decode(errors='ignore'))

    if self.check_status and self.exitstatus != 0:
        raise CommandFailedError(self.args, self.exitstatus)
def get_file(self, path, sudo, dest_dir):
    """
    Copy ``path`` into a fresh temporary file and return that file's name.

    :param path: file to copy
    :param sudo: accepted for interface compatibility; not used here
    :param dest_dir: accepted for interface compatibility; not used here
    :returns: path of the temporary copy (the caller is responsible for
              removing it)
    """
    # Reserve a unique name and close the handle right away instead of
    # leaving the open file object for the garbage collector.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmpfile = tmp.name
    shutil.copy(path, tmpfile)
    return tmpfile
def write_file(self, path, data, sudo=False, mode=None, owner=None,
               mkdir=False, append=False):
    """
    Write data to file

    :param path:    file path on host
    :param data:    str, binary or fileobj to be written
    :param sudo:    use sudo to write file, defaults False
    :param mode:    set file mode bits if provided
    :param owner:   set file owner if provided
    :param mkdir:   preliminary create the file directory, defaults False
    :param append:  append data to the file, defaults False
    """
    # Each step is its own command with list arguments.  A newline-joined
    # shell script does not work here: run() splits a string on whitespace
    # into a single argv, turning "mkdir -p dir\ndd of=file" into one bogus
    # mkdir invocation, so the file is never written.
    prefix = ['sudo'] if sudo else []
    omit_sudo = not sudo

    if mkdir:
        dirpath = os.path.dirname(path)
        if dirpath:
            self.run(args=prefix + ['mkdir', '-p', dirpath],
                     omit_sudo=omit_sudo)

    dd_args = prefix + ['dd', 'of=' + path]
    if append:
        dd_args += ['conv=notrunc', 'oflag=append']
    self.run(args=dd_args, stdin=data, omit_sudo=omit_sudo)

    if mode:
        self.run(args=prefix + ['chmod', mode, path], omit_sudo=omit_sudo)
    if owner:
        self.run(args=prefix + ['chown', owner, path], omit_sudo=omit_sudo)
# XXX: omit_sudo is set to True since using sudo can change the ownership
# of files which becomes problematic for following executions of
# vstart_runner.py.
def _do_run(self, args, check_status=True, wait=True, stdout=None,
            stderr=None, cwd=None, stdin=None, logger=None, label=None,
            env=None, timeout=None, omit_sudo=True):
    """
    Run a command as a local subprocess and wrap it in a LocalRemoteProcess.

    :param args: command as a list (may contain Raw items) or a string
    :param check_status: passed on to LocalRemoteProcess
    :param wait: if true, wait for the process before returning
    :param stdout: sink for the process's stdout, passed to LocalRemoteProcess
    :param stderr: sink for the process's stderr, passed to LocalRemoteProcess
    :param cwd: working directory for the subprocess
    :param stdin: str or bytes written to the subprocess's stdin, if truthy
    :param env: environment for the subprocess
    :param omit_sudo: forwarded to the argument checker, which strips "sudo"
                      from the command when set
    :returns: the LocalRemoteProcess wrapping the started subprocess

    NOTE: logger, label and timeout are accepted but not referenced in this
    method body.
    """
    args = self._perform_checks_and_return_list_of_args(args, omit_sudo)

    # We have to use shell=True if any run.Raw was present, e.g. &&
    shell = any([a for a in args if isinstance(a, Raw)])

    # Filter out helper tools that don't exist in a vstart environment
    args = [a for a in args if a not in ('adjust-ulimits',
                                         'ceph-coverage')]

    # Adjust binary path prefix if given a bare program name
    if not isinstance(args[0], Raw) and "/" not in args[0]:
        # If they asked for a bare binary name, and it exists
        # in our built tree, use the one there.
        local_bin = os.path.join(BIN_PREFIX, args[0])
        if os.path.exists(local_bin):
            args = [local_bin] + args[1:]
        else:
            log.debug("'{0}' is not a binary in the Ceph build dir".format(
                args[0]
            ))

    # Log the command line; Raw items are logged by their .value
    log.debug('> ' +
              ' '.join([str(a.value) if isinstance(a, Raw) else a for a in args]))

    if shell:
        # Raw items present: hand the quoted command line to a shell
        subproc = subprocess.Popen(quote(args),
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   stdin=subprocess.PIPE,
                                   cwd=cwd,
                                   env=env,
                                   shell=True)
    else:
        # Sanity check that we've got a list of strings
        for arg in args:
            if not isinstance(arg, str):
                raise RuntimeError("Oops, can't handle arg {0} type {1}".format(
                    arg, arg.__class__
                ))

        subproc = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   stdin=subprocess.PIPE,
                                   cwd=cwd,
                                   env=env)

    if stdin:
        # Hack: writing to stdin is not deadlock-safe, but it "always" works
        # as long as the input buffer is "small"
        # NOTE: stdin is written but not closed here.
        if isinstance(stdin, str):
            subproc.stdin.write(stdin.encode())
        else:
            subproc.stdin.write(stdin)

    proc = LocalRemoteProcess(
        args, subproc, check_status,
        stdout, stderr
    )

    if wait:
        proc.wait()

    return proc
def check_status(self):
    """
    Poll the process started by restart(), if any.

    :returns: the process's exit status, or None if no process was started
              or it is still running
    """
    if not self.proc:
        return None
    # restart() stores a LocalRemoteProcess, which has no poll() of its own;
    # the underlying subprocess.Popen object is kept in its .subproc
    # attribute.  Objects that do offer poll() directly are still honoured.
    poll = getattr(self.proc, 'poll', None)
    if poll is None:
        poll = self.proc.subproc.poll
    return poll()
def mon_in_localhost(config_path="./ceph.conf"):
    """
    Tell whether the cluster's "mon host" setting points at 127.0.0.1.

    If the ceph cluster is using the localhost IP as mon host, network
    namespace unsharing must be disabled.

    :param config_path: path of the ceph.conf to inspect
    :returns: True if a "mon host" line lists v1/v2 addresses on 127.0.0.1,
              False otherwise
    """
    localhost_mon = re.compile(
        r'^\s*mon host\s*=\s*\[((v1|v2):127\.0\.0\.1:\d+,?)+\]')
    with open(config_path) as conf:
        return any(localhost_mon.match(entry) for entry in conf)
config file, so use an absolute path + # to avoid assumptions about daemons' pwd + keyring_path = "./client.{0}.keyring".format(self.client_id) + try: + os.stat(keyring_path) + except OSError: + return os.path.join(os.getcwd(), 'keyring') + else: + return keyring_path + + def setupfs(self, name=None): + if name is None and self.fs is not None: + # Previous mount existed, reuse the old name + name = self.fs.name + self.fs = LocalFilesystem(self.ctx, name=name) + log.debug('Wait for MDS to reach steady state...') + self.fs.wait_for_daemons() + log.debug('Ready to start {}...'.format(type(self).__name__)) + + @property + def _prefix(self): + return BIN_PREFIX + + def _asok_path(self): + # In teuthology, the asok is named after the PID of the ceph-fuse process, because it's + # run foreground. When running it daemonized however, the asok is named after + # the PID of the launching process, not the long running ceph-fuse process. Therefore + # we need to give an exact path here as the logic for checking /proc/ for which + # asok is alive does not work. + + # Load the asok path from ceph.conf as vstart.sh now puts admin sockets + # in a tmpdir. All of the paths are the same, so no need to select + # based off of the service type. 
+ d = "./out" + with open(self.config_path) as f: + for line in f: + asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line) + if asok_conf: + d = asok_conf.groups(1)[0] + break + path = "{0}/client.{1}.*.asok".format(d, self.client_id) + return path + + def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + if opt_use_ns: + self.using_namespace = True + self.setup_netns() + else: + self.using_namespace = False + + if not self.cephfs_mntpt: + self.cephfs_mntpt = "/" + # TODO: don't call setupfs() from within mount() + if createfs: + self.setupfs(name=self.cephfs_name) + + opts = 'norequire_active_mds' + if self.client_id: + opts += ',name=' + self.client_id + if self.client_keyring_path and self.client_id: + opts += ",secret=" + self.get_key_from_keyfile() + if self.config_path: + opts += ',conf=' + self.config_path + if self.cephfs_name: + opts += ",mds_namespace={0}".format(self.cephfs_name) + if mntopts: + opts += ',' + ','.join(mntopts) + + stderr = StringIO() + try: + self.client_remote.run(args=['mkdir', '--', self.hostfs_mntpt], + timeout=(5*60), stderr=stderr) + except CommandFailedError: + if 'file exists' not in stderr.getvalue().lower(): + raise + + if self.cephfs_mntpt is None: + self.cephfs_mntpt = "/" + cmdargs = ['sudo'] + if self.using_namespace: + cmdargs += ['nsenter', + '--net=/var/run/netns/{0}'.format(self.netns_name)] + cmdargs += ['./bin/mount.ceph', ':' + self.cephfs_mntpt, + self.hostfs_mntpt, '-v', '-o', opts] + + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + try: + self.client_remote.run(args=cmdargs, timeout=(30*60), + omit_sudo=False, stdout=mountcmd_stdout, + stderr=mountcmd_stderr) + except CommandFailedError as e: + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + + stderr = StringIO() + try: + self.client_remote.run(args=['sudo', 'chmod', '1777', + 
def cleanup_netns(self):
    """
    Tear down the network namespace, but only if mount() set one up.
    """
    if self.using_namespace:
        # Name the class explicitly: super(type(self), self) would resolve
        # to this very method again for any subclass and recurse forever.
        super(LocalKernelMount, self).cleanup_netns()
When running it daemonized however, the asok is named after + # the PID of the launching process, not the long running ceph-fuse process. Therefore + # we need to give an exact path here as the logic for checking /proc/ for which + # asok is alive does not work. + + # Load the asok path from ceph.conf as vstart.sh now puts admin sockets + # in a tmpdir. All of the paths are the same, so no need to select + # based off of the service type. + d = "./out" + with open(self.config_path) as f: + for line in f: + asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line) + if asok_conf: + d = asok_conf.groups(1)[0] + break + path = "{0}/client.{1}.*.asok".format(d, self.client_id) + return path + + def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + if opt_use_ns: + self.using_namespace = True + self.setup_netns() + else: + self.using_namespace = False + + # TODO: don't call setupfs() from within mount() + if createfs: + self.setupfs(name=self.cephfs_name) + + stderr = StringIO() + try: + self.client_remote.run(args=['mkdir', '-p', self.hostfs_mntpt], + stderr=stderr) + except CommandFailedError: + if 'file exists' not in stderr.getvalue().lower(): + raise + + def list_connections(): + self.client_remote.run( + args=["mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"], + check_status=False + ) + + p = self.client_remote.run(args=["ls", "/sys/fs/fuse/connections"], + check_status=False, stdout=StringIO()) + if p.exitstatus != 0: + log.warning("ls conns failed with {0}, assuming none".format(p.exitstatus)) + return [] + + ls_str = p.stdout.getvalue().strip() + if ls_str: + return [int(n) for n in ls_str.split("\n")] + else: + return [] + + # Before starting ceph-fuse process, note the contents of + # /sys/fs/fuse/connections + pre_mount_conns = list_connections() + log.debug("Pre-mount connections: {0}".format(pre_mount_conns)) + + 
cmdargs = [] + if self.using_namespace: + cmdargs = ['sudo', 'nsenter', + '--net=/var/run/netns/{0}'.format(self.netns_name), + '--setuid', str(os.getuid())] + cmdargs += [os.path.join(BIN_PREFIX, 'ceph-fuse'), self.hostfs_mntpt, + '-f'] + if self.client_id is not None: + cmdargs += ["--id", self.client_id] + if self.client_keyring_path and self.client_id is not None: + cmdargs.extend(['-k', self.client_keyring_path]) + if self.cephfs_name: + cmdargs += ["--client_fs=" + self.cephfs_name] + if self.cephfs_mntpt: + cmdargs += ["--client_mountpoint=" + self.cephfs_mntpt] + if os.getuid() != 0: + cmdargs += ["--client_die_on_failed_dentry_invalidate=false"] + if mntopts: + cmdargs += mntopts + + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + self.fuse_daemon = self.client_remote.run(args=cmdargs, wait=False, + omit_sudo=False, stdout=mountcmd_stdout, stderr=mountcmd_stderr) + self._set_fuse_daemon_pid(check_status) + log.debug("Mounting client.{0} with pid " + "{1}".format(self.client_id, self.fuse_daemon.subproc.pid)) + + # Wait for the connection reference to appear in /sys + waited = 0 + post_mount_conns = list_connections() + while len(post_mount_conns) <= len(pre_mount_conns): + if self.fuse_daemon.finished: + # Did mount fail? 
Raise the CommandFailedError instead of + # hitting the "failed to populate /sys/" timeout + try: + self.fuse_daemon.wait() + except CommandFailedError as e: + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + time.sleep(1) + waited += 1 + if waited > 30: + raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format( + waited + )) + post_mount_conns = list_connections() + + log.debug("Post-mount connections: {0}".format(post_mount_conns)) + + # Record our fuse connection number so that we can use it when + # forcing an unmount + new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) + if len(new_conns) == 0: + raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) + elif len(new_conns) > 1: + raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) + else: + self._fuse_conn = new_conns[0] + + self.gather_mount_info() + + self.mounted = True + + def _set_fuse_daemon_pid(self, check_status): + # NOTE: When a command <args> is launched with sudo, two processes are + # launched, one with sudo in <args> and other without. Make sure we + # get the PID of latter one. + try: + with safe_while(sleep=1, tries=15) as proceed: + while proceed(): + try: + sock = self.find_admin_socket() + except (RuntimeError, CommandFailedError): + continue + + self.fuse_daemon.fuse_pid = int(re.match(".*\.(\d+)\.asok$", + sock).group(1)) + break + except MaxWhileTries: + if check_status: + raise + else: + pass + + def cleanup_netns(self): + if self.using_namespace: + super(type(self), self).cleanup_netns() + + def _run_python(self, pyscript, py_version='python'): + """ + Override this to remove the daemon-helper prefix that is used otherwise + to make the process killable. 
+ """ + return self.client_remote.run(args=[py_version, '-c', pyscript], + wait=False, stdout=StringIO()) + +# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of +# the same name. +class LocalCephManager(CephManager): + def __init__(self): + # Deliberately skip parent init, only inheriting from it to get + # util methods like osd_dump that sit on top of raw_cluster_cmd + self.controller = LocalRemote() + + # A minority of CephManager fns actually bother locking for when + # certain teuthology tests want to run tasks in parallel + self.lock = threading.RLock() + + self.log = lambda x: log.debug(x) + + # Don't bother constructing a map of pools: it should be empty + # at test cluster start, and in any case it would be out of date + # in no time. The attribute needs to exist for some of the CephManager + # methods to work though. + self.pools = {} + + def find_remote(self, daemon_type, daemon_id): + """ + daemon_type like 'mds', 'osd' + daemon_id like 'a', '0' + """ + return LocalRemote() + + def run_ceph_w(self, watch_channel=None): + """ + :param watch_channel: Specifies the channel to be watched. + This can be 'cluster', 'audit', ... + :type watch_channel: str + """ + args = [os.path.join(BIN_PREFIX, "ceph"), "-w"] + if watch_channel is not None: + args.append("--watch-channel") + args.append(watch_channel) + proc = self.controller.run(args=args, wait=False, stdout=StringIO()) + return proc + + def run_cluster_cmd(self, **kwargs): + """ + Run a Ceph command and the object representing the process for the + command. + + Accepts arguments same as teuthology.orchestra.remote.run(). 
+ """ + kwargs['args'] = [os.path.join(BIN_PREFIX,'ceph')]+list(kwargs['args']) + return self.controller.run(**kwargs) + + def raw_cluster_cmd(self, *args, **kwargs) -> str: + """ + args like ["osd", "dump"} + return stdout string + """ + kwargs['args'] = args + if kwargs.get('stdout') is None: + kwargs['stdout'] = StringIO() + return self.run_cluster_cmd(**kwargs).stdout.getvalue() + + def raw_cluster_cmd_result(self, *args, **kwargs): + """ + like raw_cluster_cmd but don't check status, just return rc + """ + kwargs['args'], kwargs['check_status'] = args, False + return self.run_cluster_cmd(**kwargs).exitstatus + + def admin_socket(self, daemon_type, daemon_id, command, check_status=True, + timeout=None, stdout=None): + if stdout is None: + stdout = StringIO() + + return self.controller.run( + args=[os.path.join(BIN_PREFIX, "ceph"), "daemon", + "{0}.{1}".format(daemon_type, daemon_id)] + command, + check_status=check_status, timeout=timeout, stdout=stdout) + + def get_mon_socks(self): + """ + Get monitor sockets. + + :return socks: tuple of strings; strings are individual sockets. + """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def get_msgrv1_mon_socks(self): + """ + Get monitor sockets that use msgrv2 to operate. + + :return socks: tuple of strings; strings are individual sockets. + """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + if addrvec_mem['type'] == 'v1': + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def get_msgrv2_mon_socks(self): + """ + Get monitor sockets that use msgrv2 to operate. + + :return socks: tuple of strings; strings are individual sockets. 
+ """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + if addrvec_mem['type'] == 'v2': + socks.append(addrvec_mem['addr']) + return tuple(socks) + + +class LocalCephCluster(CephCluster): + def __init__(self, ctx): + # Deliberately skip calling CephCluster constructor + self._ctx = ctx + self.mon_manager = LocalCephManager() + self._conf = defaultdict(dict) + + @property + def admin_remote(self): + return LocalRemote() + + def get_config(self, key, service_type=None): + if service_type is None: + service_type = 'mon' + + # FIXME hardcoded vstart service IDs + service_id = { + 'mon': 'a', + 'mds': 'a', + 'osd': '0' + }[service_type] + + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def _write_conf(self): + # In teuthology, we have the honour of writing the entire ceph.conf, but + # in vstart land it has mostly already been written and we need to carefully + # append to it. 
+ conf_path = "./ceph.conf" + banner = "\n#LOCAL_TEST\n" + existing_str = open(conf_path).read() + + if banner in existing_str: + existing_str = existing_str[0:existing_str.find(banner)] + + existing_str += banner + + for subsys, kvs in self._conf.items(): + existing_str += "\n[{0}]\n".format(subsys) + for key, val in kvs.items(): + # Comment out existing instance if it exists + log.debug("Searching for existing instance {0}/{1}".format( + key, subsys + )) + existing_section = re.search("^\[{0}\]$([\n]|[^\[])+".format( + subsys + ), existing_str, re.MULTILINE) + + if existing_section: + section_str = existing_str[existing_section.start():existing_section.end()] + existing_val = re.search("^\s*[^#]({0}) =".format(key), section_str, re.MULTILINE) + if existing_val: + start = existing_section.start() + existing_val.start(1) + log.debug("Found string to replace at {0}".format( + start + )) + existing_str = existing_str[0:start] + "#" + existing_str[start:] + + existing_str += "{0} = {1}\n".format(key, val) + + open(conf_path, "w").write(existing_str) + + def set_ceph_conf(self, subsys, key, value): + self._conf[subsys][key] = value + self._write_conf() + + def clear_ceph_conf(self, subsys, key): + del self._conf[subsys][key] + self._write_conf() + + +class LocalMDSCluster(LocalCephCluster, MDSCluster): + def __init__(self, ctx): + LocalCephCluster.__init__(self, ctx) + # Deliberately skip calling MDSCluster constructor + self._mds_ids = ctx.daemons.daemons['ceph.mds'].keys() + log.debug("Discovered MDS IDs: {0}".format(self._mds_ids)) + self._mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids]) + + @property + def mds_ids(self): + return self._mds_ids + + @property + def mds_daemons(self): + return self._mds_daemons + + def clear_firewall(self): + # FIXME: unimplemented + pass + + def newfs(self, name='cephfs', create=True): + return LocalFilesystem(self._ctx, name=name, create=create) + + def delete_all_filesystems(self): + """ + Remove all 
filesystems that exist, and any pools in use by them. + """ + for fs in self.status().get_filesystems(): + LocalFilesystem(ctx=self._ctx, fscid=fs['id']).destroy() + + +class LocalMgrCluster(LocalCephCluster, MgrCluster): + def __init__(self, ctx): + super(LocalMgrCluster, self).__init__(ctx) + + self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys() + self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids]) + + +class LocalFilesystem(LocalMDSCluster, Filesystem): + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): + # Deliberately skip calling Filesystem constructor + LocalMDSCluster.__init__(self, ctx) + + self.id = None + self.name = name + self.metadata_pool_name = None + self.metadata_overlay = False + self.data_pool_name = None + self.data_pools = None + self.fs_config = fs_config + self.ec_profile = fs_config.get('ec_profile') + + self.mon_manager = LocalCephManager() + + self.client_remote = LocalRemote() + + self._conf = defaultdict(dict) + + if name is not None: + if fscid is not None: + raise RuntimeError("cannot specify fscid when creating fs") + if create and not self.legacy_configured(): + self.create() + else: + if fscid is not None: + self.id = fscid + self.getinfo(refresh=True) + + # Stash a reference to the first created filesystem on ctx, so + # that if someone drops to the interactive shell they can easily + # poke our methods. 
+ if not hasattr(self._ctx, "filesystem"): + self._ctx.filesystem = self + + @property + def _prefix(self): + return BIN_PREFIX + + def set_clients_block(self, blocked, mds_id=None): + raise NotImplementedError() + + +class LocalCluster(object): + def __init__(self, rolename="placeholder"): + self.remotes = { + LocalRemote(): [rolename] + } + + def only(self, requested): + return self.__class__(rolename=requested) + + def run(self, *args, **kwargs): + r = [] + for remote in self.remotes.keys(): + r.append(remote.run(*args, **kwargs)) + return r + + +class LocalContext(object): + def __init__(self): + self.config = {} + self.teuthology_config = teuth_config + self.cluster = LocalCluster() + self.daemons = DaemonGroup() + + # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any + # tests that want to look these up via ctx can do so. + # Inspect ceph.conf to see what roles exist + for conf_line in open("ceph.conf").readlines(): + for svc_type in ["mon", "osd", "mds", "mgr"]: + prefixed_type = "ceph." + svc_type + if prefixed_type not in self.daemons.daemons: + self.daemons.daemons[prefixed_type] = {} + match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line) + if match: + svc_id = match.group(1) + self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id) + + def __del__(self): + test_path = self.teuthology_config['test_path'] + # opt_create_cluster_only does not create the test path + if test_path: + shutil.rmtree(test_path) + + +######################################### +# +# stuff necessary for launching tests... 
+# +######################################### + + +def enumerate_methods(s): + log.debug("e: {0}".format(s)) + for t in s._tests: + if isinstance(t, suite.BaseTestSuite): + for sub in enumerate_methods(t): + yield sub + else: + yield s, t + + +def load_tests(modules, loader): + if modules: + log.debug("Executing modules: {0}".format(modules)) + module_suites = [] + for mod_name in modules: + # Test names like cephfs.test_auto_repair + module_suites.append(loader.loadTestsFromName(mod_name)) + log.debug("Loaded: {0}".format(list(module_suites))) + return suite.TestSuite(module_suites) + else: + log.debug("Executing all cephfs tests") + return loader.discover( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "cephfs") + ) + + +def scan_tests(modules): + overall_suite = load_tests(modules, loader.TestLoader()) + + max_required_mds = 0 + max_required_clients = 0 + max_required_mgr = 0 + require_memstore = False + + for suite_, case in enumerate_methods(overall_suite): + max_required_mds = max(max_required_mds, + getattr(case, "MDSS_REQUIRED", 0)) + max_required_clients = max(max_required_clients, + getattr(case, "CLIENTS_REQUIRED", 0)) + max_required_mgr = max(max_required_mgr, + getattr(case, "MGRS_REQUIRED", 0)) + require_memstore = getattr(case, "REQUIRE_MEMSTORE", False) \ + or require_memstore + + return max_required_mds, max_required_clients, \ + max_required_mgr, require_memstore + + +class LogRotate(): + def __init__(self): + self.conf_file_path = os.path.join(os.getcwd(), 'logrotate.conf') + self.state_file_path = os.path.join(os.getcwd(), 'logrotate.state') + + def run_logrotate(self): + remote.run(args=['logrotate', '-f', self.conf_file_path, '-s', + self.state_file_path, '--verbose']) + + +def teardown_cluster(): + log.info('\ntearing down the cluster...') + remote.run(args=[os.path.join(SRC_PREFIX, "stop.sh")], timeout=60) + remote.run(args=['rm', '-rf', './dev', './out']) + + +def clear_old_log(): + from os import stat + + try: + stat(logpath) + 
# would need an update when making this py3 compatible. Use FileNotFound + # instead. + except OSError: + return + else: + os.remove(logpath) + with open(logpath, 'w') as logfile: + logfile.write('') + init_log() + log.debug('logging in a fresh file now...') + + +class LogStream(object): + def __init__(self): + self.buffer = "" + self.omit_result_lines = False + + def _del_result_lines(self): + """ + Don't let unittest.TextTestRunner print "Ran X tests in Ys", + vstart_runner.py will do it for itself since it runs tests in a + testsuite one by one. + """ + if self.omit_result_lines: + self.buffer = re.sub('-'*70+'\nran [0-9]* test in [0-9.]*s\n*', + '', self.buffer, flags=re.I) + self.buffer = re.sub('failed \(failures=[0-9]*\)\n', '', self.buffer, + flags=re.I) + self.buffer = self.buffer.replace('OK\n', '') + + def write(self, data): + self.buffer += data + if self.buffer.count("\n") > 5: + self._write() + + def _write(self): + if opt_rotate_logs: + self._del_result_lines() + if self.buffer == '': + return + + lines = self.buffer.split("\n") + for line in lines: + # sys.stderr.write(line + "\n") + log.info(line) + self.buffer = '' + + def flush(self): + pass + + def __del__(self): + self._write() + + +class InteractiveFailureResult(unittest.TextTestResult): + """ + Specialization that implements interactive-on-error style + behavior. 
+ """ + def addFailure(self, test, err): + super(InteractiveFailureResult, self).addFailure(test, err) + log.error(self._exc_info_to_string(err, test)) + log.error("Failure in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=None, config=None) + + def addError(self, test, err): + super(InteractiveFailureResult, self).addError(test, err) + log.error(self._exc_info_to_string(err, test)) + log.error("Error in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=None, config=None) + + +# XXX: class we require would be inherited from this one and one of +# InteractiveFailureResult and unittestunittest.TextTestResult. +class LoggingResultTemplate(object): + fail_on_skip = False + + def startTest(self, test): + log.info("Starting test: {0}".format(self.getDescription(test))) + test.started_at = datetime.datetime.utcnow() + return super(LoggingResultTemplate, self).startTest(test) + + def stopTest(self, test): + log.info("Stopped test: {0} in {1}s".format( + self.getDescription(test), + (datetime.datetime.utcnow() - test.started_at).total_seconds() + )) + + def addSkip(self, test, reason): + if LoggingResultTemplate.fail_on_skip: + # Don't just call addFailure because that requires a traceback + self.failures.append((test, reason)) + else: + super(LoggingResultTemplate, self).addSkip(test, reason) + + +def launch_tests(overall_suite): + if opt_rotate_logs or not opt_exit_on_test_failure: + return launch_individually(overall_suite) + else: + return launch_entire_suite(overall_suite) + + +def get_logging_result_class(): + result_class = InteractiveFailureResult if opt_interactive_on_error else \ + unittest.TextTestResult + return type('', (LoggingResultTemplate, result_class), {}) + + +def launch_individually(overall_suite): + no_of_tests_execed = 0 + no_of_tests_failed, no_of_tests_execed = 0, 0 + LoggingResult = get_logging_result_class() + stream = LogStream() + 
stream.omit_result_lines = True + if opt_rotate_logs: + logrotate = LogRotate() + + started_at = datetime.datetime.utcnow() + for suite_, case in enumerate_methods(overall_suite): + # don't run logrotate beforehand since some ceph daemons might be + # down and pre/post-rotate scripts in logrotate.conf might fail. + if opt_rotate_logs: + logrotate.run_logrotate() + + result = unittest.TextTestRunner(stream=stream, + resultclass=LoggingResult, + verbosity=2, failfast=True).run(case) + + if not result.wasSuccessful(): + if opt_exit_on_test_failure: + break + else: + no_of_tests_failed += 1 + + no_of_tests_execed += 1 + time_elapsed = (datetime.datetime.utcnow() - started_at).total_seconds() + + if result.wasSuccessful(): + log.info('') + log.info('-'*70) + log.info(f'Ran {no_of_tests_execed} tests in {time_elapsed}s') + if no_of_tests_failed > 0: + log.info(f'{no_of_tests_failed} tests failed') + log.info('') + log.info('OK') + + return result + + +def launch_entire_suite(overall_suite): + LoggingResult = get_logging_result_class() + + testrunner = unittest.TextTestRunner(stream=LogStream(), + resultclass=LoggingResult, + verbosity=2, failfast=True) + return testrunner.run(overall_suite) + + +def exec_test(): + # Parse arguments + global opt_interactive_on_error + opt_interactive_on_error = False + opt_create_cluster = False + opt_create_cluster_only = False + opt_ignore_missing_binaries = False + opt_teardown_cluster = False + global opt_log_ps_output + opt_log_ps_output = False + use_kernel_client = False + global opt_use_ns + opt_use_ns = False + opt_brxnet= None + opt_verbose = True + global opt_rotate_logs + opt_rotate_logs = False + global opt_exit_on_test_failure + opt_exit_on_test_failure = True + + args = sys.argv[1:] + flags = [a for a in args if a.startswith("-")] + modules = [a for a in args if not a.startswith("-")] + for f in flags: + if f == "--interactive": + opt_interactive_on_error = True + elif f == "--create": + opt_create_cluster = True + elif f 
== "--create-cluster-only": + opt_create_cluster_only = True + elif f == "--ignore-missing-binaries": + opt_ignore_missing_binaries = True + elif f == '--teardown': + opt_teardown_cluster = True + elif f == '--log-ps-output': + opt_log_ps_output = True + elif f == '--clear-old-log': + clear_old_log() + elif f == "--kclient": + use_kernel_client = True + elif f == '--usens': + opt_use_ns = True + elif '--brxnet' in f: + if re.search(r'=[0-9./]+', f) is None: + log.error("--brxnet=<ip/mask> option needs one argument: '{0}'".format(f)) + sys.exit(-1) + opt_brxnet=f.split('=')[1] + try: + IP(opt_brxnet) + if IP(opt_brxnet).iptype() == 'PUBLIC': + raise RuntimeError('is public') + except Exception as e: + log.error("Invalid ip '{0}' {1}".format(opt_brxnet, e)) + sys.exit(-1) + elif '--no-verbose' == f: + opt_verbose = False + elif f == '--rotate-logs': + opt_rotate_logs = True + elif f == '--run-all-tests': + opt_exit_on_test_failure = False + elif f == '--debug': + log.setLevel(logging.DEBUG) + else: + log.error("Unknown option '{0}'".format(f)) + sys.exit(-1) + + # Help developers by stopping up-front if their tree isn't built enough for all the + # tools that the tests might want to use (add more here if needed) + require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan", + "cephfs-table-tool", "ceph-fuse", "rados", "cephfs-meta-injection"] + missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))] + if missing_binaries and not opt_ignore_missing_binaries: + log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries))) + sys.exit(-1) + + max_required_mds, max_required_clients, \ + max_required_mgr, require_memstore = scan_tests(modules) + + global remote + remote = LocalRemote() + + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + + # Tolerate no MDSs or clients running at start + ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())], + 
stdout=StringIO()).stdout.getvalue().strip() + lines = ps_txt.split("\n")[1:] + for line in lines: + if 'ceph-fuse' in line or 'ceph-mds' in line: + pid = int(line.split()[0]) + log.warning("Killing stray process {0}".format(line)) + os.kill(pid, signal.SIGKILL) + + # Fire up the Ceph cluster if the user requested it + if opt_create_cluster or opt_create_cluster_only: + log.info("Creating cluster with {0} MDS daemons".format( + max_required_mds)) + teardown_cluster() + vstart_env = os.environ.copy() + vstart_env["FS"] = "0" + vstart_env["MDS"] = max_required_mds.__str__() + vstart_env["OSD"] = "4" + vstart_env["MGR"] = max(max_required_mgr, 1).__str__() + + args = [ + os.path.join(SRC_PREFIX, "vstart.sh"), + "-n", + "--nolockdep", + ] + if require_memstore: + args.append("--memstore") + + if opt_verbose: + args.append("-d") + + # usually, i get vstart.sh running completely in less than 100 + # seconds. + remote.run(args=args, env=vstart_env, timeout=(3 * 60)) + + # Wait for OSD to come up so that subsequent injectargs etc will + # definitely succeed + LocalCephCluster(LocalContext()).mon_manager.wait_for_all_osds_up(timeout=30) + + if opt_create_cluster_only: + return + + if opt_use_ns and mon_in_localhost() and not opt_create_cluster: + raise RuntimeError("cluster is on localhost; '--usens' option is incompatible. 
Or you can pass an extra '--create' option to create a new cluster without localhost!") + + # List of client mounts, sufficient to run the selected tests + clients = [i.__str__() for i in range(0, max_required_clients)] + + test_dir = tempfile.mkdtemp() + teuth_config['test_path'] = test_dir + + ctx = LocalContext() + ceph_cluster = LocalCephCluster(ctx) + mds_cluster = LocalMDSCluster(ctx) + mgr_cluster = LocalMgrCluster(ctx) + + # Construct Mount classes + mounts = [] + for client_id in clients: + # Populate client keyring (it sucks to use client.admin for test clients + # because it's awkward to find the logs later) + client_name = "client.{0}".format(client_id) + + if client_name not in open("./keyring").read(): + p = remote.run(args=[os.path.join(BIN_PREFIX, "ceph"), "auth", "get-or-create", client_name, + "osd", "allow rw", + "mds", "allow", + "mon", "allow r"], stdout=StringIO()) + + open("./keyring", "at").write(p.stdout.getvalue()) + + if use_kernel_client: + mount = LocalKernelMount(ctx=ctx, test_dir=test_dir, + client_id=client_id, brxnet=opt_brxnet) + else: + mount = LocalFuseMount(ctx=ctx, test_dir=test_dir, + client_id=client_id, brxnet=opt_brxnet) + + mounts.append(mount) + if os.path.exists(mount.hostfs_mntpt): + if mount.is_mounted(): + log.warning("unmounting {0}".format(mount.hostfs_mntpt)) + mount.umount_wait() + else: + os.rmdir(mount.hostfs_mntpt) + + from tasks.cephfs_test_runner import DecoratingLoader + + decorating_loader = DecoratingLoader({ + "ctx": ctx, + "mounts": mounts, + "ceph_cluster": ceph_cluster, + "mds_cluster": mds_cluster, + "mgr_cluster": mgr_cluster, + }) + + # For the benefit of polling tests like test_full -- in teuthology land we set this + # in a .yaml, here it's just a hardcoded thing for the developer's pleasure. 
+ remote.run(args=[os.path.join(BIN_PREFIX, "ceph"), "tell", "osd.*", "injectargs", "--osd-mon-report-interval", "5"]) + ceph_cluster.set_ceph_conf("osd", "osd_mon_report_interval", "5") + + # Vstart defaults to two segments, which very easily gets a "behind on trimming" health warning + # from normal IO latency. Increase it for running teests. + ceph_cluster.set_ceph_conf("mds", "mds log max segments", "10") + + # Make sure the filesystem created in tests has uid/gid that will let us talk to + # it after mounting it (without having to go root). Set in 'global' not just 'mds' + # so that cephfs-data-scan will pick it up too. + ceph_cluster.set_ceph_conf("global", "mds root ino uid", "%s" % os.getuid()) + ceph_cluster.set_ceph_conf("global", "mds root ino gid", "%s" % os.getgid()) + + # Monkeypatch get_package_version to avoid having to work out what kind of distro we're on + def _get_package_version(remote, pkg_name): + # Used in cephfs tests to find fuse version. Your development workstation *does* have >=2.9, right? 
+ return "2.9" + + import teuthology.packaging + teuthology.packaging.get_package_version = _get_package_version + + overall_suite = load_tests(modules, decorating_loader) + + # Filter out tests that don't lend themselves to interactive running, + victims = [] + for case, method in enumerate_methods(overall_suite): + fn = getattr(method, method._testMethodName) + + drop_test = False + + if hasattr(fn, 'is_for_teuthology') and getattr(fn, 'is_for_teuthology') is True: + drop_test = True + log.warning("Dropping test because long running: {method_id}".format(method_id=method.id())) + + if getattr(fn, "needs_trimming", False) is True: + drop_test = (os.getuid() != 0) + log.warning("Dropping test because client trim unavailable: {method_id}".format(method_id=method.id())) + + if drop_test: + # Don't drop the test if it was explicitly requested in arguments + is_named = False + for named in modules: + if named.endswith(method.id()): + is_named = True + break + + if not is_named: + victims.append((case, method)) + + log.debug("Disabling {0} tests because of is_for_teuthology or needs_trimming".format(len(victims))) + for s, method in victims: + s._tests.remove(method) + + overall_suite = load_tests(modules, loader.TestLoader()) + result = launch_tests(overall_suite) + + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + if opt_teardown_cluster: + teardown_cluster() + + if not result.wasSuccessful(): + # no point in duplicating if we can have multiple failures in same + # run. 
+ if opt_exit_on_test_failure: + result.printErrors() # duplicate output at end for convenience + + bad_tests = [] + for test, error in result.errors: + bad_tests.append(str(test)) + for test, failure in result.failures: + bad_tests.append(str(test)) + + sys.exit(-1) + else: + sys.exit(0) + + +if __name__ == "__main__": + exec_test() diff --git a/qa/tasks/watch_notify_same_primary.py b/qa/tasks/watch_notify_same_primary.py new file mode 100644 index 000000000..448fee193 --- /dev/null +++ b/qa/tasks/watch_notify_same_primary.py @@ -0,0 +1,129 @@ + +""" +watch_notify_same_primary task +""" +from io import StringIO +import contextlib +import logging + + +from teuthology.orchestra import run +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run watch_notify_same_primary + + The config should be as follows: + + watch_notify_same_primary: + clients: [client list] + + The client list should contain 1 client + + The test requires 3 osds. + + example: + + tasks: + - ceph: + - watch_notify_same_primary: + clients: [client.0] + - interactive: + """ + log.info('Beginning watch_notify_same_primary...') + assert isinstance(config, dict), \ + "please list clients to run on" + + clients = config.get('clients', ['client.0']) + assert len(clients) == 1 + role = clients[0] + assert isinstance(role, str) + PREFIX = 'client.' 
+ assert role.startswith(PREFIX) + (remote,) = ctx.cluster.only(role).remotes.keys() + manager = ctx.managers['ceph'] + manager.raw_cluster_cmd('osd', 'set', 'noout') + + pool = manager.create_pool_with_unique_name() + def obj(n): return "foo-{num}".format(num=n) + def start_watch(n): + remote.run( + args = [ + "rados", + "-p", pool, + "put", + obj(n), + "/etc/resolv.conf"], + logger=log.getChild('watch.{id}'.format(id=n))) + proc = remote.run( + args = [ + "rados", + "-p", pool, + "watch", + obj(n)], + stdin=run.PIPE, + stdout=StringIO(), + stderr=StringIO(), + wait=False) + return proc + + num = 20 + + watches = [start_watch(i) for i in range(num)] + + # wait for them all to register + for i in range(num): + with safe_while() as proceed: + while proceed(): + lines = remote.sh( + ["rados", "-p", pool, "listwatchers", obj(i)]) + num_watchers = lines.count('watcher=') + log.info('i see %d watchers for %s', num_watchers, obj(i)) + if num_watchers >= 1: + break + + def notify(n, msg): + remote.run( + args = [ + "rados", + "-p", pool, + "notify", + obj(n), + msg], + logger=log.getChild('notify.{id}'.format(id=n))) + + [notify(n, 'notify1') for n in range(len(watches))] + + manager.kill_osd(0) + manager.mark_down_osd(0) + + [notify(n, 'notify2') for n in range(len(watches))] + + try: + yield + finally: + log.info('joining watch_notify_stress') + for watch in watches: + watch.stdin.write("\n") + + run.wait(watches) + + for watch in watches: + lines = watch.stdout.getvalue().split("\n") + got1 = False + got2 = False + for l in lines: + if 'notify1' in l: + got1 = True + if 'notify2' in l: + got2 = True + log.info(lines) + assert got1 and got2 + + manager.revive_osd(0) + manager.remove_pool(pool) diff --git a/qa/tasks/watch_notify_stress.py b/qa/tasks/watch_notify_stress.py new file mode 100644 index 000000000..47747b1ca --- /dev/null +++ b/qa/tasks/watch_notify_stress.py @@ -0,0 +1,69 @@ +""" +test_stress_watch task +""" +import contextlib +import logging + +from 
@contextlib.contextmanager
def task(ctx, config):
    """
    Run test_stress_watch

    The config should be as follows:

    test_stress_watch:
        clients: [client list]

    example:

    tasks:
    - ceph:
    - test_stress_watch:
        clients: [client.0]
    - interactive:

    One ``multi_stress_watch foo foo`` process is started under
    ``daemon-helper kill`` for every listed client, wrapped in a
    ProcThrasher.  All of them are joined when the task exits.

    :param ctx: Context
    :param config: Configuration; optional keys are ``clients`` (defaults
                   to ``['client.0']``) and ``flags`` (placed in CEPH_ARGS)
    """
    log.info('Beginning test_stress_watch...')
    assert isinstance(config, dict), \
        "please list clients to run on"
    PREFIX = 'client.'
    testwatch = {}

    for role in config.get('clients', ['client.0']):
        assert isinstance(role, str)
        assert role.startswith(PREFIX)
        id_ = role[len(PREFIX):]
        (remote,) = ctx.cluster.only(role).remotes.keys()

        args = ['CEPH_CLIENT_ID={id_}'.format(id_=id_),
                'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')),
                'daemon-helper',
                'kill',
                'multi_stress_watch foo foo'
                ]

        log.info("args are %s", args)

        # Every element is passed as run.Raw so that the environment
        # assignments and the multi-word command reach the shell unquoted.
        proc = proc_thrasher.ProcThrasher({}, remote,
            args=[run.Raw(i) for i in args],
            logger=log.getChild('testwatch.{id}'.format(id=id_)),
            stdin=run.PIPE,
            wait=False
            )
        proc.start()
        testwatch[id_] = proc

    try:
        yield
    finally:
        log.info('joining watch_notify_stress')
        for i in testwatch.values():
            i.join()
def task(ctx, config):
    """
    Run ceph on all workunits found under the specified path.

    For example::

        tasks:
        - ceph:
        - ceph-fuse: [client.0]
        - workunit:
            clients:
              client.0: [direct_io, xattrs.sh]
              client.1: [snaps]
            branch: foo

    You can also run a list of workunits on all clients:
        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            tag: v0.47
            clients:
              all: [direct_io, xattrs.sh, snaps]

    If you have an "all" section it will run all the workunits
    on each client simultaneously, AFTER running any workunits specified
    for individual clients. (This prevents unintended simultaneous runs.)

    To customize tests, you can specify environment variables as a dict. You
    can also specify a time limit for each work unit (defaults to 3h):

        tasks:
        - ceph:
        - ceph-fuse:
        - workunit:
            sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
            clients:
              all: [snaps]
            env:
              FOO: bar
              BAZ: quux
            timeout: 3h

    You can also pass optional arguments to the found workunits:

        tasks:
        - workunit:
            clients:
              all:
                - test-ceph-helpers.sh test_get_config

    This task supports roles that include a ceph cluster, e.g.::

        tasks:
        - ceph:
        - workunit:
            clients:
              backup.client.0: [foo]
              client.1: [bar] # cluster is implicitly 'ceph'

    You can also specify an alternative top-level dir to 'qa/workunits', like
    'qa/standalone', with::

        tasks:
        - install:
        - workunit:
            basedir: qa/standalone
            clients:
              client.0:
                - test-ceph-helpers.sh

    Further keys read from the configuration are ``cleanup`` (defaults to
    True; when false the scratch directories are left in place),
    ``subdir`` and ``no_coverage_and_limits``.

    :param ctx: Context
    :param config: Configuration
    """
    assert isinstance(config, dict)
    assert isinstance(config.get('clients'), dict), \
        'configuration must contain a dictionary of clients'

    # NOTE(review): the remaining settings are deliberately read only after
    # this call, as in the original ordering, in case the helper folds the
    # overrides into `config` -- confirm against tasks.util.workunit.
    refspec = get_refspec_after_overrides(
        config, ctx.config.get('overrides', {}))
    timeout = config.get('timeout', '3h')
    cleanup = config.get('cleanup', True)

    log.info('Pulling workunits from ref %s', refspec)

    env = config.get('env')
    if env is not None:
        assert isinstance(env, dict), 'env must be a dictionary'
    clients = config['clients']
    basedir = config.get('basedir', 'qa/workunits')
    subdir = config.get('subdir')

    # Every role except the special "all" entry gets its own scratch dir;
    # remember which mount points had to be created so they can be removed.
    log.info('Making a separate scratch dir for every client...')
    made_mountpoint = {}
    for role in clients:
        assert isinstance(role, str)
        if role != "all":
            assert 'client' in role
            made_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    # Per-client workunits run in parallel with each other.
    log.info("timeout={}".format(timeout))
    log.info("cleanup={}".format(cleanup))
    with parallel() as p:
        for role, tests in clients.items():
            if role == "all":
                continue
            p.spawn(_run_tests, ctx, refspec, role, tests,
                    env,
                    basedir=basedir,
                    timeout=timeout,
                    cleanup=cleanup,
                    coverage_and_limits=not config.get('no_coverage_and_limits', None))

    if cleanup:
        # Remove the scratch dirs of the per-client workunits.
        for role, created in made_mountpoint.items():
            _delete_dir(ctx, role, created)

    # The "all" workunits start only after the per-client ones are done.
    if 'all' in clients:
        _spawn_on_all_clients(ctx, refspec, clients["all"], env,
                              basedir,
                              subdir, timeout=timeout,
                              cleanup=cleanup)


def _client_mountpoint(ctx, cluster, id_):
    """
    Return the path of the mountpoint that workunits of client `id_` in
    `cluster` are expected to run on.

    The cluster name is part of the directory name only when the cluster is
    not 'ceph', for compatibility with tasks like ceph-fuse that aren't
    cluster-aware yet.
    """
    dir_ = ('mnt.{0}'.format(id_) if cluster == 'ceph'
            else 'mnt.{0}.{1}'.format(cluster, id_))
    return os.path.join(misc.get_testdir(ctx), dir_)
def _delete_dir(ctx, role, created_mountpoint):
    """
    Delete file used by this role, and delete the directory that this
    role appeared in.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param created_mountpoint: true if the mount point itself was created
                               artificially (see _make_scratch_dir) and
                               must therefore be removed as well
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    client = os.path.join(mnt, 'client.{id}'.format(id=id_))

    # Remove the directory inside the mount where the workunit ran
    remote.run(
        args=[
            'sudo',
            'rm',
            '-rf',
            '--',
            client,
        ],
    )
    log.info("Deleted dir {dir}".format(dir=client))

    # If the mount was an artificially created dir, delete that too
    if created_mountpoint:
        remote.run(
            args=[
                'rmdir',
                '--',
                mnt,
            ],
        )
        # Report the mount point that was removed, not the client dir.
        log.info("Deleted artificial mount point {dir}".format(dir=mnt))


def _make_scratch_dir(ctx, role, subdir):
    """
    Make scratch directories for this role.  This also makes the mount
    point if that directory does not exist.

    :param ctx: Context
    :param role: "role.#" where # is used for the role id.
    :param subdir: use this subdir (False if not used)
    :returns: True if the mount point had to be created, False if it
              already existed
    """
    cluster, _, id_ = misc.split_role(role)
    remote = get_remote_for_role(ctx, role)
    dir_owner = remote.user
    mnt = _client_mountpoint(ctx, cluster, id_)

    # if neither kclient nor ceph-fuse are required for a workunit,
    # mnt may not exist. Stat and create the directory if it doesn't.
    try:
        remote.run(
            args=[
                'stat',
                '--',
                mnt,
            ],
        )
    except CommandFailedError:
        remote.run(
            args=[
                'mkdir',
                '--',
                mnt,
            ],
        )
        log.info('Created dir {dir}'.format(dir=mnt))
        created_mountpoint = True
    else:
        log.info('Did not need to create dir {dir}'.format(dir=mnt))
        created_mountpoint = False

    if not subdir:
        subdir = 'client.{id}'.format(id=id_)

    # cd first so this will fail if the mount point does not exist;
    # pure install -d will silently do the wrong thing
    enter_mnt = ['cd', '--', mnt, run.Raw('&&')]
    if created_mountpoint:
        # We just created mnt ourselves, so a plain mkdir is enough.
        make_subdir = ['mkdir', '--', subdir]
    else:
        # Pre-existing mount point: create the subdir with sudo and hand
        # it to the remote user.
        make_subdir = [
            'sudo',
            'install',
            '-d',
            '-m', '0755',
            '--owner={user}'.format(user=dir_owner),
            '--',
            subdir,
        ]
    remote.run(args=enter_mnt + make_subdir)

    return created_mountpoint
def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
    """
    Make a scratch directory for each client in the cluster, and then for each
    test spawn _run_tests() for each role.

    The tests run one after another; each single test runs on all clients
    in parallel.

    See _run_tests() for parameter documentation.

    :param cleanup: forwarded to _run_tests(); when true the generated
                    client directories are also deleted at the end
    """
    is_client = misc.is_type('client')

    # role -> whether its mount point had to be created artificially
    created_mountpoint = {}
    for roles_for_host in ctx.cluster.remotes.values():
        for role in roles_for_host:
            if is_client(role):
                created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)

    for unit in tests:
        with parallel() as p:
            for role in created_mountpoint:
                # Forward cleanup so that cleanup=False also keeps the
                # scratch dir that _run_tests would otherwise remove.
                p.spawn(_run_tests, ctx, refspec, role, [unit], env,
                        basedir,
                        subdir,
                        timeout=timeout,
                        cleanup=cleanup)

    # cleanup the generated client directories
    if cleanup:
        for role, created in created_mountpoint.items():
            _delete_dir(ctx, role, created)
def _run_tests(ctx, refspec, role, tests, env, basedir,
               subdir=None, timeout=None, cleanup=True,
               coverage_and_limits=True):
    """
    Run the individual test. Create a scratch directory and then extract the
    workunits from git. Make the executables, and then run the tests.
    Clean up (remove files created) after the tests are finished.

    :param ctx:     Context
    :param refspec: branch, sha1, or version tag used to identify this
                    build
    :param role:    client role to run on; must be of type 'client'
    :param tests:   specific tests specified (a list of specs; each spec is
                    a file or directory name optionally followed by
                    arguments).
    :param env:     environment set in yaml file.  Could be None.
    :param basedir: directory inside the clone that holds the workunits
    :param subdir:  subdirectory set in yaml file.  Could be None
    :param timeout: If present, use the 'timeout' command on the remote host
                    to limit execution time. Must be specified by a number
                    followed by 's' for seconds, 'm' for minutes, 'h' for
                    hours, or 'd' for days. If '0' or anything that evaluates
                    to False is passed, the 'timeout' command is not used.
    :param cleanup: remove the scratch directory after every workunit
    :param coverage_and_limits: wrap each workunit in adjust-ulimits and
                    ceph-coverage
    :raises RuntimeError: if a spec matches none of the executables found
    """
    testdir = misc.get_testdir(ctx)
    assert isinstance(role, str)
    cluster, type_, id_ = misc.split_role(role)
    assert type_ == 'client'
    remote = get_remote_for_role(ctx, role)
    mnt = _client_mountpoint(ctx, cluster, id_)
    # subdir so we can remove and recreate this a lot without sudo
    if subdir is None:
        scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
    else:
        scratch_tmp = os.path.join(mnt, subdir)
    clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
    srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
                                       basedir=basedir)

    git_url = teuth_config.get_ceph_qa_suite_git_url()
    # When running an upgrade test, ceph-ci may not have branches like
    # `jewel`, so fall back to ceph.git if the first clone fails.
    try:
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(git_url, clonedir))
    except CommandFailedError:
        if git_url.endswith('/ceph-ci.git'):
            alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
        elif git_url.endswith('/ceph-ci'):
            alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
        else:
            raise
        log.info(
            "failed to check out '%s' from %s; will also try in %s",
            refspec,
            git_url,
            alt_git_url,
        )
        remote.run(logger=log.getChild(role),
                   args=refspec.clone(alt_git_url, clonedir))

    # Build if there is a Makefile, then record every executable file
    # (NUL-separated, relative to srcdir) in the workunits list.
    remote.run(
        logger=log.getChild(role),
        args=[
            'cd', '--', srcdir,
            run.Raw('&&'),
            'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
            run.Raw('&&'),
            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
            run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
        ],
    )

    workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
    workunits = sorted(remote.read_file(workunits_file).decode().split('\0'))
    assert workunits

    try:
        assert isinstance(tests, list)
        for spec in tests:
            dir_or_fname, *optional_args = shlex.split(spec)
            log.info('Running workunits matching %s on %s...', dir_or_fname, role)
            # match executables named "foo" or "foo/*" with workunit named
            # "foo"
            to_run = [w for w in workunits
                      if os.path.commonpath([w, dir_or_fname]) == dir_or_fname]
            if not to_run:
                raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
            for workunit in to_run:
                log.info('Running workunit %s...', workunit)
                args = [
                    'mkdir', '-p', '--', scratch_tmp,
                    run.Raw('&&'),
                    'cd', '--', scratch_tmp,
                    run.Raw('&&'),
                    run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
                    run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
                    run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
                    run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
                    run.Raw('CEPH_ID="{id}"'.format(id=id_)),
                    run.Raw('PATH=$PATH:/usr/sbin'),
                    run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
                    run.Raw('CEPH_MNT={dir}'.format(dir=mnt)),
                ]
                if env is not None:
                    for var, val in env.items():
                        # shlex.quote replaces pipes.quote (the pipes module
                        # was removed in Python 3.13).  str() keeps
                        # non-string yaml values such as integers from
                        # raising TypeError.
                        quoted_val = shlex.quote(str(val))
                        env_arg = '{var}={val}'.format(var=var, val=quoted_val)
                        args.append(run.Raw(env_arg))
                if coverage_and_limits:
                    args.extend([
                        'adjust-ulimits',
                        'ceph-coverage',
                        '{tdir}/archive/coverage'.format(tdir=testdir)])
                if timeout and timeout != '0':
                    args.extend(['timeout', timeout])
                args.extend([
                    '{srcdir}/{workunit}'.format(
                        srcdir=srcdir,
                        workunit=workunit,
                    ),
                ])
                remote.run(
                    logger=log.getChild(role),
                    args=args + optional_args,
                    label="workunit test {workunit}".format(workunit=workunit)
                )
                if cleanup:
                    args = ['sudo', 'rm', '-rf', '--', scratch_tmp]
                    remote.run(logger=log.getChild(role), args=args, timeout=(60*60))
    finally:
        log.info('Stopping %s on %s...', tests, role)
        args = ['sudo', 'rm', '-rf', '--', workunits_file, clonedir]
        # N.B. don't cleanup scratch_tmp!  If the mount is broken then rm will hang.
        remote.run(
            logger=log.getChild(role),
            args=args,
        )