author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 18:24:20 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-27 18:24:20 +0000
commit     483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree       e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /qa/tasks
parent     Initial commit. (diff)
download   ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
           ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip

Adding upstream version 14.2.21. (upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'qa/tasks')
-rw-r--r--  qa/tasks/__init__.py | 6
-rw-r--r--  qa/tasks/admin_socket.py | 194
-rw-r--r--  qa/tasks/autotest.py | 168
-rw-r--r--  qa/tasks/aver.py | 67
-rw-r--r--  qa/tasks/blktrace.py | 96
-rw-r--r--  qa/tasks/boto.cfg.template | 2
-rw-r--r--  qa/tasks/cbt.py | 283
-rw-r--r--  qa/tasks/ceph.conf.template | 105
-rw-r--r--  qa/tasks/ceph.py | 1896
-rw-r--r--  qa/tasks/ceph_client.py | 42
-rw-r--r--  qa/tasks/ceph_deploy.py | 932
-rw-r--r--  qa/tasks/ceph_fuse.py | 160
-rw-r--r--  qa/tasks/ceph_manager.py | 2642
-rw-r--r--  qa/tasks/ceph_objectstore_tool.py | 663
-rw-r--r--  qa/tasks/ceph_test_case.py | 203
-rw-r--r--  qa/tasks/cephfs/__init__.py | 0
-rw-r--r--  qa/tasks/cephfs/cephfs_test_case.py | 324
-rw-r--r--  qa/tasks/cephfs/filesystem.py | 1386
-rw-r--r--  qa/tasks/cephfs/fuse_mount.py | 502
-rw-r--r--  qa/tasks/cephfs/kernel_mount.py | 260
-rw-r--r--  qa/tasks/cephfs/mount.py | 728
-rw-r--r--  qa/tasks/cephfs/test_admin.py | 229
-rw-r--r--  qa/tasks/cephfs/test_auto_repair.py | 90
-rw-r--r--  qa/tasks/cephfs/test_backtrace.py | 78
-rw-r--r--  qa/tasks/cephfs/test_cap_flush.py | 64
-rw-r--r--  qa/tasks/cephfs/test_cephfs_shell.py | 279
-rw-r--r--  qa/tasks/cephfs/test_client_limits.py | 330
-rw-r--r--  qa/tasks/cephfs/test_client_recovery.py | 633
-rw-r--r--  qa/tasks/cephfs/test_damage.py | 569
-rw-r--r--  qa/tasks/cephfs/test_data_scan.py | 695
-rw-r--r--  qa/tasks/cephfs/test_dump_tree.py | 66
-rw-r--r--  qa/tasks/cephfs/test_exports.py | 176
-rw-r--r--  qa/tasks/cephfs/test_failover.py | 638
-rw-r--r--  qa/tasks/cephfs/test_flush.py | 113
-rw-r--r--  qa/tasks/cephfs/test_forward_scrub.py | 298
-rw-r--r--  qa/tasks/cephfs/test_fragment.py | 229
-rw-r--r--  qa/tasks/cephfs/test_full.py | 398
-rw-r--r--  qa/tasks/cephfs/test_journal_migration.py | 100
-rw-r--r--  qa/tasks/cephfs/test_journal_repair.py | 447
-rw-r--r--  qa/tasks/cephfs/test_mantle.py | 109
-rw-r--r--  qa/tasks/cephfs/test_misc.py | 291
-rw-r--r--  qa/tasks/cephfs/test_openfiletable.py | 41
-rw-r--r--  qa/tasks/cephfs/test_pool_perm.py | 113
-rw-r--r--  qa/tasks/cephfs/test_quota.py | 106
-rw-r--r--  qa/tasks/cephfs/test_readahead.py | 31
-rw-r--r--  qa/tasks/cephfs/test_recovery_pool.py | 207
-rw-r--r--  qa/tasks/cephfs/test_scrub.py | 175
-rw-r--r--  qa/tasks/cephfs/test_scrub_checks.py | 405
-rw-r--r--  qa/tasks/cephfs/test_sessionmap.py | 236
-rw-r--r--  qa/tasks/cephfs/test_snapshots.py | 530
-rw-r--r--  qa/tasks/cephfs/test_strays.py | 973
-rw-r--r--  qa/tasks/cephfs/test_volume_client.py | 1765
-rw-r--r--  qa/tasks/cephfs/test_volumes.py | 4435
-rw-r--r--  qa/tasks/cephfs_test_runner.py | 209
-rw-r--r--  qa/tasks/cephfs_upgrade_snap.py | 45
-rw-r--r--  qa/tasks/check_counter.py | 98
-rw-r--r--  qa/tasks/cifs_mount.py | 137
-rw-r--r--  qa/tasks/cram.py | 151
-rw-r--r--  qa/tasks/create_verify_lfn_objects.py | 83
-rw-r--r--  qa/tasks/devstack.py | 379
-rw-r--r--  qa/tasks/die_on_err.py | 70
-rw-r--r--  qa/tasks/divergent_priors.py | 160
-rw-r--r--  qa/tasks/divergent_priors2.py | 192
-rw-r--r--  qa/tasks/dnsmasq.py | 170
-rw-r--r--  qa/tasks/dump_stuck.py | 161
-rw-r--r--  qa/tasks/ec_lost_unfound.py | 158
-rw-r--r--  qa/tasks/exec_on_cleanup.py | 61
-rw-r--r--  qa/tasks/filestore_idempotent.py | 83
-rw-r--r--  qa/tasks/fs.py | 66
-rw-r--r--  qa/tasks/kclient.py | 130
-rw-r--r--  qa/tasks/keystone.py | 397
-rwxr-xr-x  qa/tasks/locktest.py | 134
-rw-r--r--  qa/tasks/logrotate.conf | 13
-rw-r--r--  qa/tasks/lost_unfound.py | 176
-rw-r--r--  qa/tasks/manypools.py | 73
-rw-r--r--  qa/tasks/mds_creation_failure.py | 69
-rw-r--r--  qa/tasks/mds_pre_upgrade.py | 43
-rw-r--r--  qa/tasks/mds_thrash.py | 543
-rw-r--r--  qa/tasks/metadata.yaml | 2
-rw-r--r--  qa/tasks/mgr/__init__.py | 0
-rw-r--r--  qa/tasks/mgr/dashboard/__init__.py | 0
-rw-r--r--  qa/tasks/mgr/dashboard/helper.py | 574
-rw-r--r--  qa/tasks/mgr/dashboard/test_auth.py | 240
-rw-r--r--  qa/tasks/mgr/dashboard/test_cephfs.py | 70
-rw-r--r--  qa/tasks/mgr/dashboard/test_cluster_configuration.py | 388
-rw-r--r--  qa/tasks/mgr/dashboard/test_erasure_code_profile.py | 110
-rw-r--r--  qa/tasks/mgr/dashboard/test_ganesha.py | 168
-rw-r--r--  qa/tasks/mgr/dashboard/test_health.py | 305
-rw-r--r--  qa/tasks/mgr/dashboard/test_host.py | 31
-rw-r--r--  qa/tasks/mgr/dashboard/test_logs.py | 38
-rw-r--r--  qa/tasks/mgr/dashboard/test_mgr_module.py | 160
-rw-r--r--  qa/tasks/mgr/dashboard/test_monitor.py | 25
-rw-r--r--  qa/tasks/mgr/dashboard/test_osd.py | 157
-rw-r--r--  qa/tasks/mgr/dashboard/test_perf_counters.py | 71
-rw-r--r--  qa/tasks/mgr/dashboard/test_pool.py | 364
-rw-r--r--  qa/tasks/mgr/dashboard/test_rbd.py | 797
-rw-r--r--  qa/tasks/mgr/dashboard/test_rbd_mirroring.py | 177
-rw-r--r--  qa/tasks/mgr/dashboard/test_requests.py | 32
-rw-r--r--  qa/tasks/mgr/dashboard/test_rgw.py | 710
-rw-r--r--  qa/tasks/mgr/dashboard/test_role.py | 140
-rw-r--r--  qa/tasks/mgr/dashboard/test_settings.py | 65
-rw-r--r--  qa/tasks/mgr/dashboard/test_summary.py | 40
-rw-r--r--  qa/tasks/mgr/dashboard/test_user.py | 115
-rw-r--r--  qa/tasks/mgr/mgr_test_case.py | 204
-rw-r--r--  qa/tasks/mgr/test_crash.py | 108
-rw-r--r--  qa/tasks/mgr/test_dashboard.py | 140
-rw-r--r--  qa/tasks/mgr/test_failover.py | 148
-rw-r--r--  qa/tasks/mgr/test_insights.py | 203
-rw-r--r--  qa/tasks/mgr/test_module_selftest.py | 335
-rw-r--r--  qa/tasks/mgr/test_orchestrator_cli.py | 154
-rw-r--r--  qa/tasks/mgr/test_progress.py | 376
-rw-r--r--  qa/tasks/mgr/test_prometheus.py | 79
-rw-r--r--  qa/tasks/mgr/test_ssh_orchestrator.py | 23
-rw-r--r--  qa/tasks/mon_clock_skew_check.py | 73
-rw-r--r--  qa/tasks/mon_recovery.py | 80
-rw-r--r--  qa/tasks/mon_thrash.py | 343
-rw-r--r--  qa/tasks/multibench.py | 61
-rw-r--r--  qa/tasks/netem.py | 268
-rw-r--r--  qa/tasks/object_source_down.py | 101
-rw-r--r--  qa/tasks/omapbench.py | 85
-rw-r--r--  qa/tasks/openssl_keys.py | 227
-rw-r--r--  qa/tasks/osd_backfill.py | 104
-rw-r--r--  qa/tasks/osd_failsafe_enospc.py | 219
-rw-r--r--  qa/tasks/osd_max_pg_per_osd.py | 126
-rw-r--r--  qa/tasks/osd_recovery.py | 193
-rw-r--r--  qa/tasks/peer.py | 90
-rw-r--r--  qa/tasks/peering_speed_test.py | 87
-rw-r--r--  qa/tasks/populate_rbd_pool.py | 82
-rw-r--r--  qa/tasks/qemu.py | 580
-rw-r--r--  qa/tasks/rados.py | 272
-rw-r--r--  qa/tasks/radosbench.py | 140
-rw-r--r--  qa/tasks/radosbenchsweep.py | 223
-rw-r--r--  qa/tasks/radosgw_admin.py | 953
-rw-r--r--  qa/tasks/radosgw_admin_rest.py | 721
-rw-r--r--  qa/tasks/ragweed.py | 390
-rw-r--r--  qa/tasks/rbd.py | 628
-rw-r--r--  qa/tasks/rbd_fio.py | 224
-rw-r--r--  qa/tasks/rbd_fsx.py | 114
-rw-r--r--  qa/tasks/rbd_mirror.py | 119
-rw-r--r--  qa/tasks/rbd_mirror_thrash.py | 214
-rw-r--r--  qa/tasks/rebuild_mondb.py | 224
-rw-r--r--  qa/tasks/reg11184.py | 242
-rw-r--r--  qa/tasks/rep_lost_unfound_delete.py | 178
-rw-r--r--  qa/tasks/repair_test.py | 309
-rw-r--r--  qa/tasks/resolve_stuck_peering.py | 112
-rw-r--r--  qa/tasks/restart.py | 163
-rw-r--r--  qa/tasks/rgw.py | 357
-rw-r--r--  qa/tasks/rgw_logsocket.py | 165
l---------  qa/tasks/rgw_multi | 1
-rw-r--r--  qa/tasks/rgw_multisite.py | 436
-rw-r--r--  qa/tasks/rgw_multisite_tests.py | 99
-rw-r--r--  qa/tasks/s3a_hadoop.py | 289
-rw-r--r--  qa/tasks/s3readwrite.py | 353
-rw-r--r--  qa/tasks/s3roundtrip.py | 326
-rw-r--r--  qa/tasks/s3tests.py | 424
-rw-r--r--  qa/tasks/samba.py | 247
-rw-r--r--  qa/tasks/scrub.py | 117
-rw-r--r--  qa/tasks/scrub_test.py | 403
-rw-r--r--  qa/tasks/swift.py | 256
-rw-r--r--  qa/tasks/systemd.py | 135
-rw-r--r--  qa/tasks/tempest.py | 284
-rw-r--r--  qa/tasks/tests/__init__.py | 0
-rw-r--r--  qa/tasks/tests/test_devstack.py | 48
-rw-r--r--  qa/tasks/tests/test_radosgw_admin.py | 35
-rw-r--r--  qa/tasks/teuthology_integration.py | 19
-rw-r--r--  qa/tasks/tgt.py | 177
-rw-r--r--  qa/tasks/thrash_pool_snaps.py | 61
-rw-r--r--  qa/tasks/thrashosds-health.yaml | 15
-rw-r--r--  qa/tasks/thrashosds.py | 219
-rw-r--r--  qa/tasks/tox.py | 50
-rw-r--r--  qa/tasks/userdata_setup.yaml | 25
-rw-r--r--  qa/tasks/userdata_teardown.yaml | 11
-rw-r--r--  qa/tasks/util/__init__.py | 26
-rw-r--r--  qa/tasks/util/rados.py | 87
-rw-r--r--  qa/tasks/util/rgw.py | 94
-rw-r--r--  qa/tasks/util/test/__init__.py | 0
-rw-r--r--  qa/tasks/util/test/test_rados.py | 40
-rw-r--r--  qa/tasks/util/workunit.py | 78
-rw-r--r--  qa/tasks/vstart_runner.py | 1169
-rw-r--r--  qa/tasks/watch_notify_same_primary.py | 130
-rw-r--r--  qa/tasks/watch_notify_stress.py | 70
-rw-r--r--  qa/tasks/workunit.py | 423
182 files changed, 50472 insertions, 0 deletions
diff --git a/qa/tasks/__init__.py b/qa/tasks/__init__.py
new file mode 100644
index 00000000..9a7949a0
--- /dev/null
+++ b/qa/tasks/__init__.py
@@ -0,0 +1,6 @@
+import logging
+
+# Inherit teuthology's log level
+teuthology_log = logging.getLogger('teuthology')
+log = logging.getLogger(__name__)
+log.setLevel(teuthology_log.level)
diff --git a/qa/tasks/admin_socket.py b/qa/tasks/admin_socket.py
new file mode 100644
index 00000000..c454d3d0
--- /dev/null
+++ b/qa/tasks/admin_socket.py
@@ -0,0 +1,194 @@
+"""
+Admin Socket task -- used in rados, powercycle, and smoke testing
+"""
+
+import json
+import logging
+import os
+import time
+
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+from teuthology.config import config as teuth_config
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+ """
+ Run an admin socket command, make sure the output is json, and run
+ a test program on it. The test program should read json from
+ stdin. This task succeeds if the test program exits with status 0.
+
+ To run the same test on all clients::
+
+ tasks:
+ - ceph:
+ - rados:
+ - admin_socket:
+ all:
+ dump_requests:
+ test: http://example.com/script
+
+ To restrict it to certain clients::
+
+ tasks:
+ - ceph:
+ - rados: [client.1]
+ - admin_socket:
+ client.1:
+ dump_requests:
+ test: http://example.com/script
+
+ If an admin socket command has arguments, they can be specified as
+ a list::
+
+ tasks:
+ - ceph:
+ - rados: [client.0]
+ - admin_socket:
+ client.0:
+ dump_requests:
+ test: http://example.com/script
+ help:
+ test: http://example.com/test_help_version
+ args: [version]
+
+ Note that there must be a ceph client with an admin socket running
+ before this task is run. The tests are parallelized at the client
+ level. Tests for a single client are run serially.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ assert isinstance(config, dict), \
+ 'admin_socket task requires a dict for configuration'
+ teuthology.replace_all_with_clients(ctx.cluster, config)
+
+ with parallel() as ptask:
+ for client, tests in config.items():
+ ptask.spawn(_run_tests, ctx, client, tests)
+
+
+def _socket_command(ctx, remote, socket_path, command, args):
+ """
+ Run an admin socket command and return the result as a string.
+
+ :param ctx: Context
+ :param remote: Remote site
+ :param socket_path: path to socket
+ :param command: command to be run remotely
+ :param args: command arguments
+
+ :returns: output of command in json format
+ """
+ testdir = teuthology.get_testdir(ctx)
+ max_tries = 120
+ while True:
+ try:
+ out = remote.sh([
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'ceph',
+ '--admin-daemon', socket_path,
+ ] + command.split(' ') + args)
+ except CommandFailedError:
+ assert max_tries > 0
+ max_tries -= 1
+ log.info('ceph cli returned an error, command not registered yet?')
+ log.info('sleeping and retrying ...')
+ time.sleep(1)
+ continue
+ break
+ log.debug('admin socket command %s returned %s', command, out)
+ return json.loads(out)
+
+def _run_tests(ctx, client, tests):
+ """
+ Create a temp directory and wait for a client socket to be created.
+ For each test, copy the executable locally and run the test.
+ Remove temp directory when finished.
+
+ :param ctx: Context
+ :param client: client machine to run the test
+ :param tests: list of tests to run
+ """
+ testdir = teuthology.get_testdir(ctx)
+ log.debug('Running admin socket tests on %s', client)
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client)
+ overrides = ctx.config.get('overrides', {}).get('admin_socket', {})
+
+ try:
+ tmp_dir = os.path.join(
+ testdir,
+ 'admin_socket_{client}'.format(client=client),
+ )
+ remote.run(
+ args=[
+ 'mkdir',
+ '--',
+ tmp_dir,
+ run.Raw('&&'),
+ # wait for client process to create the socket
+ 'while', 'test', '!', '-e', socket_path, run.Raw(';'),
+ 'do', 'sleep', '1', run.Raw(';'), 'done',
+ ],
+ )
+
+ for command, config in tests.items():
+ if config is None:
+ config = {}
+ teuthology.deep_merge(config, overrides)
+ log.debug('Testing %s with config %s', command, str(config))
+
+ test_path = None
+ if 'test' in config:
+ # hack: the git_url is always ceph-ci or ceph
+ git_url = teuth_config.get_ceph_git_url()
+ repo_name = 'ceph.git'
+ if git_url.count('ceph-ci'):
+ repo_name = 'ceph-ci.git'
+ url = config['test'].format(
+ branch=config.get('branch', 'master'),
+ repo=repo_name,
+ )
+ test_path = os.path.join(tmp_dir, command)
+ remote.run(
+ args=[
+ 'wget',
+ '-q',
+ '-O',
+ test_path,
+ '--',
+ url,
+ run.Raw('&&'),
+ 'chmod',
+ 'u=rx',
+ '--',
+ test_path,
+ ],
+ )
+
+ args = config.get('args', [])
+ assert isinstance(args, list), \
+ 'admin socket command args must be a list'
+ sock_out = _socket_command(ctx, remote, socket_path, command, args)
+ if test_path is not None:
+ remote.run(
+ args=[
+ test_path,
+ ],
+ stdin=json.dumps(sock_out),
+ )
+
+ finally:
+ remote.run(
+ args=[
+ 'rm', '-rf', '--', tmp_dir,
+ ],
+ )
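
For context, a 'test' program referenced by the admin_socket task is simply an executable that reads the command's JSON output from stdin and reports pass or fail through its exit status. A minimal sketch (the script and its check are hypothetical, not something shipped with Ceph):

    #!/usr/bin/env python
    # Sketch of an admin-socket test program: the task pipes the JSON output
    # of the admin socket command (e.g. dump_requests) to this script's stdin
    # and treats exit status 0 as success.
    import json
    import sys

    data = json.load(sys.stdin)    # a parse failure exits non-zero on its own
    print('admin socket returned a %s' % type(data).__name__)
    sys.exit(0)                    # hypothetical check: any well-formed JSON passes
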
diff --git a/qa/tasks/autotest.py b/qa/tasks/autotest.py
new file mode 100644
index 00000000..a78987dc
--- /dev/null
+++ b/qa/tasks/autotest.py
@@ -0,0 +1,168 @@
+"""
+Run an autotest test on the ceph cluster.
+"""
+import json
+import logging
+import os
+
+import six
+
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Run an autotest test on the ceph cluster.
+
+ Only autotest client tests are supported.
+
+ The config is a mapping from role name to list of tests to run on
+ that client.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0, client.1]
+ - autotest:
+ client.0: [dbench]
+ client.1: [bonnie]
+
+ You can also specify a list of tests to run on all clients::
+
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ - autotest:
+ all: [dbench]
+ """
+ assert isinstance(config, dict)
+ config = teuthology.replace_all_with_clients(ctx.cluster, config)
+ log.info('Setting up autotest...')
+ testdir = teuthology.get_testdir(ctx)
+ with parallel() as p:
+ for role in config.keys():
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ p.spawn(_download, testdir, remote)
+
+ log.info('Making a separate scratch dir for every client...')
+ for role in config.keys():
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+ scratch = os.path.join(mnt, 'client.{id}'.format(id=id_))
+ remote.run(
+ args=[
+ 'sudo',
+ 'install',
+ '-d',
+ '-m', '0755',
+ '--owner={user}'.format(user='ubuntu'), #TODO
+ '--',
+ scratch,
+ ],
+ )
+
+ with parallel() as p:
+ for role, tests in config.items():
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ p.spawn(_run_tests, testdir, remote, role, tests)
+
+def _download(testdir, remote):
+ """
+    Download. Does not explicitly support multiple tasks in a single run.
+ """
+ remote.run(
+ args=[
+ # explicitly does not support multiple autotest tasks
+ # in a single run; the result archival would conflict
+ 'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'mkdir', '{tdir}/autotest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'wget',
+ '-nv',
+ '--no-check-certificate',
+ 'https://github.com/ceph/autotest/tarball/ceph',
+ '-O-',
+ run.Raw('|'),
+ 'tar',
+ '-C', '{tdir}/autotest'.format(tdir=testdir),
+ '-x',
+ '-z',
+ '-f-',
+ '--strip-components=1',
+ ],
+ )
+
+def _run_tests(testdir, remote, role, tests):
+ """
+ Spawned to run test on remote site
+ """
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_))
+ scratch = os.path.join(mnt, 'client.{id}'.format(id=id_))
+
+ assert isinstance(tests, list)
+ for idx, testname in enumerate(tests):
+ log.info('Running autotest client test #%d: %s...', idx, testname)
+
+ tag = 'client.{id}.num{idx}.{testname}'.format(
+ idx=idx,
+ testname=testname,
+ id=id_,
+ )
+ control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag)
+ teuthology.write_file(
+ remote=remote,
+ path=control,
+ data='import json; data=json.loads({data!r}); job.run_test(**data)'.format(
+ data=json.dumps(dict(
+ url=testname,
+ dir=scratch,
+ # TODO perhaps tag
+ # results will be in {testdir}/autotest/client/results/dbench
+ # or {testdir}/autotest/client/results/dbench.{tag}
+ )),
+ ),
+ )
+ remote.run(
+ args=[
+ '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir),
+ '--verbose',
+ '--harness=simple',
+ '--tag={tag}'.format(tag=tag),
+ control,
+ run.Raw('3>&1'),
+ ],
+ )
+
+ remote.run(
+ args=[
+ 'rm', '-rf', '--', control,
+ ],
+ )
+
+ remote.run(
+ args=[
+ 'mv',
+ '--',
+ '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag),
+ '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag),
+ ],
+ )
+
+ remote.run(
+ args=[
+ 'rm', '-rf', '--', '{tdir}/autotest'.format(tdir=testdir),
+ ],
+ )
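
The control file written by _run_tests() above is a one-line Python program rendered through the {data!r} placeholder. A self-contained sketch of how it comes out for a hypothetical dbench run on client.0 (the scratch path is illustrative):

    import json

    testname = 'dbench'
    scratch = '/home/ubuntu/cephtest/mnt.0/client.0'   # illustrative scratch dir
    data = json.dumps(dict(url=testname, dir=scratch))
    control = 'import json; data=json.loads({data!r}); job.run_test(**data)'.format(data=data)
    print(control)
    # import json; data=json.loads('{"url": "dbench", "dir": "/home/ubuntu/cephtest/mnt.0/client.0"}'); job.run_test(**data)
    # ('job' is supplied by the autotest harness when it executes the control file)
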
diff --git a/qa/tasks/aver.py b/qa/tasks/aver.py
new file mode 100644
index 00000000..79ee18c5
--- /dev/null
+++ b/qa/tasks/aver.py
@@ -0,0 +1,67 @@
+"""
+Aver wrapper task
+"""
+import contextlib
+import logging
+from subprocess import check_call, Popen, PIPE
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Execute an aver assertion
+
+ Parameters:
+
+ input: file containing data referred to by the assertions. File name is
+ relative to the job's archive path
+ validations: list of validations in the Aver language
+
+ Example:
+ - aver:
+ input: bench_output.csv
+ validations:
+ - expect performance(alg='ceph') > performance(alg='raw')
+ - for size > 3 expect avg_throughput > 2000
+ """
+ log.info('Beginning aver...')
+ assert isinstance(config, dict), 'expecting dictionary for configuration'
+
+ if 'input' not in config:
+ raise Exception("Expecting 'input' option")
+ if len(config.get('validations', [])) < 1:
+ raise Exception("Expecting at least one entry in 'validations'")
+
+ url = ('https://github.com/ivotron/aver/releases/download/'
+ 'v0.3.0/aver-linux-amd64.tar.bz2')
+
+ aver_path = ctx.archive + '/aver'
+
+ # download binary
+ check_call(['wget', '-O', aver_path + '.tbz', url])
+ check_call(['tar', 'xfj', aver_path + '.tbz', '-C', ctx.archive])
+
+ # print version
+ process = Popen([aver_path, '-v'], stdout=PIPE)
+ log.info(process.communicate()[0])
+
+ # validate
+ for validation in config['validations']:
+ cmd = (aver_path + ' -s -i ' + (ctx.archive + '/' + config['input']) +
+ ' "' + validation + '"')
+ log.info("executing: " + cmd)
+ process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+ (stdout, stderr) = process.communicate()
+ if stderr:
+ log.info('aver stderr: ' + stderr)
+ log.info('aver result: ' + stdout)
+ if stdout.strip(' \t\n\r') != 'true':
+ raise Exception('Failed validation: ' + validation)
+
+ try:
+ yield
+ finally:
+ log.info('Removing aver binary...')
+ check_call(['rm', aver_path, aver_path + '.tbz'])
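
Concretely, for the first validation in the example docstring above, the task shells out to a command of the following shape (a sketch; the archive path is illustrative):

    archive = '/home/ubuntu/cephtest/archive'      # illustrative ctx.archive
    aver_path = archive + '/aver'
    validation = "expect performance(alg='ceph') > performance(alg='raw')"
    cmd = (aver_path + ' -s -i ' + (archive + '/' + 'bench_output.csv') +
           ' "' + validation + '"')
    print(cmd)
    # /home/ubuntu/cephtest/archive/aver -s -i /home/ubuntu/cephtest/archive/bench_output.csv "expect performance(alg='ceph') > performance(alg='raw')"
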
diff --git a/qa/tasks/blktrace.py b/qa/tasks/blktrace.py
new file mode 100644
index 00000000..10b1da0c
--- /dev/null
+++ b/qa/tasks/blktrace.py
@@ -0,0 +1,96 @@
+"""
+Run blktrace program through teuthology
+"""
+import contextlib
+import logging
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+blktrace = '/usr/sbin/blktrace'
+daemon_signal = 'term'
+
+@contextlib.contextmanager
+def setup(ctx, config):
+ """
+ Setup all the remotes
+ """
+ osds = ctx.cluster.only(teuthology.is_type('osd', config['cluster']))
+ log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx))
+
+ for remote, roles_for_host in osds.remotes.items():
+ log.info('Creating %s on %s' % (log_dir, remote.name))
+ remote.run(
+ args=['mkdir', '-p', '-m0755', '--', log_dir],
+ wait=False,
+ )
+ yield
+
+@contextlib.contextmanager
+def execute(ctx, config):
+ """
+ Run the blktrace program on remote machines.
+ """
+ procs = []
+ testdir = teuthology.get_testdir(ctx)
+ log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir)
+
+ osds = ctx.cluster.only(teuthology.is_type('osd'))
+ for remote, roles_for_host in osds.remotes.items():
+ roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote]
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd',
+ config['cluster']):
+ if roles_to_devs.get(role):
+ dev = roles_to_devs[role]
+ log.info("running blktrace on %s: %s" % (remote.name, dev))
+
+ proc = remote.run(
+ args=[
+ 'cd',
+ log_dir,
+ run.Raw(';'),
+ 'daemon-helper',
+ daemon_signal,
+ 'sudo',
+ blktrace,
+ '-o',
+ dev.rsplit("/", 1)[1],
+ '-d',
+ dev,
+ ],
+ wait=False,
+ stdin=run.PIPE,
+ )
+ procs.append(proc)
+ try:
+ yield
+ finally:
+ osds = ctx.cluster.only(teuthology.is_type('osd'))
+        log.info('stopping blktrace processes')
+ for proc in procs:
+ proc.stdin.close()
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Usage:
+ blktrace:
+
+ or:
+ blktrace:
+ cluster: backup
+
+ Runs blktrace on all osds in the specified cluster (the 'ceph' cluster by
+ default).
+ """
+ if config is None:
+ config = {}
+ config['cluster'] = config.get('cluster', 'ceph')
+
+ with contextutil.nested(
+ lambda: setup(ctx=ctx, config=config),
+ lambda: execute(ctx=ctx, config=config),
+ ):
+ yield
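
Per device, the execute() helper above amounts to one long-running command that stays alive until teardown closes its stdin. A sketch of the argv for a hypothetical OSD data device /dev/vdb (log directory elided):

    dev = '/dev/vdb'                      # hypothetical OSD data device
    argv = ['cd', '<log_dir>', ';',
            'daemon-helper', 'term',      # forwards the signal when stdin closes
            'sudo', '/usr/sbin/blktrace',
            '-o', dev.rsplit('/', 1)[1],  # trace output prefix: 'vdb'
            '-d', dev]
    print(' '.join(argv))
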
diff --git a/qa/tasks/boto.cfg.template b/qa/tasks/boto.cfg.template
new file mode 100644
index 00000000..cdfe8873
--- /dev/null
+++ b/qa/tasks/boto.cfg.template
@@ -0,0 +1,2 @@
+[Boto]
+http_socket_timeout = {idle_timeout}
diff --git a/qa/tasks/cbt.py b/qa/tasks/cbt.py
new file mode 100644
index 00000000..e234eff9
--- /dev/null
+++ b/qa/tasks/cbt.py
@@ -0,0 +1,283 @@
+import logging
+import os
+import yaml
+
+from teuthology import misc
+from teuthology.orchestra import run
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+
+
+class CBT(Task):
+ """
+ Passes through a CBT configuration yaml fragment.
+ """
+ def __init__(self, ctx, config):
+ super(CBT, self).__init__(ctx, config)
+ self.log = log
+
+ def hosts_of_type(self, type_):
+ return [r.name for r in self.ctx.cluster.only(misc.is_type(type_)).remotes.keys()]
+
+ def generate_cbt_config(self):
+ mon_hosts = self.hosts_of_type('mon')
+ osd_hosts = self.hosts_of_type('osd')
+ client_hosts = self.hosts_of_type('client')
+ rgw_client = {}
+ rgw_client[client_hosts[0]] = None
+ rgw_hosts = self.config.get('cluster', {}).get('rgws', rgw_client)
+ cluster_config = dict(
+ user=self.config.get('cluster', {}).get('user', 'ubuntu'),
+ head=mon_hosts[0],
+ osds=osd_hosts,
+ mons=mon_hosts,
+ clients=client_hosts,
+ rgws=rgw_hosts,
+ osds_per_node=self.config.get('cluster', {}).get('osds_per_node', 1),
+ rebuild_every_test=False,
+ use_existing=True,
+ is_teuthology=self.config.get('cluster', {}).get('is_teuthology', True),
+ iterations=self.config.get('cluster', {}).get('iterations', 1),
+ tmp_dir='/tmp/cbt',
+ pool_profiles=self.config.get('cluster', {}).get('pool_profiles'),
+ )
+
+ benchmark_config = self.config.get('benchmarks')
+ benchmark_type = next(iter(benchmark_config.keys()))
+ if benchmark_type == 'librbdfio':
+ testdir = misc.get_testdir(self.ctx)
+ benchmark_config['librbdfio']['cmd_path'] = os.path.join(testdir, 'fio/fio')
+ if benchmark_type == 'cosbench':
+ # create cosbench_dir and cosbench_xml_dir
+ testdir = misc.get_testdir(self.ctx)
+ benchmark_config['cosbench']['cosbench_dir'] = os.path.join(testdir, 'cos')
+ benchmark_config['cosbench']['cosbench_xml_dir'] = os.path.join(testdir, 'xml')
+ self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', benchmark_config['cosbench']['cosbench_xml_dir']])
+ benchmark_config['cosbench']['controller'] = osd_hosts[0]
+
+ # set auth details
+ remotes_and_roles = self.ctx.cluster.remotes.items()
+ ips = [host for (host, port) in
+ (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
+ benchmark_config['cosbench']['auth'] = "username=cosbench:operator;password=intel2012;url=http://%s:80/auth/v1.0;retry=9" %(ips[0])
+
+ return dict(
+ cluster=cluster_config,
+ benchmarks=benchmark_config,
+ )
+
+ def install_dependencies(self):
+ system_type = misc.get_system_type(self.first_mon)
+
+ if system_type == 'rpm':
+ install_cmd = ['sudo', 'yum', '-y', 'install']
+ cbt_depends = ['python36-PyYAML', 'python36-lxml', 'librbd-devel', 'pdsh', 'collectl']
+ else:
+ install_cmd = ['sudo', 'apt-get', '-y', '--force-yes', 'install']
+ cbt_depends = ['python3-yaml', 'python3-lxml', 'librbd-dev', 'collectl']
+ self.first_mon.run(args=install_cmd + cbt_depends)
+
+ benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys()))
+ self.log.info('benchmark: %s', benchmark_type)
+
+ if benchmark_type == 'librbdfio':
+ # install fio
+ testdir = misc.get_testdir(self.ctx)
+ self.first_mon.run(
+ args=[
+ 'git', 'clone', '-b', 'master',
+ 'https://github.com/axboe/fio.git',
+ '{tdir}/fio'.format(tdir=testdir)
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'cd', os.path.join(testdir, 'fio'), run.Raw('&&'),
+ './configure', run.Raw('&&'),
+ 'make'
+ ]
+ )
+
+ if benchmark_type == 'cosbench':
+ # install cosbench
+ self.log.info('install dependencies for cosbench')
+ if system_type == 'rpm':
+ cosbench_depends = ['wget', 'unzip', 'java-1.7.0-openjdk', 'curl']
+ else:
+ cosbench_depends = ['wget', 'unzip', 'openjdk-8-jre', 'curl']
+ self.first_mon.run(args=install_cmd + cosbench_depends)
+ testdir = misc.get_testdir(self.ctx)
+ cosbench_version = '0.4.2.c3'
+ cosbench_location = 'https://github.com/intel-cloud/cosbench/releases/download/v0.4.2.c3/0.4.2.c3.zip'
+ os_version = misc.get_system_type(self.first_mon, False, True)
+
+ # additional requirements for bionic
+ if os_version == '18.04':
+ self.first_mon.run(
+ args=['sudo', 'apt-get', '-y', 'purge', 'openjdk-11*'])
+ # use our own version of cosbench
+ cosbench_version = 'cosbench-0.4.2.c3.1'
+ # contains additional parameter "-N" to nc
+ cosbench_location = 'http://drop.ceph.com/qa/cosbench-0.4.2.c3.1.zip'
+ cosbench_dir = os.path.join(testdir, cosbench_version)
+ self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', cosbench_dir])
+ self.first_mon.run(
+ args=[
+ 'cd', testdir, run.Raw('&&'),
+ 'wget',
+ cosbench_location, run.Raw('&&'),
+ 'unzip', '{name}.zip'.format(name=cosbench_version), '-d', cosbench_version
+ ]
+ )
+ else:
+ self.first_mon.run(
+ args=[
+ 'cd', testdir, run.Raw('&&'),
+ 'wget',
+ cosbench_location, run.Raw('&&'),
+ 'unzip', '{name}.zip'.format(name=cosbench_version)
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'cd', testdir, run.Raw('&&'),
+ 'ln', '-s', cosbench_version, 'cos',
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'cd', os.path.join(testdir, 'cos'), run.Raw('&&'),
+ 'chmod', '+x', run.Raw('*.sh'),
+ ]
+ )
+
+ # start cosbench and check info
+ self.log.info('start cosbench')
+ self.first_mon.run(
+ args=[
+ 'cd', testdir, run.Raw('&&'),
+ 'cd', 'cos', run.Raw('&&'),
+ 'sh', 'start-all.sh'
+ ]
+ )
+ self.log.info('check cosbench info')
+ self.first_mon.run(
+ args=[
+ 'cd', testdir, run.Raw('&&'),
+ 'cd', 'cos', run.Raw('&&'),
+ 'sh', 'cli.sh', 'info'
+ ]
+ )
+
+ def checkout_cbt(self):
+ testdir = misc.get_testdir(self.ctx)
+ repo = self.config.get('repo', 'https://github.com/ceph/cbt.git')
+ branch = self.config.get('branch', 'master')
+ branch = self.config.get('force-branch', branch)
+ sha1 = self.config.get('sha1')
+ self.first_mon.run(
+ args=[
+ 'git', 'clone', '-b', branch, repo,
+ '{tdir}/cbt'.format(tdir=testdir)
+ ]
+ )
+ if sha1:
+ self.first_mon.run(
+ args=[
+ 'cd', os.path.join(testdir, 'cbt'), run.Raw('&&'),
+ 'git', 'reset', '--hard', sha1,
+ ]
+ )
+
+ def setup(self):
+ super(CBT, self).setup()
+ self.first_mon = next(iter(self.ctx.cluster.only(misc.get_first_mon(self.ctx, self.config)).remotes.keys()))
+ self.cbt_config = self.generate_cbt_config()
+ self.log.info('cbt configuration is %s', self.cbt_config)
+ self.cbt_dir = os.path.join(misc.get_archive_dir(self.ctx), 'cbt')
+ self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', self.cbt_dir])
+ misc.write_file(self.first_mon, os.path.join(self.cbt_dir, 'cbt_config.yaml'),
+ yaml.safe_dump(self.cbt_config, default_flow_style=False))
+ self.checkout_cbt()
+ self.install_dependencies()
+
+ def begin(self):
+ super(CBT, self).begin()
+ testdir = misc.get_testdir(self.ctx)
+ self.first_mon.run(
+ args=[
+ '{tdir}/cbt/cbt.py'.format(tdir=testdir),
+ '-a', self.cbt_dir,
+ '{cbtdir}/cbt_config.yaml'.format(cbtdir=self.cbt_dir),
+ ],
+ )
+ preserve_file = os.path.join(self.ctx.archive, '.preserve')
+ open(preserve_file, 'a').close()
+
+ def end(self):
+ super(CBT, self).end()
+ testdir = misc.get_testdir(self.ctx)
+ self.first_mon.run(
+ args=[
+ 'rm', '--one-file-system', '-rf', '--',
+ '{tdir}/cbt'.format(tdir=testdir),
+ ]
+ )
+ benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys()))
+ if benchmark_type == 'librbdfio':
+ self.first_mon.run(
+ args=[
+ 'rm', '--one-file-system', '-rf', '--',
+ '{tdir}/fio'.format(tdir=testdir),
+ ]
+ )
+
+ if benchmark_type == 'cosbench':
+ os_version = misc.get_system_type(self.first_mon, False, True)
+ if os_version == '18.04':
+ cosbench_version = 'cosbench-0.4.2.c3.1'
+ else:
+ cosbench_version = '0.4.2.c3'
+ # note: stop-all requires 'nc'
+ self.first_mon.run(
+ args=[
+ 'cd', testdir, run.Raw('&&'),
+ 'cd', 'cos', run.Raw('&&'),
+ 'sh', 'stop-all.sh',
+ run.Raw('||'), 'true'
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'sudo', 'killall', '-9', 'java',
+ run.Raw('||'), 'true'
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'rm', '--one-file-system', '-rf', '--',
+ '{tdir}/cos'.format(tdir=testdir),
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'rm', '--one-file-system', '-rf', '--',
+ '{tdir}/{version}'.format(tdir=testdir, version=cosbench_version),
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'rm', '--one-file-system', '-rf', '--',
+ '{tdir}/{version}.zip'.format(tdir=testdir, version=cosbench_version),
+ ]
+ )
+ self.first_mon.run(
+ args=[
+ 'rm', '--one-file-system', '-rf', '--',
+ '{tdir}/xml'.format(tdir=testdir),
+ ]
+ )
+
+
+task = CBT
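
A small sketch of the 'cluster' section that generate_cbt_config() derives from the teuthology roles; the host name below is illustrative, and the 'benchmarks' section is passed through verbatim from the task's own config:

    import yaml

    host = 'smithi001'   # hypothetical node carrying the mon, osd and client roles
    cluster = dict(
        user='ubuntu', head=host, mons=[host], osds=[host], clients=[host],
        rgws={host: None}, osds_per_node=1, rebuild_every_test=False,
        use_existing=True, is_teuthology=True, iterations=1,
        tmp_dir='/tmp/cbt', pool_profiles=None,
    )
    print(yaml.safe_dump(dict(cluster=cluster), default_flow_style=False))
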
diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template
new file mode 100644
index 00000000..a84043f5
--- /dev/null
+++ b/qa/tasks/ceph.conf.template
@@ -0,0 +1,105 @@
+[global]
+ chdir = ""
+ pid file = /var/run/ceph/$cluster-$name.pid
+ auth supported = cephx
+
+ filestore xattr use omap = true
+
+ mon clock drift allowed = 1.000
+
+ osd crush chooseleaf type = 0
+ auth debug = true
+
+ ms die on old message = true
+ ms die on bug = true
+
+ mon max pg per osd = 10000 # >= luminous
+ mon pg warn max object skew = 0
+
+ osd pool default size = 2
+
+ mon osd allow primary affinity = true
+ mon osd allow pg remap = true
+ mon warn on legacy crush tunables = false
+ mon warn on crush straw calc version zero = false
+ mon warn on no sortbitwise = false
+ mon warn on osd down out interval zero = false
+ mon warn on too few osds = false
+ mon_warn_on_pool_pg_num_not_power_of_two = false
+ mon_warn_on_pool_no_redundancy = false
+
+ osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd crush-failure-domain=osd"
+
+ osd default data pool replay window = 5
+
+ mon allow pool delete = true
+
+ mon cluster log file level = debug
+ debug asserts on shutdown = true
+ mon health detail to clog = false
+
+ # we see this fail in qa on *nautilus*; bump up retries
+ mon_client_directed_command_retry = 4
+
+[osd]
+ osd journal size = 100
+
+ osd scrub load threshold = 5.0
+ osd scrub max interval = 600
+
+ osd recover clone overlap = true
+ osd recovery max chunk = 1048576
+
+ osd debug shutdown = true
+ osd debug op order = true
+ osd debug verify stray on activate = true
+
+ osd open classes on start = true
+ osd debug pg log writeout = true
+
+ osd deep scrub update digest min age = 30
+
+ osd map max advance = 10
+
+ journal zero on create = true
+
+ filestore ondisk finisher threads = 3
+ filestore apply finisher threads = 3
+
+ bdev debug aio = true
+ osd debug misdirected ops = true
+
+[mgr]
+ debug ms = 1
+ debug mgr = 20
+ debug mon = 20
+ debug auth = 20
+ mon reweight min pgs per osd = 4
+ mon reweight min bytes per osd = 10
+
+[mon]
+ debug ms = 1
+ debug mon = 20
+ debug paxos = 20
+ debug auth = 20
+ mon data avail warn = 5
+ mon mgr mkfs grace = 240
+ mon reweight min pgs per osd = 4
+ mon osd reporter subtree level = osd
+ mon osd prime pg temp = true
+ mon reweight min bytes per osd = 10
+
+ # rotate auth tickets quickly to exercise renewal paths
+ auth mon ticket ttl = 660 # 11m
+ auth service ticket ttl = 240 # 4m
+
+ # don't complain about insecure global_id in the test suite
+ mon_warn_on_insecure_global_id_reclaim = false
+ mon_warn_on_insecure_global_id_reclaim_allowed = false
+
+[client]
+ rgw cache enabled = true
+ rgw enable ops log = true
+ rgw enable usage log = true
+ log file = /var/log/ceph/$cluster-$name.$pid.log
+ admin socket = /var/run/ceph/$cluster-$name.$pid.asok
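
This template is loaded with configobj by skeleton_config() in ceph.py below, and the ceph task's 'conf' overrides are layered on top in cluster() before the file is written out. A minimal sketch of that flow (the override values are illustrative, and the template is assumed to be in the working directory):

    import configobj

    conf = configobj.ConfigObj('ceph.conf.template', file_error=True)
    overrides = {'global': {'debug ms': 1}, 'osd': {'debug osd': 20}}   # illustrative
    for section, keys in overrides.items():
        if section not in conf:
            conf[section] = {}
        for key, value in keys.items():
            conf[section][key] = value
    conf.filename = '/tmp/ceph.conf'   # override the target path, then write
    conf.write()
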
diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py
new file mode 100644
index 00000000..5551f274
--- /dev/null
+++ b/qa/tasks/ceph.py
@@ -0,0 +1,1896 @@
+"""
+Ceph cluster task.
+
+Handle the setup, starting, and clean-up of a Ceph cluster.
+"""
+from io import BytesIO
+from io import StringIO
+
+import argparse
+import configobj
+import contextlib
+import errno
+import logging
+import os
+import json
+import time
+import gevent
+import re
+import socket
+
+from paramiko import SSHException
+from tasks.ceph_manager import CephManager, write_conf
+from tarfile import ReadError
+from tasks.cephfs.filesystem import Filesystem
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology import exceptions
+from teuthology.orchestra import run
+import tasks.ceph_client as cclient
+from teuthology.orchestra.daemon import DaemonGroup
+
+CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw']
+DATA_PATH = '/var/lib/ceph/{type_}/{cluster}-{id_}'
+
+log = logging.getLogger(__name__)
+
+
+def generate_caps(type_):
+ """
+ Each call will return the next capability for each system type
+ (essentially a subset of possible role values). Valid types are osd,
+    mgr, mds and client.
+ """
+ defaults = dict(
+ osd=dict(
+ mon='allow *',
+ mgr='allow *',
+ osd='allow *',
+ ),
+ mgr=dict(
+ mon='allow profile mgr',
+ osd='allow *',
+ mds='allow *',
+ ),
+ mds=dict(
+ mon='allow *',
+ mgr='allow *',
+ osd='allow *',
+ mds='allow',
+ ),
+ client=dict(
+ mon='allow rw',
+ mgr='allow r',
+ osd='allow rwx',
+ mds='allow',
+ ),
+ )
+ for subsystem, capability in defaults[type_].items():
+ yield '--cap'
+ yield subsystem
+ yield capability
+
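# Illustration of how generate_caps() is consumed: the flat triples it yields
# are appended directly to a ceph-authtool command line, so for a client key
#
#     list(generate_caps('client')) == ['--cap', 'mon', 'allow rw',
#                                       '--cap', 'mgr', 'allow r',
#                                       '--cap', 'osd', 'allow rwx',
#                                       '--cap', 'mds', 'allow']
#
# (subsystems come out in the order defined above, i.e. dict insertion order).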
+
+@contextlib.contextmanager
+def ceph_crash(ctx, config):
+ """
+    Gather crash dumps from /var/lib/ceph/crash
+ """
+ try:
+ yield
+
+ finally:
+ if ctx.archive is not None:
+ log.info('Archiving crash dumps...')
+ path = os.path.join(ctx.archive, 'remote')
+ try:
+ os.makedirs(path)
+ except OSError:
+ pass
+ for remote in ctx.cluster.remotes.keys():
+ sub = os.path.join(path, remote.shortname)
+ try:
+ os.makedirs(sub)
+ except OSError:
+ pass
+ try:
+ teuthology.pull_directory(remote, '/var/lib/ceph/crash',
+ os.path.join(sub, 'crash'))
+ except ReadError:
+ pass
+
+
+@contextlib.contextmanager
+def ceph_log(ctx, config):
+ """
+ Create /var/log/ceph log directory that is open to everyone.
+ Add valgrind and profiling-logger directories.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ log.info('Making ceph log dir writeable by non-root...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'chmod',
+ '777',
+ '/var/log/ceph',
+ ],
+ wait=False,
+ )
+ )
+ log.info('Disabling ceph logrotate...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'rm', '-f', '--',
+ '/etc/logrotate.d/ceph',
+ ],
+ wait=False,
+ )
+ )
+ log.info('Creating extra log directories...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'install', '-d', '-m0777', '--',
+ '/var/log/ceph/valgrind',
+ '/var/log/ceph/profiling-logger',
+ ],
+ wait=False,
+ )
+ )
+
+ class Rotater(object):
+ stop_event = gevent.event.Event()
+
+ def invoke_logrotate(self):
+ # 1) install ceph-test.conf in /etc/logrotate.d
+ # 2) continuously loop over logrotate invocation with ceph-test.conf
+ while not self.stop_event.is_set():
+ self.stop_event.wait(timeout=30)
+ try:
+ run.wait(
+ ctx.cluster.run(
+ args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'
+ ],
+ wait=False,
+ )
+ )
+ except exceptions.ConnectionLostError as e:
+ # Some tests may power off nodes during test, in which
+ # case we will see connection errors that we should ignore.
+ log.debug("Missed logrotate, node '{0}' is offline".format(
+ e.node))
+ except EOFError:
+ # Paramiko sometimes raises this when it fails to
+ # connect to a node during open_session. As with
+ # ConnectionLostError, we ignore this because nodes
+ # are allowed to get power cycled during tests.
+ log.debug("Missed logrotate, EOFError")
+ except SSHException:
+ log.debug("Missed logrotate, SSHException")
+ except socket.error as e:
+ if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET):
+ log.debug("Missed logrotate, host unreachable")
+ else:
+ raise
+
+ def begin(self):
+ self.thread = gevent.spawn(self.invoke_logrotate)
+
+ def end(self):
+ self.stop_event.set()
+ self.thread.get()
+
+ def write_rotate_conf(ctx, daemons):
+ testdir = teuthology.get_testdir(ctx)
+ remote_logrotate_conf = '%s/logrotate.ceph-test.conf' % testdir
+ rotate_conf_path = os.path.join(os.path.dirname(__file__), 'logrotate.conf')
+ with open(rotate_conf_path) as f:
+ conf = ""
+ for daemon, size in daemons.items():
+ log.info('writing logrotate stanza for {}'.format(daemon))
+ conf += f.read().format(daemon_type=daemon,
+ max_size=size)
+ f.seek(0, 0)
+
+ for remote in ctx.cluster.remotes.keys():
+ teuthology.write_file(remote=remote,
+ path=remote_logrotate_conf,
+ data=BytesIO(conf.encode())
+ )
+ remote.run(
+ args=[
+ 'sudo',
+ 'mv',
+ remote_logrotate_conf,
+ '/etc/logrotate.d/ceph-test.conf',
+ run.Raw('&&'),
+ 'sudo',
+ 'chmod',
+ '0644',
+ '/etc/logrotate.d/ceph-test.conf',
+ run.Raw('&&'),
+ 'sudo',
+ 'chown',
+ 'root.root',
+ '/etc/logrotate.d/ceph-test.conf'
+ ]
+ )
+ remote.chcon('/etc/logrotate.d/ceph-test.conf',
+ 'system_u:object_r:etc_t:s0')
+
+ if ctx.config.get('log-rotate'):
+ daemons = ctx.config.get('log-rotate')
+ log.info('Setting up log rotation with ' + str(daemons))
+ write_rotate_conf(ctx, daemons)
+ logrotater = Rotater()
+ logrotater.begin()
+ try:
+ yield
+
+ finally:
+ if ctx.config.get('log-rotate'):
+ log.info('Shutting down logrotate')
+ logrotater.end()
+ ctx.cluster.run(
+ args=['sudo', 'rm', '/etc/logrotate.d/ceph-test.conf'
+ ]
+ )
+ if ctx.archive is not None and \
+ not (ctx.config.get('archive-on-error') and ctx.summary['success']):
+ # and logs
+ log.info('Compressing logs...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'find',
+ '/var/log/ceph',
+ '-name',
+ '*.log',
+ '-print0',
+ run.Raw('|'),
+ 'sudo',
+ 'xargs',
+ '-0',
+ '--no-run-if-empty',
+ '--',
+ 'gzip',
+ '--',
+ ],
+ wait=False,
+ ),
+ )
+
+ log.info('Archiving logs...')
+ path = os.path.join(ctx.archive, 'remote')
+ try:
+ os.makedirs(path)
+ except OSError:
+ pass
+ for remote in ctx.cluster.remotes.keys():
+ sub = os.path.join(path, remote.shortname)
+ try:
+ os.makedirs(sub)
+ except OSError:
+ pass
+ teuthology.pull_directory(remote, '/var/log/ceph',
+ os.path.join(sub, 'log'))
+
+
+def assign_devs(roles, devs):
+ """
+ Create a dictionary of devs indexed by roles
+
+ :param roles: List of roles
+ :param devs: Corresponding list of devices.
+ :returns: Dictionary of devs indexed by roles.
+ """
+ return dict(zip(roles, devs))
+
+
+@contextlib.contextmanager
+def valgrind_post(ctx, config):
+ """
+ After the tests run, look through all the valgrind logs. Exceptions are raised
+ if textual errors occurred in the logs, or if valgrind exceptions were detected in
+ the logs.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ try:
+ yield
+ finally:
+ lookup_procs = list()
+ log.info('Checking for errors in any valgrind logs...')
+ for remote in ctx.cluster.remotes.keys():
+ # look at valgrind logs for each node
+ proc = remote.run(
+ args="sudo zgrep '<kind>' /var/log/ceph/valgrind/* "
+ # include a second file so that we always get
+ # a filename prefix on the output
+ "/dev/null | sort | uniq",
+ wait=False,
+ check_status=False,
+ stdout=StringIO(),
+ )
+ lookup_procs.append((proc, remote))
+
+ valgrind_exception = None
+ for (proc, remote) in lookup_procs:
+ proc.wait()
+ out = proc.stdout.getvalue()
+ for line in out.split('\n'):
+ if line == '':
+ continue
+ try:
+ (file, kind) = line.split(':')
+ except Exception:
+ log.error('failed to split line %s', line)
+ raise
+ log.debug('file %s kind %s', file, kind)
+ if (file.find('mds') >= 0) and kind.find('Lost') > 0:
+ continue
+ log.error('saw valgrind issue %s in %s', kind, file)
+ valgrind_exception = Exception('saw valgrind issues')
+
+ if config.get('expect_valgrind_errors'):
+ if not valgrind_exception:
+ raise Exception('expected valgrind issues and found none')
+ else:
+ if valgrind_exception:
+ raise valgrind_exception
+
+
+@contextlib.contextmanager
+def crush_setup(ctx, config):
+ cluster_name = config['cluster']
+ first_mon = teuthology.get_first_mon(ctx, config, cluster_name)
+ (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ profile = config.get('crush_tunables', 'default')
+ log.info('Setting crush tunables to %s', profile)
+ mon_remote.run(
+ args=['sudo', 'ceph', '--cluster', cluster_name,
+ 'osd', 'crush', 'tunables', profile])
+ yield
+
+
+@contextlib.contextmanager
+def create_rbd_pool(ctx, config):
+ cluster_name = config['cluster']
+ first_mon = teuthology.get_first_mon(ctx, config, cluster_name)
+ (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+ log.info('Waiting for OSDs to come up')
+ teuthology.wait_until_osds_up(
+ ctx,
+ cluster=ctx.cluster,
+ remote=mon_remote,
+ ceph_cluster=cluster_name,
+ )
+ if config.get('create_rbd_pool', True):
+ log.info('Creating RBD pool')
+ mon_remote.run(
+ args=['sudo', 'ceph', '--cluster', cluster_name,
+ 'osd', 'pool', 'create', 'rbd', '8'])
+ mon_remote.run(
+ args=[
+ 'sudo', 'ceph', '--cluster', cluster_name,
+ 'osd', 'pool', 'application', 'enable',
+ 'rbd', 'rbd', '--yes-i-really-mean-it'
+ ],
+ check_status=False)
+ yield
+
+@contextlib.contextmanager
+def cephfs_setup(ctx, config):
+ cluster_name = config['cluster']
+
+ first_mon = teuthology.get_first_mon(ctx, config, cluster_name)
+ (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+ mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name))
+ # If there are any MDSs, then create a filesystem for them to use
+ # Do this last because requires mon cluster to be up and running
+ if mdss.remotes:
+ log.info('Setting up CephFS filesystem...')
+
+ Filesystem(ctx, fs_config=config.get('cephfs', None), name='cephfs',
+ create=True, ec_profile=config.get('cephfs_ec_profile', None))
+
+ yield
+
+
+def get_mons(roles, ips, cluster_name,
+ mon_bind_msgr2=False,
+ mon_bind_addrvec=False):
+ """
+ Get monitors and their associated addresses
+ """
+ mons = {}
+ v1_ports = {}
+ v2_ports = {}
+ mon_id = 0
+ is_mon = teuthology.is_type('mon', cluster_name)
+ for idx, roles in enumerate(roles):
+ for role in roles:
+ if not is_mon(role):
+ continue
+ if ips[idx] not in v1_ports:
+ v1_ports[ips[idx]] = 6789
+ else:
+ v1_ports[ips[idx]] += 1
+ if mon_bind_msgr2:
+ if ips[idx] not in v2_ports:
+ v2_ports[ips[idx]] = 3300
+ addr = '{ip}'.format(ip=ips[idx])
+ else:
+ assert mon_bind_addrvec
+ v2_ports[ips[idx]] += 1
+ addr = '[v2:{ip}:{port2},v1:{ip}:{port1}]'.format(
+ ip=ips[idx],
+ port2=v2_ports[ips[idx]],
+ port1=v1_ports[ips[idx]],
+ )
+ elif mon_bind_addrvec:
+ addr = '[v1:{ip}:{port}]'.format(
+ ip=ips[idx],
+ port=v1_ports[ips[idx]],
+ )
+ else:
+ addr = '{ip}:{port}'.format(
+ ip=ips[idx],
+ port=v1_ports[ips[idx]],
+ )
+ mon_id += 1
+ mons[role] = addr
+ assert mons
+ return mons
+
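# Illustration: for a hypothetical host 172.21.0.1 carrying roles mon.a and
# mon.b, with mon_bind_msgr2 and mon_bind_addrvec enabled, the first mon on
# an IP gets a bare address (default ports) while later mons on the same IP
# get explicit addrvecs with per-IP ports counting up from 6789 (v1) and
# 3300 (v2):
#
#     {'mon.a': '172.21.0.1',
#      'mon.b': '[v2:172.21.0.1:3301,v1:172.21.0.1:6790]'}
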
+def skeleton_config(ctx, roles, ips, mons, cluster='ceph'):
+ """
+ Returns a ConfigObj that is prefilled with a skeleton config.
+
+ Use conf[section][key]=value or conf.merge to change it.
+
+ Use conf.write to write it out, override .filename first if you want.
+ """
+ path = os.path.join(os.path.dirname(__file__), 'ceph.conf.template')
+ conf = configobj.ConfigObj(path, file_error=True)
+ mon_hosts = []
+ for role, addr in mons.items():
+ mon_cluster, _, _ = teuthology.split_role(role)
+ if mon_cluster != cluster:
+ continue
+ name = teuthology.ceph_role(role)
+ conf.setdefault(name, {})
+ mon_hosts.append(addr)
+ conf.setdefault('global', {})
+ conf['global']['mon host'] = ','.join(mon_hosts)
+ # set up standby mds's
+ is_mds = teuthology.is_type('mds', cluster)
+ for roles_subset in roles:
+ for role in roles_subset:
+ if is_mds(role):
+ name = teuthology.ceph_role(role)
+ conf.setdefault(name, {})
+ return conf
+
+def create_simple_monmap(ctx, remote, conf, mons,
+ path=None,
+ mon_bind_addrvec=False):
+ """
+ Writes a simple monmap based on current ceph.conf into path, or
+ <testdir>/monmap by default.
+
+ Assumes ceph_conf is up to date.
+
+ Assumes mon sections are named "mon.*", with the dot.
+
+ :return the FSID (as a string) of the newly created monmap
+ """
+
+ addresses = list(mons.items())
+ assert addresses, "There are no monitors in config!"
+ log.debug('Ceph mon addresses: %s', addresses)
+
+ testdir = teuthology.get_testdir(ctx)
+ args = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'monmaptool',
+ '--create',
+ '--clobber',
+ ]
+ if mon_bind_addrvec:
+ args.extend(['--enable-all-features'])
+ for (role, addr) in addresses:
+ _, _, n = teuthology.split_role(role)
+ if mon_bind_addrvec and (',' in addr or 'v' in addr or ':' in addr):
+ args.extend(('--addv', n, addr))
+ else:
+ args.extend(('--add', n, addr))
+ if not path:
+ path = '{tdir}/monmap'.format(tdir=testdir)
+ args.extend([
+ '--print',
+ path
+ ])
+
+ monmap_output = remote.sh(args)
+ fsid = re.search("generated fsid (.+)$",
+ monmap_output, re.MULTILINE).group(1)
+ return fsid
+
+@contextlib.contextmanager
+def cluster(ctx, config):
+ """
+ Handle the creation and removal of a ceph cluster.
+
+ On startup:
+ Create directories needed for the cluster.
+ Create remote journals for all osds.
+ Create and set keyring.
+ Copy the monmap to the test systems.
+ Setup mon nodes.
+ Setup mds nodes.
+ Mkfs osd nodes.
+ Add keyring information to monmaps
+ Mkfs mon nodes.
+
+ On exit:
+ If errors occurred, extract a failure message and store in ctx.summary.
+ Unmount all test files and temporary journaling files.
+ Save the monitor information and archive all ceph logs.
+ Cleanup the keyring setup, and remove all monitor map and data files left over.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ if ctx.config.get('use_existing_cluster', False) is True:
+ log.info("'use_existing_cluster' is true; skipping cluster creation")
+ yield
+
+ testdir = teuthology.get_testdir(ctx)
+ cluster_name = config['cluster']
+ data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name)
+ log.info('Creating ceph cluster %s...', cluster_name)
+ log.info('config %s', config)
+ log.info('ctx.config %s', ctx.config)
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'install', '-d', '-m0755', '--',
+ data_dir,
+ ],
+ wait=False,
+ )
+ )
+
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'install', '-d', '-m0777', '--', '/var/run/ceph',
+ ],
+ wait=False,
+ )
+ )
+
+ devs_to_clean = {}
+ remote_to_roles_to_devs = {}
+ osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name))
+ for remote, roles_for_host in osds.remotes.items():
+ devs = teuthology.get_scratch_devices(remote)
+ roles_to_devs = assign_devs(
+ teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), devs
+ )
+ devs_to_clean[remote] = []
+ log.info('osd dev map: {}'.format(roles_to_devs))
+ assert roles_to_devs, \
+ "remote {} has osd roles, but no osd devices were specified!".format(remote.hostname)
+ remote_to_roles_to_devs[remote] = roles_to_devs
+ log.info("remote_to_roles_to_devs: {}".format(remote_to_roles_to_devs))
+ for osd_role, dev_name in remote_to_roles_to_devs.items():
+ assert dev_name, "{} has no associated device!".format(osd_role)
+
+ log.info('Generating config...')
+ remotes_and_roles = ctx.cluster.remotes.items()
+ roles = [role_list for (remote, role_list) in remotes_and_roles]
+ ips = [host for (host, port) in
+ (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)]
+ mons = get_mons(
+ roles, ips, cluster_name,
+ mon_bind_msgr2=config.get('mon_bind_msgr2'),
+ mon_bind_addrvec=config.get('mon_bind_addrvec'),
+ )
+ conf = skeleton_config(
+ ctx, roles=roles, ips=ips, mons=mons, cluster=cluster_name,
+ )
+ for section, keys in config['conf'].items():
+ for key, value in keys.items():
+ log.info("[%s] %s = %s" % (section, key, value))
+ if section not in conf:
+ conf[section] = {}
+ conf[section][key] = value
+
+ if not hasattr(ctx, 'ceph'):
+ ctx.ceph = {}
+ ctx.ceph[cluster_name] = argparse.Namespace()
+ ctx.ceph[cluster_name].conf = conf
+ ctx.ceph[cluster_name].mons = mons
+
+ default_keyring = '/etc/ceph/{cluster}.keyring'.format(cluster=cluster_name)
+ keyring_path = config.get('keyring_path', default_keyring)
+
+ coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+
+ firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
+
+ log.info('Setting up %s...' % firstmon)
+ ctx.cluster.only(firstmon).run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ '--create-keyring',
+ keyring_path,
+ ],
+ )
+ ctx.cluster.only(firstmon).run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ '--gen-key',
+ '--name=mon.',
+ keyring_path,
+ ],
+ )
+ ctx.cluster.only(firstmon).run(
+ args=[
+ 'sudo',
+ 'chmod',
+ '0644',
+ keyring_path,
+ ],
+ )
+ (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+ monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir,
+ cluster=cluster_name)
+ fsid = create_simple_monmap(
+ ctx,
+ remote=mon0_remote,
+ conf=conf,
+ mons=mons,
+ path=monmap_path,
+ mon_bind_addrvec=config.get('mon_bind_addrvec'),
+ )
+ if not 'global' in conf:
+ conf['global'] = {}
+ conf['global']['fsid'] = fsid
+
+ default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name)
+ conf_path = config.get('conf_path', default_conf_path)
+ log.info('Writing %s for FSID %s...' % (conf_path, fsid))
+ write_conf(ctx, conf_path, cluster_name)
+
+ log.info('Creating admin key on %s...' % firstmon)
+ ctx.cluster.only(firstmon).run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ '--gen-key',
+ '--name=client.admin',
+ '--cap', 'mon', 'allow *',
+ '--cap', 'osd', 'allow *',
+ '--cap', 'mds', 'allow *',
+ '--cap', 'mgr', 'allow *',
+ keyring_path,
+ ],
+ )
+
+ log.info('Copying monmap to all nodes...')
+ keyring = teuthology.get_file(
+ remote=mon0_remote,
+ path=keyring_path,
+ )
+ monmap = teuthology.get_file(
+ remote=mon0_remote,
+ path=monmap_path,
+ )
+
+ for rem in ctx.cluster.remotes.keys():
+ # copy mon key and initial monmap
+ log.info('Sending monmap to node {remote}'.format(remote=rem))
+ teuthology.sudo_write_file(
+ remote=rem,
+ path=keyring_path,
+ data=keyring,
+ perms='0644'
+ )
+ teuthology.write_file(
+ remote=rem,
+ path=monmap_path,
+ data=monmap,
+ )
+
+ log.info('Setting up mon nodes...')
+ mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name))
+
+ if not config.get('skip_mgr_daemons', False):
+ log.info('Setting up mgr nodes...')
+ mgrs = ctx.cluster.only(teuthology.is_type('mgr', cluster_name))
+ for remote, roles_for_host in mgrs.remotes.items():
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'mgr',
+ cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ mgr_dir = DATA_PATH.format(
+ type_='mgr', cluster=cluster_name, id_=id_)
+ remote.run(
+ args=[
+ 'sudo',
+ 'mkdir',
+ '-p',
+ mgr_dir,
+ run.Raw('&&'),
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ '--create-keyring',
+ '--gen-key',
+ '--name=mgr.{id}'.format(id=id_),
+ mgr_dir + '/keyring',
+ ],
+ )
+
+ log.info('Setting up mds nodes...')
+ mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name))
+ for remote, roles_for_host in mdss.remotes.items():
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds',
+ cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ mds_dir = DATA_PATH.format(
+ type_='mds', cluster=cluster_name, id_=id_)
+ remote.run(
+ args=[
+ 'sudo',
+ 'mkdir',
+ '-p',
+ mds_dir,
+ run.Raw('&&'),
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ '--create-keyring',
+ '--gen-key',
+ '--name=mds.{id}'.format(id=id_),
+ mds_dir + '/keyring',
+ ],
+ )
+ remote.run(args=[
+ 'sudo', 'chown', '-R', 'ceph:ceph', mds_dir
+ ])
+
+ cclient.create_keyring(ctx, cluster_name)
+ log.info('Running mkfs on osd nodes...')
+
+ if not hasattr(ctx, 'disk_config'):
+ ctx.disk_config = argparse.Namespace()
+ if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'):
+ ctx.disk_config.remote_to_roles_to_dev = {}
+ if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'):
+ ctx.disk_config.remote_to_roles_to_dev_mount_options = {}
+ if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'):
+ ctx.disk_config.remote_to_roles_to_dev_fstype = {}
+
+ teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs)
+
+ log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev)))
+ for remote, roles_for_host in osds.remotes.items():
+ roles_to_devs = remote_to_roles_to_devs[remote]
+
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ mnt_point = DATA_PATH.format(
+ type_='osd', cluster=cluster_name, id_=id_)
+ remote.run(
+ args=[
+ 'sudo',
+ 'mkdir',
+ '-p',
+ mnt_point,
+ ])
+ log.info('roles_to_devs: {}'.format(roles_to_devs))
+ log.info('role: {}'.format(role))
+ if roles_to_devs.get(role):
+ dev = roles_to_devs[role]
+ fs = config.get('fs')
+ package = None
+ mkfs_options = config.get('mkfs_options')
+ mount_options = config.get('mount_options')
+ if fs == 'btrfs':
+ # package = 'btrfs-tools'
+ if mount_options is None:
+ mount_options = ['noatime', 'user_subvol_rm_allowed']
+ if mkfs_options is None:
+ mkfs_options = ['-m', 'single',
+ '-l', '32768',
+ '-n', '32768']
+ if fs == 'xfs':
+ # package = 'xfsprogs'
+ if mount_options is None:
+ mount_options = ['noatime']
+ if mkfs_options is None:
+ mkfs_options = ['-f', '-i', 'size=2048']
+ if fs == 'ext4' or fs == 'ext3':
+ if mount_options is None:
+ mount_options = ['noatime', 'user_xattr']
+
+ if mount_options is None:
+ mount_options = []
+ if mkfs_options is None:
+ mkfs_options = []
+ mkfs = ['mkfs.%s' % fs] + mkfs_options
+ log.info('%s on %s on %s' % (mkfs, dev, remote))
+ if package is not None:
+ remote.sh('sudo apt-get install -y %s' % package)
+
+ try:
+ remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
+ except run.CommandFailedError:
+                    # Newer btrfs-tools doesn't prompt for overwrite, use -f
+                    if '-f' not in mkfs_options:
+ mkfs_options.append('-f')
+ mkfs = ['mkfs.%s' % fs] + mkfs_options
+ log.info('%s on %s on %s' % (mkfs, dev, remote))
+ remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev])
+
+ log.info('mount %s on %s -o %s' % (dev, remote,
+ ','.join(mount_options)))
+ remote.run(
+ args=[
+ 'sudo',
+ 'mount',
+ '-t', fs,
+ '-o', ','.join(mount_options),
+ dev,
+ mnt_point,
+ ]
+ )
+ remote.run(
+ args=[
+ 'sudo', '/sbin/restorecon', mnt_point,
+ ],
+ check_status=False,
+ )
+ if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options:
+ ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {}
+ ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options
+ if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype:
+ ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {}
+ ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs
+ devs_to_clean[remote].append(mnt_point)
+
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ try:
+ remote.run(
+ args=[
+ 'sudo',
+ 'MALLOC_CHECK_=3',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-osd',
+ '--no-mon-config',
+ '--cluster',
+ cluster_name,
+ '--mkfs',
+ '--mkkey',
+ '-i', id_,
+ '--monmap', monmap_path,
+ ],
+ )
+ except run.CommandFailedError:
+ # try without --no-mon-config.. this may be an upgrade test
+ remote.run(
+ args=[
+ 'sudo',
+ 'MALLOC_CHECK_=3',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-osd',
+ '--cluster',
+ cluster_name,
+ '--mkfs',
+ '--mkkey',
+ '-i', id_,
+ '--monmap', monmap_path,
+ ],
+ )
+ mnt_point = DATA_PATH.format(
+ type_='osd', cluster=cluster_name, id_=id_)
+ try:
+ remote.run(args=[
+ 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point
+ ])
+ except run.CommandFailedError as e:
+ # hammer does not have ceph user, so ignore this error
+ log.info('ignoring error when chown ceph:ceph, '
+ 'probably installing hammer: %s', e)
+
+ log.info('Reading keys from all nodes...')
+ keys_fp = BytesIO()
+ keys = []
+ for remote, roles_for_host in ctx.cluster.remotes.items():
+ for type_ in ['mgr', 'mds', 'osd']:
+ if type_ == 'mgr' and config.get('skip_mgr_daemons', False):
+ continue
+ for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ data = teuthology.get_file(
+ remote=remote,
+ path=os.path.join(
+ DATA_PATH.format(
+ type_=type_, id_=id_, cluster=cluster_name),
+ 'keyring',
+ ),
+ sudo=True,
+ )
+ keys.append((type_, id_, data))
+ keys_fp.write(data)
+ for remote, roles_for_host in ctx.cluster.remotes.items():
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ data = teuthology.get_file(
+ remote=remote,
+ path='/etc/ceph/{cluster}.client.{id}.keyring'.format(id=id_, cluster=cluster_name)
+ )
+ keys.append(('client', id_, data))
+ keys_fp.write(data)
+
+ log.info('Adding keys to all mons...')
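+ # stream the concatenated keyrings to 'tee -a' on every mon in
+ # parallel, then set the appropriate caps on each imported key with
+ # ceph-authtool below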
+ writes = mons.run(
+ args=[
+ 'sudo', 'tee', '-a',
+ keyring_path,
+ ],
+ stdin=run.PIPE,
+ wait=False,
+ stdout=BytesIO(),
+ )
+ keys_fp.seek(0)
+ teuthology.feed_many_stdins_and_close(keys_fp, writes)
+ run.wait(writes)
+ for type_, id_, data in keys:
+ run.wait(
+ mons.run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ keyring_path,
+ '--name={type}.{id}'.format(
+ type=type_,
+ id=id_,
+ ),
+ ] + list(generate_caps(type_)),
+ wait=False,
+ ),
+ )
+
+ log.info('Running mkfs on mon nodes...')
+ for remote, roles_for_host in mons.remotes.items():
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name):
+ _, _, id_ = teuthology.split_role(role)
+ mnt_point = DATA_PATH.format(
+ type_='mon', id_=id_, cluster=cluster_name)
+ remote.run(
+ args=[
+ 'sudo',
+ 'mkdir',
+ '-p',
+ mnt_point,
+ ],
+ )
+ remote.run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-mon',
+ '--cluster', cluster_name,
+ '--mkfs',
+ '-i', id_,
+ '--monmap', monmap_path,
+ '--keyring', keyring_path,
+ ],
+ )
+ try:
+ remote.run(args=[
+ 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point
+ ])
+ except run.CommandFailedError as e:
+ # hammer does not have ceph user, so ignore this error
+ log.info('ignoring error when chown ceph:ceph, '
+ 'probably installing hammer: %s', e)
+
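+ # every mon has now been mkfs'ed with the temporary monmap, so it can
+ # be removed from all mon nodes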
+ run.wait(
+ mons.run(
+ args=[
+ 'rm',
+ '--',
+ monmap_path,
+ ],
+ wait=False,
+ ),
+ )
+
+ try:
+ yield
+ except Exception:
+ # we need to know this below
+ ctx.summary['success'] = False
+ raise
+ finally:
+ (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+
+ log.info('Checking cluster log for badness...')
+
+ def first_in_ceph_log(pattern, excludes):
+ """
+ Find the first occurrence of the pattern specified in the Ceph log.
+ Returns None if none found.
+
+ :param pattern: Pattern scanned for.
+ :param excludes: Patterns to ignore.
+ :return: First line of text (or None if not found)
+ """
+ args = [
+ 'sudo',
+ 'egrep', pattern,
+ '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name),
+ ]
+ for exclude in excludes:
+ args.extend([run.Raw('|'), 'egrep', '-v', exclude])
+ args.extend([
+ run.Raw('|'), 'head', '-n', '1',
+ ])
+ stdout = mon0_remote.sh(args)
+ return stdout or None
+
+ if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]',
+ config['log_whitelist']) is not None:
+ log.warning('Found errors (ERR|WRN|SEC) in cluster log')
+ ctx.summary['success'] = False
+ # use the most severe problem as the failure reason
+ if 'failure_reason' not in ctx.summary:
+ for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']:
+ match = first_in_ceph_log(pattern, config['log_whitelist'])
+ if match is not None:
+ ctx.summary['failure_reason'] = \
+ '"{match}" in cluster log'.format(
+ match=match.rstrip('\n'),
+ )
+ break
+
+ for remote, dirs in devs_to_clean.items():
+ for dir_ in dirs:
+ log.info('Unmounting %s on %s' % (dir_, remote))
+ try:
+ remote.run(
+ args=[
+ 'sync',
+ run.Raw('&&'),
+ 'sudo',
+ 'umount',
+ '-f',
+ dir_
+ ]
+ )
+ except Exception as e:
+ remote.run(args=[
+ 'sudo',
+ run.Raw('PATH=/usr/sbin:$PATH'),
+ 'lsof',
+ run.Raw(';'),
+ 'ps', 'auxf',
+ ])
+ raise e
+
+ if ctx.archive is not None and \
+ not (ctx.config.get('archive-on-error') and ctx.summary['success']):
+
+ # archive mon data, too
+ log.info('Archiving mon data...')
+ path = os.path.join(ctx.archive, 'data')
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+ for remote, roles in mons.remotes.items():
+ for role in roles:
+ is_mon = teuthology.is_type('mon', cluster_name)
+ if is_mon(role):
+ _, _, id_ = teuthology.split_role(role)
+ mon_dir = DATA_PATH.format(
+ type_='mon', id_=id_, cluster=cluster_name)
+ teuthology.pull_directory_tarball(
+ remote,
+ mon_dir,
+ path + '/' + role + '.tgz')
+
+ log.info('Cleaning ceph cluster...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'rm',
+ '-rf',
+ '--',
+ conf_path,
+ keyring_path,
+ data_dir,
+ monmap_path,
+ run.Raw('{tdir}/../*.pid'.format(tdir=testdir)),
+ ],
+ wait=False,
+ ),
+ )
+
+
+def osd_scrub_pgs(ctx, config):
+ """
+ Scrub pgs when we exit.
+
+ First make sure all pgs are active and clean.
+ Next scrub all osds.
+ Then periodically check until all pgs have scrub timestamps that
+ indicate the last scrub completed. Time out if no progress is made
+ for too many consecutive checks.
+ """
+ retries = 40
+ delays = 20
+ cluster_name = config['cluster']
+ manager = ctx.managers[cluster_name]
+ all_clean = False
+ for _ in range(0, retries):
+ stats = manager.get_pg_stats()
+ unclean = [stat['pgid'] for stat in stats if 'active+clean' not in stat['state']]
+ osd_dump = manager.get_osd_dump_json()
+ for pool in osd_dump['pools']:
+ pg_num_target = pool.get('pg_num_target')
+ if pg_num_target is None:
+ # mimic does not adjust pg num automatically
+ split_merge = False
+ break
+ elif pg_num_target != pool['pg_num']:
+ split_merge = True
+ break
+ else:
+ split_merge = False
+ if not unclean and not split_merge:
+ all_clean = True
+ break
+ log.info(
+ "Waiting for all PGs to be active+clean and split+merged, waiting on %s to go clean and/or %s to split/merge" % (unclean, split_merge))
+ time.sleep(delays)
+ if not all_clean:
+ raise RuntimeError("Scrubbing terminated -- not all pgs were active and clean.")
+ check_time_now = time.localtime()
+ time.sleep(1)
+ all_roles = teuthology.all_roles(ctx.cluster)
+ for role in teuthology.cluster_roles_of_type(all_roles, 'osd', cluster_name):
+ log.info("Scrubbing {osd}".format(osd=role))
+ _, _, id_ = teuthology.split_role(role)
+ # allow this to fail; in certain cases the OSD might not be up
+ # at this point. we will catch all pgs below.
+ try:
+ manager.raw_cluster_cmd('tell', 'osd.' + id_, 'config', 'set',
+ 'osd_debug_deep_scrub_sleep', '0');
+ manager.raw_cluster_cmd('osd', 'deep-scrub', id_)
+ except run.CommandFailedError:
+ pass
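+ # prev_good counts how many PGs had a fresh scrub stamp on the previous
+ # pass; gap_cnt counts consecutive passes without progress and drives
+ # the periodic re-requests and the timeout below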
+ prev_good = 0
+ gap_cnt = 0
+ loop = True
+ while loop:
+ stats = manager.get_pg_stats()
+ timez = [(stat['pgid'],stat['last_scrub_stamp']) for stat in stats]
+ loop = False
+ thiscnt = 0
+ for (pgid, tmval) in timez:
+ pgtm = time.strptime(tmval[0:tmval.find('.')], '%Y-%m-%d %H:%M:%S')
+ if pgtm > check_time_now:
+ thiscnt += 1
+ else:
+ log.info('pgid %s last_scrub_stamp %s %s <= %s', pgid, tmval, pgtm, check_time_now)
+ loop = True
+ if thiscnt > prev_good:
+ prev_good = thiscnt
+ gap_cnt = 0
+ else:
+ gap_cnt += 1
+ if gap_cnt % 6 == 0:
+ for (pgid, tmval) in timez:
+ # re-request scrub every so often in case the earlier
+ # request was missed. do not do it every time because
+ # the scrub may be in progress or not reported yet and
+ # we will starve progress.
+ manager.raw_cluster_cmd('pg', 'deep-scrub', pgid)
+ if gap_cnt > retries:
+ raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.')
+ if loop:
+ log.info('Still waiting for all pgs to be scrubbed.')
+ time.sleep(delays)
+
+
+@contextlib.contextmanager
+def run_daemon(ctx, config, type_):
+ """
+ Run daemons for a role type. Handle the startup and termination of a daemon.
+ On startup -- set coverages, cpu_profile, valgrind values for all remotes,
+ and a max_mds value for one mds.
+ On cleanup -- Stop all existing daemons of this type.
+
+ :param ctx: Context
+ :param config: Configuration
+ :param type_: Role type
+ """
+ cluster_name = config['cluster']
+ log.info('Starting %s daemons in cluster %s...', type_, cluster_name)
+ testdir = teuthology.get_testdir(ctx)
+ daemons = ctx.cluster.only(teuthology.is_type(type_, cluster_name))
+
+ # check whether any daemons of this type are configured
+ if daemons is None:
+ return
+ coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+
+ daemon_signal = 'kill'
+ if config.get('coverage') or config.get('valgrind') is not None:
+ daemon_signal = 'term'
+
+ # create osds in order. (this only matters for pre-luminous, which might
+ # be hammer, which doesn't take an id_ argument to legacy 'osd create').
+ osd_uuids = {}
+ for remote, roles_for_host in daemons.remotes.items():
+ is_type_ = teuthology.is_type(type_, cluster_name)
+ for role in roles_for_host:
+ if not is_type_(role):
+ continue
+ _, _, id_ = teuthology.split_role(role)
+
+
+ if type_ == 'osd':
+ datadir='/var/lib/ceph/osd/{cluster}-{id}'.format(
+ cluster=cluster_name, id=id_)
+ osd_uuid = teuthology.get_file(
+ remote=remote,
+ path=datadir + '/fsid',
+ sudo=True,
+ ).decode().strip()
+ osd_uuids[id_] = osd_uuid
+ for osd_id in range(len(osd_uuids)):
+ id_ = str(osd_id)
+ osd_uuid = osd_uuids.get(id_)
+ try:
+ remote.run(
+ args=[
+ 'sudo', 'ceph', '--cluster', cluster_name,
+ 'osd', 'new', osd_uuid, id_,
+ ]
+ )
+ except:
+ # fallback to pre-luminous (hammer or jewel)
+ remote.run(
+ args=[
+ 'sudo', 'ceph', '--cluster', cluster_name,
+ 'osd', 'create', osd_uuid,
+ ]
+ )
+ if config.get('add_osds_to_crush'):
+ remote.run(
+ args=[
+ 'sudo', 'ceph', '--cluster', cluster_name,
+ 'osd', 'crush', 'create-or-move', 'osd.' + id_,
+ '1.0', 'host=localhost', 'root=default',
+ ]
+ )
+
+ for remote, roles_for_host in daemons.remotes.items():
+ is_type_ = teuthology.is_type(type_, cluster_name)
+ for role in roles_for_host:
+ if not is_type_(role):
+ continue
+ _, _, id_ = teuthology.split_role(role)
+
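+ # wrap the daemon with adjust-ulimits / ceph-coverage / daemon-helper
+ # so coverage data is collected and the daemon can be signalled
+ # cleanly on teardown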
+ run_cmd = [
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'daemon-helper',
+ daemon_signal,
+ ]
+ run_cmd_tail = [
+ 'ceph-%s' % (type_),
+ '-f',
+ '--cluster', cluster_name,
+ '-i', id_]
+
+ if type_ in config.get('cpu_profile', []):
+ profile_path = '/var/log/ceph/profiling-logger/%s.prof' % (role)
+ run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path])
+
+ if config.get('valgrind') is not None:
+ valgrind_args = None
+ if type_ in config['valgrind']:
+ valgrind_args = config['valgrind'][type_]
+ if role in config['valgrind']:
+ valgrind_args = config['valgrind'][role]
+ run_cmd = teuthology.get_valgrind_args(testdir, role,
+ run_cmd,
+ valgrind_args)
+
+ run_cmd.extend(run_cmd_tail)
+
+ # always register mgr; don't necessarily start
+ ctx.daemons.register_daemon(
+ remote, type_, id_,
+ cluster=cluster_name,
+ args=run_cmd,
+ logger=log.getChild(role),
+ stdin=run.PIPE,
+ wait=False
+ )
+ if type_ != 'mgr' or not config.get('skip_mgr_daemons', False):
+ role = cluster_name + '.' + type_
+ ctx.daemons.get_daemon(type_, id_, cluster_name).restart()
+
+ try:
+ yield
+ finally:
+ teuthology.stop_daemons_of_type(ctx, type_, cluster_name)
+
+
+def healthy(ctx, config):
+ """
+ Wait for all OSDs to be up, and for the ceph health check to return HEALTH_OK.
+
+ :param ctx: Context
+ :param config: Configuration
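+
+ Usage sketch (follows the ceph.<subtask> convention used by
+ ceph.restart below; the optional 'cluster' key defaults to 'ceph')::
+
+ tasks:
+ - ceph.healthy: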
+ """
+ config = config if isinstance(config, dict) else dict()
+ cluster_name = config.get('cluster', 'ceph')
+ log.info('Waiting until %s daemons up and pgs clean...', cluster_name)
+ manager = ctx.managers[cluster_name]
+ try:
+ manager.wait_for_mgr_available(timeout=30)
+ except (run.CommandFailedError, AssertionError) as e:
+ log.info('ignoring mgr wait error, probably testing upgrade: %s', e)
+
+ firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
+ (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+ teuthology.wait_until_osds_up(
+ ctx,
+ cluster=ctx.cluster,
+ remote=mon0_remote,
+ ceph_cluster=cluster_name,
+ )
+
+ try:
+ manager.flush_all_pg_stats()
+ except (run.CommandFailedError, Exception) as e:
+ log.info('ignoring flush pg stats error, probably testing upgrade: %s', e)
+ manager.wait_for_clean()
+
+ if config.get('wait-for-healthy', True):
+ log.info('Waiting until ceph cluster %s is healthy...', cluster_name)
+ teuthology.wait_until_healthy(
+ ctx,
+ remote=mon0_remote,
+ ceph_cluster=cluster_name,
+ )
+
+ if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes:
+ # Some MDSs exist, wait for them to be healthy
+ ceph_fs = Filesystem(ctx) # TODO: make Filesystem cluster-aware
+ ceph_fs.wait_for_daemons(timeout=300)
+
+
+def wait_for_osds_up(ctx, config):
+ """
+ Wait for all OSDs to come up.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ log.info('Waiting until ceph osds are all up...')
+ cluster_name = config.get('cluster', 'ceph')
+ firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
+ (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+ teuthology.wait_until_osds_up(
+ ctx,
+ cluster=ctx.cluster,
+ remote=mon0_remote
+ )
+
+
+def wait_for_mon_quorum(ctx, config):
+ """
+ Poll remote ceph status until the specified monitors form a quorum.
+
+ :param ctx: Context
+ :param config: Configuration
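+
+ The config may be a plain list of monitor ids or a dict; a usage
+ sketch inferred from the parsing below::
+
+ tasks:
+ - ceph.wait_for_mon_quorum: [a, b, c]
+
+ or::
+
+ tasks:
+ - ceph.wait_for_mon_quorum:
+ daemons: [a, b, c]
+ cluster: backup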
+ """
+ if isinstance(config, dict):
+ mons = config['daemons']
+ cluster_name = config.get('cluster', 'ceph')
+ else:
+ assert isinstance(config, list)
+ mons = config
+ cluster_name = 'ceph'
+ firstmon = teuthology.get_first_mon(ctx, config, cluster_name)
+ (remote,) = ctx.cluster.only(firstmon).remotes.keys()
+ with contextutil.safe_while(sleep=10, tries=60,
+ action='wait for monitor quorum') as proceed:
+ while proceed():
+ quorum_status = remote.sh('sudo ceph quorum_status',
+ logger=log.getChild('quorum_status'))
+ j = json.loads(quorum_status)
+ q = j.get('quorum_names', [])
+ log.debug('Quorum: %s', q)
+ if sorted(q) == sorted(mons):
+ break
+
+
+def created_pool(ctx, config):
+ """
+ Add new pools to the dictionary of pools that the ceph-manager
+ knows about.
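+
+ Usage sketch (the config is iterated as a list of pool names;
+ 'newpool' is a placeholder)::
+
+ tasks:
+ - ceph.created_pool: [newpool]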
+ """
+ for new_pool in config:
+ if new_pool not in ctx.managers['ceph'].pools:
+ ctx.managers['ceph'].pools[new_pool] = ctx.managers['ceph'].get_pool_property(
+ new_pool, 'pg_num')
+
+
+@contextlib.contextmanager
+def tweaked_option(ctx, config):
+ """
+ set an option, and then restore it with its original value
+
+ Note: due to the way tasks are executed/nested, it is not recommended to
+ use this method as a standalone task; otherwise it is likely to restore
+ the tweaked option only at the /end/ of the 'tasks' block.
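+
+ It is normally driven through ceph.restart, for example (usage
+ sketch)::
+
+ tasks:
+ - ceph.restart:
+ daemons: [mon.a]
+ mon-health-to-clog: false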
+ """
+ saved_options = {}
+ # we can complicate this when necessary
+ options = ['mon-health-to-clog']
+ type_, id_ = 'mon', '*'
+ cluster = config.get('cluster', 'ceph')
+ manager = ctx.managers[cluster]
+ if id_ == '*':
+ get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_))
+ else:
+ get_from = id_
+ for option in options:
+ if option not in config:
+ continue
+ value = 'true' if config[option] else 'false'
+ option = option.replace('-', '_')
+ old_value = manager.get_config(type_, get_from, option)
+ if value != old_value:
+ saved_options[option] = old_value
+ manager.inject_args(type_, id_, option, value)
+ yield
+ for option, value in saved_options.items():
+ manager.inject_args(type_, id_, option, value)
+
+
+@contextlib.contextmanager
+def restart(ctx, config):
+ """
+ restart ceph daemons
+
+ For example::
+ tasks:
+ - ceph.restart: [all]
+
+ For example::
+ tasks:
+ - ceph.restart: [osd.0, mon.1, mds.*]
+
+ or::
+
+ tasks:
+ - ceph.restart:
+ daemons: [osd.0, mon.1]
+ wait-for-healthy: false
+ wait-for-osds-up: true
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ if config is None:
+ config = {}
+ elif isinstance(config, list):
+ config = {'daemons': config}
+
+ daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True)
+ clusters = set()
+
+ with tweaked_option(ctx, config):
+ for role in daemons:
+ cluster, type_, id_ = teuthology.split_role(role)
+ ctx.daemons.get_daemon(type_, id_, cluster).restart()
+ clusters.add(cluster)
+
+ for role in daemons:
+ cluster, type_, id_ = teuthology.split_role(role)
+ if type_ == 'osd':
+ ctx.managers[cluster].mark_down_osd(id_)
+
+ if config.get('wait-for-healthy', True):
+ for cluster in clusters:
+ healthy(ctx=ctx, config=dict(cluster=cluster))
+ if config.get('wait-for-osds-up', False):
+ for cluster in clusters:
+ wait_for_osds_up(ctx=ctx, config=dict(cluster=cluster))
+ yield
+
+
+@contextlib.contextmanager
+def stop(ctx, config):
+ """
+ Stop ceph daemons
+
+ For example::
+ tasks:
+ - ceph.stop: [mds.*]
+
+ tasks:
+ - ceph.stop: [osd.0, osd.2]
+
+ tasks:
+ - ceph.stop:
+ daemons: [osd.0, osd.2]
+
+ """
+ if config is None:
+ config = {}
+ elif isinstance(config, list):
+ config = {'daemons': config}
+
+ daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True)
+ for role in daemons:
+ cluster, type_, id_ = teuthology.split_role(role)
+ ctx.daemons.get_daemon(type_, id_, cluster).stop()
+
+ yield
+
+
+@contextlib.contextmanager
+def wait_for_failure(ctx, config):
+ """
+ Wait for a failure of a ceph daemon
+
+ For example::
+ tasks:
+ - ceph.wait_for_failure: [mds.*]
+
+ tasks:
+ - ceph.wait_for_failure: [osd.0, osd.2]
+
+ tasks:
+ - ceph.wait_for_failure:
+ daemons: [osd.0, osd.2]
+
+ """
+ if config is None:
+ config = {}
+ elif isinstance(config, list):
+ config = {'daemons': config}
+
+ daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True)
+ for role in daemons:
+ cluster, type_, id_ = teuthology.split_role(role)
+ try:
+ ctx.daemons.get_daemon(type_, id_, cluster).wait()
+ except:
+ log.info('Saw expected daemon failure. Continuing.')
+ pass
+ else:
+ raise RuntimeError('daemon %s did not fail' % role)
+
+ yield
+
+
+def validate_config(ctx, config):
+ """
+ Perform some simple validation on task configuration.
+ Raises exceptions.ConfigError if an error is found.
+ """
+ # check for osds from multiple clusters on the same host
+ for remote, roles_for_host in ctx.cluster.remotes.items():
+ last_cluster = None
+ last_role = None
+ for role in roles_for_host:
+ role_cluster, role_type, _ = teuthology.split_role(role)
+ if role_type != 'osd':
+ continue
+ if last_cluster and last_cluster != role_cluster:
+ msg = "Host should not have osds (%s and %s) from multiple clusters" % (
+ last_role, role)
+ raise exceptions.ConfigError(msg)
+ last_cluster = role_cluster
+ last_role = role
+
+
+def stop_logging_health(remote, cluster, retry):
+ # try this several times, since tell to mons is lossy.
+ args = 'sudo ceph --cluster {cluster} {retry_opts} tell mon.* injectargs -- --no-mon-health-to-clog'
+ try:
+ retry_opts = '--mon-client-directed-command-retry {}'.format(retry)
+ remote.run(
+ args=args.format(cluster=cluster,
+ retry_opts=retry_opts))
+ except run.CommandFailedError:
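+ # the retry option may not be supported by older releases; fall
+ # back to issuing the plain tell several times ourselves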
+ for i in range(retry):
+ try:
+ remote.run(
+ args=args.format(cluster=cluster,
+ retry_opts=''))
+ return
+ except run.CommandFailedError:
+ pass
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Set up and tear down a Ceph cluster.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - interactive:
+
+ You can also specify what branch to run::
+
+ tasks:
+ - ceph:
+ branch: foo
+
+ Or a tag::
+
+ tasks:
+ - ceph:
+ tag: v0.42.13
+
+ Or a sha1::
+
+ tasks:
+ - ceph:
+ sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed
+
+ Or a local source dir::
+
+ tasks:
+ - ceph:
+ path: /home/sage/ceph
+
+ To capture code coverage data, use::
+
+ tasks:
+ - ceph:
+ coverage: true
+
+ To use btrfs, ext4, or xfs on the target's scratch disks, use::
+
+ tasks:
+ - ceph:
+ fs: xfs
+ mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1]
+ mount_options: [nobarrier, inode64]
+
+ To change the CephFS default max_mds (1), use::
+
+ tasks:
+ - ceph:
+ cephfs:
+ max_mds: 2
+
+ To change the mdsmap's default session_timeout (60 seconds), use::
+
+ tasks:
+ - ceph:
+ cephfs:
+ session_timeout: 300
+
+ Note that this will cause the task to check the /scratch_devs file on each node
+ for available devices. If no such file is found, /dev/sdb will be used.
+
+ To run some daemons under valgrind, include their names
+ and the tool/args to use in a valgrind section::
+
+ tasks:
+ - ceph:
+ valgrind:
+ mds.1: --tool=memcheck
+ osd.1: [--tool=memcheck, --leak-check=no]
+
+ Those nodes which are using memcheck or valgrind will be
+ checked for bad results.
+
+ To adjust or modify config options, use::
+
+ tasks:
+ - ceph:
+ conf:
+ section:
+ key: value
+
+ For example::
+
+ tasks:
+ - ceph:
+ conf:
+ mds.0:
+ some option: value
+ other key: other value
+ client.0:
+ debug client: 10
+ debug ms: 1
+
+ By default, the cluster log is checked for errors and warnings,
+ and the run marked failed if any appear. You can ignore log
+ entries by giving a list of egrep-compatible regexes, e.g.::
+
+ tasks:
+ - ceph:
+ log-whitelist: ['foo.*bar', 'bad message']
+
+ To run multiple ceph clusters, use multiple ceph tasks, and roles
+ with a cluster name prefix, e.g. cluster1.client.0. Roles with no
+ cluster use the default cluster name, 'ceph'. OSDs from separate
+ clusters must be on separate hosts. Clients and non-osd daemons
+ from multiple clusters may be colocated. For each cluster, add an
+ instance of the ceph task with the cluster name specified, e.g.::
+
+ roles:
+ - [mon.a, osd.0, osd.1]
+ - [backup.mon.a, backup.osd.0, backup.osd.1]
+ - [client.0, backup.client.0]
+ tasks:
+ - ceph:
+ cluster: ceph
+ - ceph:
+ cluster: backup
+
+ :param ctx: Context
+ :param config: Configuration
+
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ "task ceph only supports a dictionary for configuration"
+
+ overrides = ctx.config.get('overrides', {})
+ teuthology.deep_merge(config, overrides.get('ceph', {}))
+
+ first_ceph_cluster = False
+ if not hasattr(ctx, 'daemons'):
+ first_ceph_cluster = True
+ ctx.daemons = DaemonGroup()
+
+ testdir = teuthology.get_testdir(ctx)
+ if config.get('coverage'):
+ coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+ log.info('Creating coverage directory...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'install', '-d', '-m0755', '--',
+ coverage_dir,
+ ],
+ wait=False,
+ )
+ )
+
+ if 'cluster' not in config:
+ config['cluster'] = 'ceph'
+
+ validate_config(ctx, config)
+
+ subtasks = []
+ if first_ceph_cluster:
+ # these tasks handle general log setup and parsing on all hosts,
+ # so they should only be run once
+ subtasks = [
+ lambda: ceph_log(ctx=ctx, config=None),
+ lambda: ceph_crash(ctx=ctx, config=None),
+ lambda: valgrind_post(ctx=ctx, config=config),
+ ]
+
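+ # bring the cluster up in dependency order: cluster setup, mons,
+ # mgrs, crush map, osds, the default rbd pool, cephfs, then mds
+ # daemons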
+ subtasks += [
+ lambda: cluster(ctx=ctx, config=dict(
+ conf=config.get('conf', {}),
+ fs=config.get('fs', 'xfs'),
+ mkfs_options=config.get('mkfs_options', None),
+ mount_options=config.get('mount_options', None),
+ skip_mgr_daemons=config.get('skip_mgr_daemons', False),
+ log_whitelist=config.get('log-whitelist', []),
+ cpu_profile=set(config.get('cpu_profile', []),),
+ cluster=config['cluster'],
+ mon_bind_msgr2=config.get('mon_bind_msgr2', True),
+ mon_bind_addrvec=config.get('mon_bind_addrvec', True),
+ )),
+ lambda: run_daemon(ctx=ctx, config=config, type_='mon'),
+ lambda: run_daemon(ctx=ctx, config=config, type_='mgr'),
+ lambda: crush_setup(ctx=ctx, config=config),
+ lambda: run_daemon(ctx=ctx, config=config, type_='osd'),
+ lambda: create_rbd_pool(ctx=ctx, config=config),
+ lambda: cephfs_setup(ctx=ctx, config=config),
+ lambda: run_daemon(ctx=ctx, config=config, type_='mds'),
+ ]
+
+ with contextutil.nested(*subtasks):
+ first_mon = teuthology.get_first_mon(ctx, config, config['cluster'])
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ if not hasattr(ctx, 'managers'):
+ ctx.managers = {}
+ ctx.managers[config['cluster']] = CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager.' + config['cluster']),
+ cluster=config['cluster'],
+ )
+
+ try:
+ if config.get('wait-for-healthy', True):
+ healthy(ctx=ctx, config=dict(cluster=config['cluster']))
+
+ yield
+ finally:
+ # set pg_num_targets back to actual pg_num, so we don't have to
+ # wait for pending merges (which can take a while!)
+ ctx.managers[config['cluster']].stop_pg_num_changes()
+
+ if config.get('wait-for-scrub', True):
+ osd_scrub_pgs(ctx, config)
+
+ # stop logging health to clog during shutdown, or else we generate
+ # a bunch of scary messages unrelated to our actual run.
+ firstmon = teuthology.get_first_mon(ctx, config, config['cluster'])
+ (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys()
+ stop_logging_health(mon0_remote, config['cluster'], 5)
diff --git a/qa/tasks/ceph_client.py b/qa/tasks/ceph_client.py
new file mode 100644
index 00000000..74e818f9
--- /dev/null
+++ b/qa/tasks/ceph_client.py
@@ -0,0 +1,42 @@
+"""
+Set up client keyring
+"""
+import logging
+
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def create_keyring(ctx, cluster_name):
+ """
+ Set up key ring on remote sites
+ """
+ log.info('Setting up client nodes...')
+ clients = ctx.cluster.only(teuthology.is_type('client', cluster_name))
+ testdir = teuthology.get_testdir(ctx)
+ coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir)
+ for remote, roles_for_host in clients.remotes.items():
+ for role in teuthology.cluster_roles_of_type(roles_for_host, 'client',
+ cluster_name):
+ name = teuthology.ceph_role(role)
+ client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name)
+ remote.run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ coverage_dir,
+ 'ceph-authtool',
+ '--create-keyring',
+ '--gen-key',
+ # TODO this --name= is not really obeyed, all unknown "types" are munged to "client"
+ '--name={name}'.format(name=name),
+ client_keyring,
+ run.Raw('&&'),
+ 'sudo',
+ 'chmod',
+ '0644',
+ client_keyring,
+ ],
+ )
diff --git a/qa/tasks/ceph_deploy.py b/qa/tasks/ceph_deploy.py
new file mode 100644
index 00000000..de45fff9
--- /dev/null
+++ b/qa/tasks/ceph_deploy.py
@@ -0,0 +1,932 @@
+"""
+Execute ceph-deploy as a task
+"""
+
+import contextlib
+import os
+import time
+import logging
+import traceback
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.config import config as teuth_config
+from teuthology.task import install as install_fn
+from teuthology.orchestra import run
+from tasks.cephfs.filesystem import Filesystem
+from teuthology.misc import wait_until_healthy
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download_ceph_deploy(ctx, config):
+ """
+ Downloads ceph-deploy from the ceph.com git mirror and (by default)
+ switches to the master branch. If the `ceph-deploy-branch` is specified, it
+ will use that instead. The `bootstrap` script is run, with the argument
+ obtained from `python_version`, if specified.
+ """
+ # use mon.a for ceph_admin
+ (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()
+
+ try:
+ py_ver = str(config['python_version'])
+ except KeyError:
+ pass
+ else:
+ supported_versions = ['2', '3']
+ if py_ver not in supported_versions:
+ raise ValueError("python_version must be: {}, not {}".format(
+ ' or '.join(supported_versions), py_ver
+ ))
+
+ log.info("Installing Python")
+ system_type = teuthology.get_system_type(ceph_admin)
+
+ if system_type == 'rpm':
+ package = 'python36' if py_ver == '3' else 'python'
+ ctx.cluster.run(args=[
+ 'sudo', 'yum', '-y', 'install',
+ package, 'python-virtualenv'
+ ])
+ else:
+ package = 'python3' if py_ver == '3' else 'python'
+ ctx.cluster.run(args=[
+ 'sudo', 'apt-get', '-y', '--force-yes', 'install',
+ package, 'python-virtualenv'
+ ])
+
+ log.info('Downloading ceph-deploy...')
+ testdir = teuthology.get_testdir(ctx)
+ ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')
+
+ ceph_admin.run(
+ args=[
+ 'git', 'clone', '-b', ceph_deploy_branch,
+ teuth_config.ceph_git_base_url + 'ceph-deploy.git',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ ],
+ )
+ args = [
+ 'cd',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ run.Raw('&&'),
+ './bootstrap',
+ ]
+ try:
+ args.append(str(config['python_version']))
+ except KeyError:
+ pass
+ ceph_admin.run(args=args)
+
+ try:
+ yield
+ finally:
+ log.info('Removing ceph-deploy ...')
+ ceph_admin.run(
+ args=[
+ 'rm',
+ '-rf',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ ],
+ )
+
+
+def is_healthy(ctx, config):
+ """Wait until a Ceph cluster is healthy."""
+ testdir = teuthology.get_testdir(ctx)
+ ceph_admin = teuthology.get_first_mon(ctx, config)
+ (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
+ max_tries = 90 # 90 tries * 10 secs --> 15 minutes
+ tries = 0
+ while True:
+ tries += 1
+ if tries >= max_tries:
+ msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
+ remote.run(
+ args=[
+ 'cd',
+ '{tdir}'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'sudo', 'ceph',
+ 'report',
+ ],
+ )
+ raise RuntimeError(msg)
+
+ out = remote.sh(
+ [
+ 'cd',
+ '{tdir}'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'sudo', 'ceph',
+ 'health',
+ ],
+ logger=log.getChild('health'),
+ )
+ log.info('Ceph health: %s', out.rstrip('\n'))
+ if out.split(None, 1)[0] == 'HEALTH_OK':
+ break
+ time.sleep(10)
+
+
+def get_nodes_using_role(ctx, target_role):
+ """
+ Extract the names of nodes that match a given role from a cluster, and modify the
+ cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
+ uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
+ """
+
+ # Nodes containing a service of the specified role
+ nodes_of_interest = []
+
+ # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
+ modified_remotes = {}
+ ceph_deploy_mapped = dict()
+ for _remote, roles_for_host in ctx.cluster.remotes.items():
+ modified_remotes[_remote] = []
+ for svc_id in roles_for_host:
+ if svc_id.startswith("{0}.".format(target_role)):
+ fqdn = str(_remote).split('@')[-1]
+ nodename = str(str(_remote).split('.')[0]).split('@')[1]
+ if target_role == 'mon':
+ nodes_of_interest.append(fqdn)
+ else:
+ nodes_of_interest.append(nodename)
+ mapped_role = "{0}.{1}".format(target_role, nodename)
+ modified_remotes[_remote].append(mapped_role)
+ # keep dict of mapped role for later use by tasks
+ # eg. mon.a => mon.node1
+ ceph_deploy_mapped[svc_id] = mapped_role
+ else:
+ modified_remotes[_remote].append(svc_id)
+
+ ctx.cluster.remotes = modified_remotes
+ # since the function is called multiple times for target roles
+ # append new mapped roles
+ if not hasattr(ctx.cluster, 'mapped_role'):
+ ctx.cluster.mapped_role = ceph_deploy_mapped
+ else:
+ ctx.cluster.mapped_role.update(ceph_deploy_mapped)
+ log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role))
+ return nodes_of_interest
+
+
+def get_dev_for_osd(ctx, config):
+ """Get a list of all osd device names."""
+ osd_devs = []
+ for remote, roles_for_host in ctx.cluster.remotes.items():
+ host = remote.name.split('@')[-1]
+ shortname = host.split('.')[0]
+ devs = teuthology.get_scratch_devices(remote)
+ num_osd_per_host = list(
+ teuthology.roles_of_type(
+ roles_for_host, 'osd'))
+ num_osds = len(num_osd_per_host)
+ if config.get('separate_journal_disk') is not None:
+ num_devs_reqd = 2 * num_osds
+ assert num_devs_reqd <= len(
+ devs), 'fewer data and journal disks than required ' + shortname
+ for dindex in range(0, num_devs_reqd, 2):
+ jd_index = dindex + 1
+ dev_short = devs[dindex].split('/')[-1]
+ jdev_short = devs[jd_index].split('/')[-1]
+ osd_devs.append((shortname, dev_short, jdev_short))
+ else:
+ assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
+ for dev in devs[:num_osds]:
+ dev_short = dev.split('/')[-1]
+ osd_devs.append((shortname, dev_short))
+ return osd_devs
+
+
+def get_all_nodes(ctx, config):
+ """Return a string of node names separated by blanks"""
+ nodelist = []
+ for t, k in ctx.config['targets'].items():
+ host = t.split('@')[-1]
+ simple_host = host.split('.')[0]
+ nodelist.append(simple_host)
+ nodelist = " ".join(nodelist)
+ return nodelist
+
+@contextlib.contextmanager
+def build_ceph_cluster(ctx, config):
+ """Build a ceph cluster"""
+
+ # Expect to find ceph_admin on the first mon by ID, same place that the download task
+ # puts it. Remember this here, because subsequently IDs will change from those in
+ # the test config to those that ceph-deploy invents.
+
+ (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys()
+
+ def execute_ceph_deploy(cmd):
+ """Remotely execute a ceph_deploy command"""
+ return ceph_admin.run(
+ args=[
+ 'cd',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ run.Raw('&&'),
+ run.Raw(cmd),
+ ],
+ check_status=False,
+ ).exitstatus
+
+ def ceph_disk_osd_create(ctx, config):
+ node_dev_list = get_dev_for_osd(ctx, config)
+ no_of_osds = 0
+ for d in node_dev_list:
+ node = d[0]
+ for disk in d[1:]:
+ zap = './ceph-deploy disk zap ' + node + ':' + disk
+ estatus = execute_ceph_deploy(zap)
+ if estatus != 0:
+ raise RuntimeError("ceph-deploy: Failed to zap osds")
+ osd_create_cmd = './ceph-deploy osd create '
+ # first check for filestore, default is bluestore with ceph-deploy
+ if config.get('filestore') is not None:
+ osd_create_cmd += '--filestore '
+ elif config.get('bluestore') is not None:
+ osd_create_cmd += '--bluestore '
+ if config.get('dmcrypt') is not None:
+ osd_create_cmd += '--dmcrypt '
+ osd_create_cmd += ":".join(d)
+ estatus_osd = execute_ceph_deploy(osd_create_cmd)
+ if estatus_osd == 0:
+ log.info('successfully created osd')
+ no_of_osds += 1
+ else:
+ raise RuntimeError("ceph-deploy: Failed to create osds")
+ return no_of_osds
+
+ def ceph_volume_osd_create(ctx, config):
+ osds = ctx.cluster.only(teuthology.is_type('osd'))
+ no_of_osds = 0
+ for remote in osds.remotes.keys():
+ # all devs should be lvm
+ osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' '
+ # default is bluestore so we just need config item for filestore
+ roles = ctx.cluster.remotes[remote]
+ dev_needed = len([role for role in roles
+ if role.startswith('osd')])
+ all_devs = teuthology.get_scratch_devices(remote)
+ log.info("node={n}, need_devs={d}, available={a}".format(
+ n=remote.shortname,
+ d=dev_needed,
+ a=all_devs,
+ ))
+ devs = all_devs[0:dev_needed]
+ # rest of the devices can be used for journal if required
+ jdevs = dev_needed
+ for device in devs:
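+ # devices are expected to be lvm paths (/dev/<vg>/<lv>); keep only
+ # the <vg>/<lv> part for ceph-volume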
+ device_split = device.split('/')
+ lv_device = device_split[-2] + '/' + device_split[-1]
+ if config.get('filestore') is not None:
+ osd_create_cmd += '--filestore --data ' + lv_device + ' '
+ # filestore with ceph-volume also needs journal disk
+ try:
+ jdevice = all_devs.pop(jdevs)
+ except IndexError:
+ raise RuntimeError("No device available for \
+ journal configuration")
+ jdevice_split = jdevice.split('/')
+ j_lv = jdevice_split[-2] + '/' + jdevice_split[-1]
+ osd_create_cmd += '--journal ' + j_lv
+ else:
+ osd_create_cmd += ' --data ' + lv_device
+ estatus_osd = execute_ceph_deploy(osd_create_cmd)
+ if estatus_osd == 0:
+ log.info('successfully created osd')
+ no_of_osds += 1
+ else:
+ raise RuntimeError("ceph-deploy: Failed to create osds")
+ return no_of_osds
+
+ try:
+ log.info('Building ceph cluster using ceph-deploy...')
+ testdir = teuthology.get_testdir(ctx)
+ ceph_branch = None
+ if config.get('branch') is not None:
+ cbranch = config.get('branch')
+ for var, val in cbranch.items():
+ ceph_branch = '--{var}={val}'.format(var=var, val=val)
+ all_nodes = get_all_nodes(ctx, config)
+ mds_nodes = get_nodes_using_role(ctx, 'mds')
+ mds_nodes = " ".join(mds_nodes)
+ mon_node = get_nodes_using_role(ctx, 'mon')
+ mon_nodes = " ".join(mon_node)
+ # skip mgr based on config item
+ # this is needed when test uses latest code to install old ceph
+ # versions
+ skip_mgr = config.get('skip-mgr', False)
+ if not skip_mgr:
+ mgr_nodes = get_nodes_using_role(ctx, 'mgr')
+ mgr_nodes = " ".join(mgr_nodes)
+ new_mon = './ceph-deploy new' + " " + mon_nodes
+ if not skip_mgr:
+ mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
+ mon_hostname = mon_nodes.split(' ')[0]
+ mon_hostname = str(mon_hostname)
+ gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
+ deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
+
+ if mon_nodes is None:
+ raise RuntimeError("no monitor nodes in the config file")
+
+ estatus_new = execute_ceph_deploy(new_mon)
+ if estatus_new != 0:
+ raise RuntimeError("ceph-deploy: new command failed")
+
+ log.info('adding config inputs...')
+ testdir = teuthology.get_testdir(ctx)
+ conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
+
+ if config.get('conf') is not None:
+ confp = config.get('conf')
+ for section, keys in confp.items():
+ lines = '[{section}]\n'.format(section=section)
+ teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
+ sudo=True)
+ for key, value in keys.items():
+ log.info("[%s] %s = %s" % (section, key, value))
+ lines = '{key} = {value}\n'.format(key=key, value=value)
+ teuthology.append_lines_to_file(
+ ceph_admin, conf_path, lines, sudo=True)
+
+ # install ceph
+ dev_branch = ctx.config['branch']
+ branch = '--dev={branch}'.format(branch=dev_branch)
+ if ceph_branch:
+ option = ceph_branch
+ else:
+ option = branch
+ install_nodes = './ceph-deploy install ' + option + " " + all_nodes
+ estatus_install = execute_ceph_deploy(install_nodes)
+ if estatus_install != 0:
+ raise RuntimeError("ceph-deploy: Failed to install ceph")
+ # install ceph-test package too
+ install_nodes2 = './ceph-deploy install --tests ' + option + \
+ " " + all_nodes
+ estatus_install = execute_ceph_deploy(install_nodes2)
+ if estatus_install != 0:
+ raise RuntimeError("ceph-deploy: Failed to install ceph-test")
+
+ mon_create_nodes = './ceph-deploy mon create-initial'
+ # If the following fails, it is OK, it might just be that the monitors
+ # are taking way more than a minute/monitor to form quorum, so let's
+ # try the next block which will wait up to 15 minutes to gatherkeys.
+ execute_ceph_deploy(mon_create_nodes)
+
+ estatus_gather = execute_ceph_deploy(gather_keys)
+ if estatus_gather != 0:
+ raise RuntimeError("ceph-deploy: Failed during gather keys")
+
+ # install admin key on mons (ceph-create-keys doesn't do this any more)
+ mons = ctx.cluster.only(teuthology.is_type('mon'))
+ for remote in mons.remotes.keys():
+ execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname)
+
+ # create osd's
+ if config.get('use-ceph-volume', False):
+ no_of_osds = ceph_volume_osd_create(ctx, config)
+ else:
+ # this method will only work with ceph-deploy v1.5.39 or older
+ no_of_osds = ceph_disk_osd_create(ctx, config)
+
+ if not skip_mgr:
+ execute_ceph_deploy(mgr_create)
+
+ if mds_nodes:
+ estatus_mds = execute_ceph_deploy(deploy_mds)
+ if estatus_mds != 0:
+ raise RuntimeError("ceph-deploy: Failed to deploy mds")
+
+ if config.get('test_mon_destroy') is not None:
+ for d in range(1, len(mon_node)):
+ mon_destroy_nodes = './ceph-deploy mon destroy' + \
+ " " + mon_node[d]
+ estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
+ if estatus_mon_d != 0:
+ raise RuntimeError("ceph-deploy: Failed to delete monitor")
+
+
+
+ if config.get('wait-for-healthy', True) and no_of_osds >= 2:
+ is_healthy(ctx=ctx, config=None)
+
+ log.info('Setting up client nodes...')
+ conf_path = '/etc/ceph/ceph.conf'
+ admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+ conf_data = teuthology.get_file(
+ remote=mon0_remote,
+ path=conf_path,
+ sudo=True,
+ )
+ admin_keyring = teuthology.get_file(
+ remote=mon0_remote,
+ path=admin_keyring_path,
+ sudo=True,
+ )
+
+ clients = ctx.cluster.only(teuthology.is_type('client'))
+ for remot, roles_for_host in clients.remotes.items():
+ for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
+ client_keyring = \
+ '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
+ mon0_remote.run(
+ args=[
+ 'cd',
+ '{tdir}'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'sudo', 'bash', '-c',
+ run.Raw('"'), 'ceph',
+ 'auth',
+ 'get-or-create',
+ 'client.{id}'.format(id=id_),
+ 'mds', 'allow',
+ 'mon', 'allow *',
+ 'osd', 'allow *',
+ run.Raw('>'),
+ client_keyring,
+ run.Raw('"'),
+ ],
+ )
+ key_data = teuthology.get_file(
+ remote=mon0_remote,
+ path=client_keyring,
+ sudo=True,
+ )
+ teuthology.sudo_write_file(
+ remote=remot,
+ path=client_keyring,
+ data=key_data,
+ perms='0644'
+ )
+ teuthology.sudo_write_file(
+ remote=remot,
+ path=admin_keyring_path,
+ data=admin_keyring,
+ perms='0644'
+ )
+ teuthology.sudo_write_file(
+ remote=remot,
+ path=conf_path,
+ data=conf_data,
+ perms='0644'
+ )
+
+ if mds_nodes:
+ log.info('Configuring CephFS...')
+ Filesystem(ctx, create=True)
+ elif not config.get('only_mon'):
+ raise RuntimeError(
+ "The cluster is NOT operational due to insufficient OSDs")
+ # create rbd pool
+ ceph_admin.run(
+ args=[
+ 'sudo', 'ceph', '--cluster', 'ceph',
+ 'osd', 'pool', 'create', 'rbd', '128', '128'],
+ check_status=False)
+ ceph_admin.run(
+ args=[
+ 'sudo', 'ceph', '--cluster', 'ceph',
+ 'osd', 'pool', 'application', 'enable',
+ 'rbd', 'rbd', '--yes-i-really-mean-it'
+ ],
+ check_status=False)
+ yield
+
+ except Exception:
+ log.info(
+ "Error encountered, logging exception before tearing down ceph-deploy")
+ log.info(traceback.format_exc())
+ raise
+ finally:
+ if config.get('keep_running'):
+ return
+ log.info('Stopping ceph...')
+ ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
+ check_status=False)
+ time.sleep(4)
+
+ # and now just check for the processes themselves, as if upstart/sysvinit
+ # is lying to us. Ignore errors if the grep fails
+ ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
+ 'grep', '-v', 'grep', run.Raw('|'),
+ 'grep', 'ceph'], check_status=False)
+ ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'),
+ 'grep', 'ceph'], check_status=False)
+
+ if ctx.archive is not None:
+ # archive mon data, too
+ log.info('Archiving mon data...')
+ path = os.path.join(ctx.archive, 'data')
+ os.makedirs(path)
+ mons = ctx.cluster.only(teuthology.is_type('mon'))
+ for remote, roles in mons.remotes.items():
+ for role in roles:
+ if role.startswith('mon.'):
+ teuthology.pull_directory_tarball(
+ remote,
+ '/var/lib/ceph/mon',
+ path + '/' + role + '.tgz')
+
+ log.info('Compressing logs...')
+ run.wait(
+ ctx.cluster.run(
+ args=[
+ 'sudo',
+ 'find',
+ '/var/log/ceph',
+ '-name',
+ '*.log',
+ '-print0',
+ run.Raw('|'),
+ 'sudo',
+ 'xargs',
+ '-0',
+ '--no-run-if-empty',
+ '--',
+ 'gzip',
+ '--',
+ ],
+ wait=False,
+ ),
+ )
+
+ log.info('Archiving logs...')
+ path = os.path.join(ctx.archive, 'remote')
+ os.makedirs(path)
+ for remote in ctx.cluster.remotes.keys():
+ sub = os.path.join(path, remote.shortname)
+ os.makedirs(sub)
+ teuthology.pull_directory(remote, '/var/log/ceph',
+ os.path.join(sub, 'log'))
+
+ # Prevent these from being undefined if the try block fails
+ all_nodes = get_all_nodes(ctx, config)
+ purge_nodes = './ceph-deploy purge' + " " + all_nodes
+ purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
+
+ log.info('Purging package...')
+ execute_ceph_deploy(purge_nodes)
+ log.info('Purging data...')
+ execute_ceph_deploy(purgedata_nodes)
+
+
+@contextlib.contextmanager
+def cli_test(ctx, config):
+ """
+ ceph-deploy CLI test that exercises the most commonly used commands,
+ ensures they all work, and also starts up the init system.
+
+ """
+ log.info('Ceph-deploy Test')
+ if config is None:
+ config = {}
+ test_branch = ''
+ conf_dir = teuthology.get_testdir(ctx) + "/cdtest"
+
+ def execute_cdeploy(admin, cmd, path):
+ """Execute ceph-deploy commands """
+ """Either use git path or repo path """
+ args = ['cd', conf_dir, run.Raw(';')]
+ if path:
+ args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
+ else:
+ args.append('ceph-deploy')
+ args.append(run.Raw(cmd))
+ ec = admin.run(args=args, check_status=False).exitstatus
+ if ec != 0:
+ raise RuntimeError(
+ "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))
+
+ if config.get('rhbuild'):
+ path = None
+ else:
+ path = teuthology.get_testdir(ctx)
+ # test on branch from config eg: wip-* , master or next etc
+ # packages for all distro's should exist for wip*
+ if ctx.config.get('branch'):
+ branch = ctx.config.get('branch')
+ test_branch = ' --dev={branch} '.format(branch=branch)
+ mons = ctx.cluster.only(teuthology.is_type('mon'))
+ for node, role in mons.remotes.items():
+ admin = node
+ admin.run(args=['mkdir', conf_dir], check_status=False)
+ nodename = admin.shortname
+ system_type = teuthology.get_system_type(admin)
+ if config.get('rhbuild'):
+ admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
+ log.info('system type is %s', system_type)
+ osds = ctx.cluster.only(teuthology.is_type('osd'))
+
+ for remote, roles in osds.remotes.items():
+ devs = teuthology.get_scratch_devices(remote)
+ log.info("roles %s", roles)
+ if (len(devs) < 3):
+ log.error(
+ 'Test needs minimum of 3 devices, only found %s',
+ str(devs))
+ raise RuntimeError("Needs minimum of 3 devices ")
+
+ conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
+ new_cmd = 'new ' + nodename
+ execute_cdeploy(admin, new_cmd, path)
+ if config.get('conf') is not None:
+ confp = config.get('conf')
+ for section, keys in confp.items():
+ lines = '[{section}]\n'.format(section=section)
+ teuthology.append_lines_to_file(admin, conf_path, lines,
+ sudo=True)
+ for key, value in keys.items():
+ log.info("[%s] %s = %s" % (section, key, value))
+ lines = '{key} = {value}\n'.format(key=key, value=value)
+ teuthology.append_lines_to_file(admin, conf_path, lines,
+ sudo=True)
+ new_mon_install = 'install {branch} --mon '.format(
+ branch=test_branch) + nodename
+ new_mgr_install = 'install {branch} --mgr '.format(
+ branch=test_branch) + nodename
+ new_osd_install = 'install {branch} --osd '.format(
+ branch=test_branch) + nodename
+ new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
+ create_initial = 'mon create-initial '
+ mgr_create = 'mgr create ' + nodename
+ # either use create-keys or push command
+ push_keys = 'admin ' + nodename
+ execute_cdeploy(admin, new_mon_install, path)
+ execute_cdeploy(admin, new_mgr_install, path)
+ execute_cdeploy(admin, new_osd_install, path)
+ execute_cdeploy(admin, new_admin, path)
+ execute_cdeploy(admin, create_initial, path)
+ execute_cdeploy(admin, mgr_create, path)
+ execute_cdeploy(admin, push_keys, path)
+
+ for i in range(3):
+ zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
+ prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
+ execute_cdeploy(admin, zap_disk, path)
+ execute_cdeploy(admin, prepare, path)
+
+ log.info("list files for debugging purpose to check file permissions")
+ admin.run(args=['ls', run.Raw('-lt'), conf_dir])
+ remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
+ out = remote.sh('sudo ceph health')
+ log.info('Ceph health: %s', out.rstrip('\n'))
+ log.info("Waiting for cluster to become healthy")
+ with contextutil.safe_while(sleep=10, tries=6,
+ action='check health') as proceed:
+ while proceed():
+ out = remote.sh('sudo ceph health')
+ if (out.split(None, 1)[0] == 'HEALTH_OK'):
+ break
+ rgw_install = 'install {branch} --rgw {node}'.format(
+ branch=test_branch,
+ node=nodename,
+ )
+ rgw_create = 'rgw create ' + nodename
+ execute_cdeploy(admin, rgw_install, path)
+ execute_cdeploy(admin, rgw_create, path)
+ log.info('All ceph-deploy cli tests passed')
+ try:
+ yield
+ finally:
+ log.info("cleaning up")
+ ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'],
+ check_status=False)
+ time.sleep(4)
+ for i in range(3):
+ umount_dev = "{d}1".format(d=devs[i])
+ r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
+ cmd = 'purge ' + nodename
+ execute_cdeploy(admin, cmd, path)
+ cmd = 'purgedata ' + nodename
+ execute_cdeploy(admin, cmd, path)
+ log.info("Removing temporary dir")
+ admin.run(
+ args=[
+ 'rm',
+ run.Raw('-rf'),
+ run.Raw(conf_dir)],
+ check_status=False)
+ if config.get('rhbuild'):
+ admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
+
+
+@contextlib.contextmanager
+def single_node_test(ctx, config):
+ """
+ - ceph-deploy.single_node_test: null
+
+ #rhbuild testing
+ - ceph-deploy.single_node_test:
+ rhbuild: 1.2.3
+
+ """
+ log.info("Testing ceph-deploy on single node")
+ if config is None:
+ config = {}
+ overrides = ctx.config.get('overrides', {})
+ teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
+
+ if config.get('rhbuild'):
+ log.info("RH Build, Skip Download")
+ with contextutil.nested(
+ lambda: cli_test(ctx=ctx, config=config),
+ ):
+ yield
+ else:
+ with contextutil.nested(
+ lambda: install_fn.ship_utilities(ctx=ctx, config=None),
+ lambda: download_ceph_deploy(ctx=ctx, config=config),
+ lambda: cli_test(ctx=ctx, config=config),
+ ):
+ yield
+
+
+@contextlib.contextmanager
+def upgrade(ctx, config):
+ """
+ Upgrade using ceph-deploy
+ eg:
+ ceph-deploy.upgrade:
+ # to upgrade to specific branch, use
+ branch:
+ stable: jewel
+ # to setup mgr node, use
+ setup-mgr-node: True
+ # to wait for the cluster to be healthy after the upgrade, use
+ wait-for-healthy: True
+ roles: (upgrades the below roles serially)
+ mon.a
+ mon.b
+ osd.0
+ """
+ roles = config.get('roles')
+ # get the roles that are mapped as per ceph-deploy
+ # roles are mapped for mon/mds eg: mon.a => mon.host_short_name
+ mapped_role = ctx.cluster.mapped_role
+ log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role))
+ if config.get('branch'):
+ branch = config.get('branch')
+ (var, val) = list(branch.items())[0]
+ ceph_branch = '--{var}={val}'.format(var=var, val=val)
+ else:
+ # default to wip-branch under test
+ dev_branch = ctx.config['branch']
+ ceph_branch = '--dev={branch}'.format(branch=dev_branch)
+ # get the node used for initial deployment which is mon.a
+ mon_a = mapped_role.get('mon.a')
+ (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys()
+ testdir = teuthology.get_testdir(ctx)
+ cmd = './ceph-deploy install ' + ceph_branch
+ for role in roles:
+ # check if this role is mapped (mon or mds)
+ if mapped_role.get(role):
+ role = mapped_role.get(role)
+ remotes_and_roles = ctx.cluster.only(role).remotes
+ for remote, roles in remotes_and_roles.items():
+ nodename = remote.shortname
+ cmd = cmd + ' ' + nodename
+ log.info("Upgrading ceph on %s", nodename)
+ ceph_admin.run(
+ args=[
+ 'cd',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ run.Raw('&&'),
+ run.Raw(cmd),
+ ],
+ )
+ # restart all ceph services; ideally the upgrade would do this, but it does not
+ remote.run(
+ args=[
+ 'sudo', 'systemctl', 'restart', 'ceph.target'
+ ]
+ )
+ ceph_admin.run(args=['sudo', 'ceph', '-s'])
+
+ # workaround for http://tracker.ceph.com/issues/20950
+ # write the correct mgr key to disk
+ if config.get('setup-mgr-node', None):
+ mons = ctx.cluster.only(teuthology.is_type('mon'))
+ for remote, roles in mons.remotes.items():
+ remote.run(
+ args=[
+ run.Raw('sudo ceph auth get client.bootstrap-mgr'),
+ run.Raw('|'),
+ run.Raw('sudo tee'),
+ run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
+ ]
+ )
+
+ if config.get('setup-mgr-node', None):
+ mgr_nodes = get_nodes_using_role(ctx, 'mgr')
+ mgr_nodes = " ".join(mgr_nodes)
+ mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
+ mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
+ # install mgr
+ ceph_admin.run(
+ args=[
+ 'cd',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ run.Raw('&&'),
+ run.Raw(mgr_install),
+ ],
+ )
+ # create mgr
+ ceph_admin.run(
+ args=[
+ 'cd',
+ '{tdir}/ceph-deploy'.format(tdir=testdir),
+ run.Raw('&&'),
+ run.Raw(mgr_create),
+ ],
+ )
+ ceph_admin.run(args=['sudo', 'ceph', '-s'])
+ if config.get('wait-for-healthy', None):
+ wait_until_healthy(ctx, ceph_admin, use_sudo=True)
+ yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Set up and tear down a Ceph cluster.
+
+ For example::
+
+ tasks:
+ - install:
+ extras: yes
+ - ssh_keys:
+ - ceph-deploy:
+ branch:
+ stable: bobtail
+ mon_initial_members: 1
+ ceph-deploy-branch: my-ceph-deploy-branch
+ only_mon: true
+ keep_running: true
+ # either choose bluestore or filestore, default is bluestore
+ bluestore: True
+ # or
+ filestore: True
+ # skip install of mgr for old release using below flag
+ skip-mgr: True ( default is False )
+ # to use ceph-volume instead of ceph-disk
+ # ceph-disk can only be used with old ceph-deploy release from pypi
+ use-ceph-volume: true
+
+ tasks:
+ - install:
+ extras: yes
+ - ssh_keys:
+ - ceph-deploy:
+ branch:
+ dev: master
+ conf:
+ mon:
+ debug mon = 20
+
+ tasks:
+ - install:
+ extras: yes
+ - ssh_keys:
+ - ceph-deploy:
+ branch:
+ testing:
+ dmcrypt: yes
+ separate_journal_disk: yes
+
+ """
+ if config is None:
+ config = {}
+
+ assert isinstance(config, dict), \
+ "task ceph-deploy only supports a dictionary for configuration"
+
+ overrides = ctx.config.get('overrides', {})
+ teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
+
+ if config.get('branch') is not None:
+ assert isinstance(
+ config['branch'], dict), 'branch must be a dictionary'
+
+ log.info('task ceph-deploy with config ' + str(config))
+
+ # we need to use 1.5.39-stable for testing jewel or master branch with
+ # ceph-disk
+ if config.get('use-ceph-volume', False) is False:
+ # check we are not testing specific branch
+ if config.get('ceph-deploy-branch', False) is False:
+ config['ceph-deploy-branch'] = '1.5.39-stable'
+
+ with contextutil.nested(
+ lambda: install_fn.ship_utilities(ctx=ctx, config=None),
+ lambda: download_ceph_deploy(ctx=ctx, config=config),
+ lambda: build_ceph_cluster(ctx=ctx, config=config),
+ ):
+ yield
diff --git a/qa/tasks/ceph_fuse.py b/qa/tasks/ceph_fuse.py
new file mode 100644
index 00000000..1439ccff
--- /dev/null
+++ b/qa/tasks/ceph_fuse.py
@@ -0,0 +1,160 @@
+"""
+Ceph FUSE client task
+"""
+
+import contextlib
+import logging
+
+from teuthology import misc as teuthology
+from tasks.cephfs.fuse_mount import FuseMount
+
+log = logging.getLogger(__name__)
+
+
+def get_client_configs(ctx, config):
+ """
+ Get a map of the configuration for each FUSE client in the configuration by
+ combining the configuration of the current task with any global overrides.
+
+ :param ctx: Context instance
+ :param config: configuration for this task
+ :return: dict of client name to config or to None
+ """
+ if config is None:
+ config = dict(('client.{id}'.format(id=id_), None)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
+ elif isinstance(config, list):
+ config = dict((name, None) for name in config)
+
+ overrides = ctx.config.get('overrides', {})
+ teuthology.deep_merge(config, overrides.get('ceph-fuse', {}))
+
+ return config
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Mount/unmount a ``ceph-fuse`` client.
+
+ The config is optional and defaults to mounting on all clients. If
+ a config is given, it is expected to be a list of clients to do
+ this operation on. This lets you e.g. set up one client with
+ ``ceph-fuse`` and another with ``kclient``.
+
+ Example that mounts all clients::
+
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ - interactive:
+
+ Example that uses both ``kclient`` and ``ceph-fuse``::
+
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0]
+ - kclient: [client.1]
+ - interactive:
+
+ Example that enables valgrind:
+
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ client.0:
+ valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
+ - interactive:
+
+ Example that stops an already-mounted client:
+
+ ::
+
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0]
+ - ... do something that requires the FS mounted ...
+ - ceph-fuse:
+ client.0:
+ mounted: false
+ - ... do something that requires the FS unmounted ...
+
+ Example that adds more generous wait time for mount (for virtual machines)::
+
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ client.0:
+ mount_wait: 60 # default is 0, do not wait before checking /sys/
+ mount_timeout: 120 # default is 30, give up if /sys/ is not populated
+ - interactive:
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ log.info('Running ceph_fuse task...')
+
+ testdir = teuthology.get_testdir(ctx)
+ log.info("config is {}".format(str(config)))
+ config = get_client_configs(ctx, config)
+ log.info("new config is {}".format(str(config)))
+
+ # List clients we will configure mounts for, default is all clients
+ clients = list(teuthology.get_clients(ctx=ctx, roles=filter(lambda x: 'client.' in x, config.keys())))
+
+ all_mounts = getattr(ctx, 'mounts', {})
+ mounted_by_me = {}
+ skipped = {}
+
+ # Construct any new FuseMount instances
+ for id_, remote in clients:
+ client_config = config.get("client.%s" % id_)
+ if client_config is None:
+ client_config = {}
+
+ auth_id = client_config.get("auth_id", id_)
+
+ skip = client_config.get("skip", False)
+ if skip:
+ skipped[id_] = skip
+ continue
+
+ if id_ not in all_mounts:
+ fuse_mount = FuseMount(ctx, client_config, testdir, auth_id, remote)
+ all_mounts[id_] = fuse_mount
+ else:
+ # Catch bad configs where someone has e.g. tried to use ceph-fuse and kcephfs for the same client
+ assert isinstance(all_mounts[id_], FuseMount)
+
+ if not config.get("disabled", False) and client_config.get('mounted', True):
+ mounted_by_me[id_] = {"config": client_config, "mount": all_mounts[id_]}
+
+ ctx.mounts = all_mounts
+
+ # Mount any clients we have been asked to (default to mount all)
+ log.info('Mounting ceph-fuse clients...')
+ for info in mounted_by_me.values():
+ config = info["config"]
+ mount_path = config.get("mount_path")
+ mountpoint = config.get("mountpoint")
+ info["mount"].mount(mountpoint=mountpoint, mount_path=mount_path)
+
+ for info in mounted_by_me.values():
+ info["mount"].wait_until_mounted()
+
+ # Umount any pre-existing clients that we have not been asked to mount
+ for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()):
+ mount = all_mounts[client_id]
+ if mount.is_mounted():
+ mount.umount_wait()
+
+ try:
+ yield all_mounts
+ finally:
+ log.info('Unmounting ceph-fuse clients...')
+
+ for info in mounted_by_me.values():
+ # Conditional because an inner context might have umounted it
+ mount = info["mount"]
+ if mount.is_mounted():
+ mount.umount_wait()
diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
new file mode 100644
index 00000000..3e1a2ec5
--- /dev/null
+++ b/qa/tasks/ceph_manager.py
@@ -0,0 +1,2642 @@
+"""
+ceph manager -- Thrasher and CephManager objects
+"""
+from functools import wraps
+import contextlib
+import random
+import signal
+import time
+import gevent
+import base64
+import json
+import logging
+import threading
+import traceback
+import os
+
+from io import BytesIO, StringIO
+from teuthology import misc as teuthology
+from tasks.scrub import Scrubber
+from tasks.util.rados import cmd_erasure_code_profile
+from tasks.util import get_remote
+from teuthology.contextutil import safe_while
+from teuthology.orchestra.remote import Remote
+from teuthology.orchestra import run
+from teuthology.exceptions import CommandFailedError
+
+try:
+ from subprocess import DEVNULL # py3k
+except ImportError:
+ DEVNULL = open(os.devnull, 'r+')
+
+DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf'
+
+log = logging.getLogger(__name__)
+
+
+def write_conf(ctx, conf_path=DEFAULT_CONF_PATH, cluster='ceph'):
+ conf_fp = BytesIO()
+ ctx.ceph[cluster].conf.write(conf_fp)
+ conf_fp.seek(0)
+ writes = ctx.cluster.run(
+ args=[
+ 'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'),
+ 'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'),
+ 'sudo', 'tee', conf_path, run.Raw('&&'),
+ 'sudo', 'chmod', '0644', conf_path,
+ run.Raw('>'), '/dev/null',
+
+ ],
+ stdin=run.PIPE,
+ wait=False)
+ teuthology.feed_many_stdins_and_close(conf_fp, writes)
+ run.wait(writes)
+
+
+def mount_osd_data(ctx, remote, cluster, osd):
+ """
+ Mount a remote OSD
+
+ :param ctx: Context
+ :param remote: Remote site
+ :param cluster: name of ceph cluster
+ :param osd: Osd name
+ """
+ log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote))
+ role = "{0}.osd.{1}".format(cluster, osd)
+ alt_role = role if cluster != 'ceph' else "osd.{0}".format(osd)
+ if remote in ctx.disk_config.remote_to_roles_to_dev:
+ if alt_role in ctx.disk_config.remote_to_roles_to_dev[remote]:
+ role = alt_role
+ if role not in ctx.disk_config.remote_to_roles_to_dev[remote]:
+ return
+ dev = ctx.disk_config.remote_to_roles_to_dev[remote][role]
+ mount_options = ctx.disk_config.\
+ remote_to_roles_to_dev_mount_options[remote][role]
+ fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role]
+ mnt = os.path.join('/var/lib/ceph/osd', '{0}-{1}'.format(cluster, osd))
+
+ log.info('Mounting osd.{o}: dev: {d} on {n}, cluster: {c}, '
+ 'mountpoint: {p}, type: {t}, options: {v}'.format(
+ o=osd, d=dev, n=remote.name, p=mnt, t=fstype,
+ v=mount_options, c=cluster))
+
+ remote.run(
+ args=[
+ 'sudo',
+ 'mount',
+ '-t', fstype,
+ '-o', ','.join(mount_options),
+ dev,
+ mnt,
+ ]
+ )
+
+
+class Thrasher:
+ """
+ Object used to thrash Ceph
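+
+ A sketch of the kind of config dict this class consumes; the keys
+ mirror the ``self.config.get(...)`` lookups in ``__init__`` and the
+ values are purely illustrative::
+
+ config = {
+ 'chance_down': 0.4, # probability of killing a live osd
+ 'min_in': 4, # keep at least this many osds "in"
+ 'revive_timeout': 360, # seconds to wait for a revived osd
+ 'sighup_delay': 0.1, # enable the SIGHUP greenlet with this delay
+ }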
+ """
+ def __init__(self, manager, config, logger=None):
+ self.ceph_manager = manager
+ self.cluster = manager.cluster
+ self.ceph_manager.wait_for_clean()
+ osd_status = self.ceph_manager.get_osd_status()
+ self.in_osds = osd_status['in']
+ self.live_osds = osd_status['live']
+ self.out_osds = osd_status['out']
+ self.dead_osds = osd_status['dead']
+ self.stopping = False
+ self.logger = logger
+ self.config = config
+ self.revive_timeout = self.config.get("revive_timeout", 360)
+ self.pools_to_fix_pgp_num = set()
+ if self.config.get('powercycle'):
+ self.revive_timeout += 120
+ self.clean_wait = self.config.get('clean_wait', 0)
+ self.minin = self.config.get("min_in", 4)
+ self.chance_move_pg = self.config.get('chance_move_pg', 1.0)
+ self.sighup_delay = self.config.get('sighup_delay')
+ self.optrack_toggle_delay = self.config.get('optrack_toggle_delay')
+ self.dump_ops_enable = self.config.get('dump_ops_enable')
+ self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay')
+ self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05)
+ self.chance_thrash_pg_upmap = self.config.get('chance_thrash_pg_upmap', 1.0)
+ self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap_items', 1.0)
+ self.random_eio = self.config.get('random_eio', 0.0)
+ self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3)
+
+ num_osds = self.in_osds + self.out_osds
+ self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds)
+ self.min_pgs = self.config.get("min_pgs_per_pool_osd", 1) * len(num_osds)
+ if self.logger is not None:
+ self.log = lambda x: self.logger.info(x)
+ else:
+ def tmp(x):
+ """
+ Implement log behavior
+ """
+ print(x)
+ self.log = tmp
+ if self.config is None:
+ self.config = dict()
+ # prevent monitor from auto-marking things out while thrasher runs
+ # try both old and new tell syntax, in case we are testing old code
+ self.saved_options = []
+ # assuming that the default settings do not vary from one daemon to
+ # another
+ first_mon = teuthology.get_first_mon(manager.ctx, self.config).split('.')
+ opts = [('mon', 'mon_osd_down_out_interval', 0)]
+ for service, opt, new_value in opts:
+ old_value = manager.get_config(first_mon[0],
+ first_mon[1],
+ opt)
+ self.saved_options.append((service, opt, old_value))
+ manager.inject_args(service, '*', opt, new_value)
+ # initialize ceph_objectstore_tool property - must be done before
+ # do_thrash is spawned - http://tracker.ceph.com/issues/18799
+ if (self.config.get('powercycle') or
+ not self.cmd_exists_on_osds("ceph-objectstore-tool") or
+ self.config.get('disable_objectstore_tool_tests', False)):
+ self.ceph_objectstore_tool = False
+ if self.config.get('powercycle'):
+ self.log("Unable to test ceph-objectstore-tool, "
+ "powercycle testing")
+ else:
+ self.log("Unable to test ceph-objectstore-tool, "
+ "not available on all OSD nodes")
+ else:
+ self.ceph_objectstore_tool = \
+ self.config.get('ceph_objectstore_tool', True)
+ # spawn do_thrash
+ self.thread = gevent.spawn(self.do_thrash)
+ if self.sighup_delay:
+ self.sighup_thread = gevent.spawn(self.do_sighup)
+ if self.optrack_toggle_delay:
+ self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle)
+ if self.dump_ops_enable == "true":
+ self.dump_ops_thread = gevent.spawn(self.do_dump_ops)
+ if self.noscrub_toggle_delay:
+ self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle)
+
+ def cmd_exists_on_osds(self, cmd):
+ allremotes = self.ceph_manager.ctx.cluster.only(\
+ teuthology.is_type('osd', self.cluster)).remotes.keys()
+ allremotes = list(set(allremotes))
+ for remote in allremotes:
+ proc = remote.run(args=['type', cmd], wait=True,
+ check_status=False, stdout=BytesIO(),
+ stderr=BytesIO())
+ if proc.exitstatus != 0:
+ return False
+ return True
+
+ def run_ceph_objectstore_tool(self, remote, osd, cmd):
+ return remote.run(
+ args=['sudo', 'adjust-ulimits', 'ceph-objectstore-tool'] + cmd,
+ wait=True, check_status=False,
+ stdout=StringIO(),
+ stderr=StringIO())
+
+ def kill_osd(self, osd=None, mark_down=False, mark_out=False):
+ """
+ :param osd: Osd to be killed.
+ :param mark_down: Mark down if true.
+ :param mark_out: Mark out if true.
+ """
+ if osd is None:
+ osd = random.choice(self.live_osds)
+ self.log("Killing osd %s, live_osds are %s" % (str(osd),
+ str(self.live_osds)))
+ self.live_osds.remove(osd)
+ self.dead_osds.append(osd)
+ self.ceph_manager.kill_osd(osd)
+ if mark_down:
+ self.ceph_manager.mark_down_osd(osd)
+ if mark_out and osd in self.in_osds:
+ self.out_osd(osd)
+ if self.ceph_objectstore_tool:
+ self.log("Testing ceph-objectstore-tool on down osd")
+ remote = self.ceph_manager.find_remote('osd', osd)
+ FSPATH = self.ceph_manager.get_filepath()
+ JPATH = os.path.join(FSPATH, "journal")
+ exp_osd = imp_osd = osd
+ exp_remote = imp_remote = remote
+ # If an older osd is available we'll move a pg from there
+ if (len(self.dead_osds) > 1 and
+ random.random() < self.chance_move_pg):
+ exp_osd = random.choice(self.dead_osds[:-1])
+ exp_remote = self.ceph_manager.find_remote('osd', exp_osd)
+ if ('keyvaluestore_backend' in
+ self.ceph_manager.ctx.ceph[self.cluster].conf['osd']):
+ prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
+ "--data-path {fpath} --journal-path {jpath} "
+ "--type keyvaluestore "
+ "--log-file="
+ "/var/log/ceph/objectstore_tool.\\$pid.log ".
+ format(fpath=FSPATH, jpath=JPATH))
+ else:
+ prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
+ "--data-path {fpath} --journal-path {jpath} "
+ "--log-file="
+ "/var/log/ceph/objectstore_tool.\\$pid.log ".
+ format(fpath=FSPATH, jpath=JPATH))
+ cmd = (prefix + "--op list-pgs").format(id=exp_osd)
+
+ # ceph-objectstore-tool might be temporarily absent during an
+ # upgrade - see http://tracker.ceph.com/issues/18014
+ with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed:
+ while proceed():
+ proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'],
+ wait=True, check_status=False, stdout=BytesIO(),
+ stderr=BytesIO())
+ if proc.exitstatus == 0:
+ break
+ log.debug("ceph-objectstore-tool binary not present, trying again")
+
+ # ceph-objectstore-tool might bogusly fail with "OSD has the store locked"
+ # see http://tracker.ceph.com/issues/19556
+ with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed:
+ while proceed():
+ proc = exp_remote.run(args=cmd, wait=True,
+ check_status=False,
+ stdout=StringIO(), stderr=StringIO())
+ if proc.exitstatus == 0:
+ break
+ elif (proc.exitstatus == 1 and
+ proc.stderr.getvalue() == "OSD has the store locked"):
+ continue
+ else:
+ raise Exception("ceph-objectstore-tool: "
+ "exp list-pgs failure with status {ret}".
+ format(ret=proc.exitstatus))
+
+ pgs = proc.stdout.getvalue().split('\n')[:-1]
+ if len(pgs) == 0:
+ self.log("No PGs found for osd.{osd}".format(osd=exp_osd))
+ return
+ pg = random.choice(pgs)
+ exp_path = teuthology.get_testdir(self.ceph_manager.ctx)
+ exp_path = os.path.join(exp_path, '{0}.data'.format(self.cluster))
+ exp_path = os.path.join(exp_path,
+ "exp.{pg}.{id}".format(
+ pg=pg,
+ id=exp_osd))
+ # export
+ # Can't use new export-remove op since this is part of upgrade testing
+ cmd = prefix + "--op export --pgid {pg} --file {file}"
+ cmd = cmd.format(id=exp_osd, pg=pg, file=exp_path)
+ proc = exp_remote.run(args=cmd)
+ if proc.exitstatus:
+ raise Exception("ceph-objectstore-tool: "
+ "export failure with status {ret}".
+ format(ret=proc.exitstatus))
+ # remove
+ cmd = prefix + "--force --op remove --pgid {pg}"
+ cmd = cmd.format(id=exp_osd, pg=pg)
+ proc = exp_remote.run(args=cmd)
+ if proc.exitstatus:
+ raise Exception("ceph-objectstore-tool: "
+ "remove failure with status {ret}".
+ format(ret=proc.exitstatus))
+ # If there are at least 2 dead osds we might move the pg
+ if exp_osd != imp_osd:
+ # If pg isn't already on this osd, then we will move it there
+ cmd = (prefix + "--op list-pgs").format(id=imp_osd)
+ proc = imp_remote.run(args=cmd, wait=True,
+ check_status=False, stdout=StringIO())
+ if proc.exitstatus:
+ raise Exception("ceph-objectstore-tool: "
+ "imp list-pgs failure with status {ret}".
+ format(ret=proc.exitstatus))
+ pgs = proc.stdout.getvalue().split('\n')[:-1]
+ if pg not in pgs:
+ self.log("Moving pg {pg} from osd.{fosd} to osd.{tosd}".
+ format(pg=pg, fosd=exp_osd, tosd=imp_osd))
+ if imp_remote != exp_remote:
+ # Copy export file to the other machine
+ self.log("Transfer export file from {srem} to {trem}".
+ format(srem=exp_remote, trem=imp_remote))
+ tmpexport = Remote.get_file(exp_remote, exp_path)
+ Remote.put_file(imp_remote, tmpexport, exp_path)
+ os.remove(tmpexport)
+ else:
+ # Can't move the pg after all
+ imp_osd = exp_osd
+ imp_remote = exp_remote
+ # import
+ cmd = (prefix + "--op import --file {file}")
+ cmd = cmd.format(id=imp_osd, file=exp_path)
+ proc = imp_remote.run(args=cmd, wait=True, check_status=False,
+ stderr=BytesIO())
+ if proc.exitstatus == 1:
+ bogosity = "The OSD you are using is older than the exported PG"
+ if bogosity in proc.stderr.getvalue().decode():
+ self.log("OSD older than exported PG"
+ "...ignored")
+ elif proc.exitstatus == 10:
+ self.log("Pool went away before processing an import"
+ "...ignored")
+ elif proc.exitstatus == 11:
+ self.log("Attempt to import an incompatible export"
+ "...ignored")
+ elif proc.exitstatus == 12:
+ # this should be safe to ignore because we only ever move 1
+ # copy of the pg at a time, and merge is only initiated when
+ # all replicas are peered and happy. /me crosses fingers
+ self.log("PG merged on target"
+ "...ignored")
+ elif proc.exitstatus:
+ raise Exception("ceph-objectstore-tool: "
+ "import failure with status {ret}".
+ format(ret=proc.exitstatus))
+ cmd = "rm -f {file}".format(file=exp_path)
+ exp_remote.run(args=cmd)
+ if imp_remote != exp_remote:
+ imp_remote.run(args=cmd)
+
+ # apply low split settings to each pool
+ for pool in self.ceph_manager.list_pools():
+ no_sudo_prefix = prefix[5:]
+ cmd = ("CEPH_ARGS='--filestore-merge-threshold 1 "
+ "--filestore-split-multiple 1' sudo -E "
+ + no_sudo_prefix + "--op apply-layout-settings --pool " + pool).format(id=osd)
+ proc = remote.run(args=cmd, wait=True, check_status=False,
+ stderr=BytesIO())
+ output = proc.stderr.getvalue()
+ if b'Couldn\'t find pool' in output:
+ continue
+ if proc.exitstatus:
+ raise Exception("ceph-objectstore-tool apply-layout-settings"
+ " failed with {status}".format(status=proc.exitstatus))
+
+ def blackhole_kill_osd(self, osd=None):
+ """
+ If all else fails, kill the osd.
+ :param osd: Osd to be killed.
+ """
+ if osd is None:
+ osd = random.choice(self.live_osds)
+ self.log("Blackholing and then killing osd %s, live_osds are %s" %
+ (str(osd), str(self.live_osds)))
+ self.live_osds.remove(osd)
+ self.dead_osds.append(osd)
+ self.ceph_manager.blackhole_kill_osd(osd)
+
+ def revive_osd(self, osd=None, skip_admin_check=False):
+ """
+ Revive the osd.
+ :param osd: Osd to be revived.
+ """
+ if osd is None:
+ osd = random.choice(self.dead_osds)
+ self.log("Reviving osd %s" % (str(osd),))
+ self.ceph_manager.revive_osd(
+ osd,
+ self.revive_timeout,
+ skip_admin_check=skip_admin_check)
+ self.dead_osds.remove(osd)
+ self.live_osds.append(osd)
+ if self.random_eio > 0 and osd == self.rerrosd:
+ self.ceph_manager.set_config(self.rerrosd,
+ filestore_debug_random_read_err=self.random_eio)
+ self.ceph_manager.set_config(self.rerrosd,
+ bluestore_debug_random_read_err=self.random_eio)
+
+
+ def out_osd(self, osd=None):
+ """
+ Mark the osd out
+ :param osd: Osd to be marked.
+ """
+ if osd is None:
+ osd = random.choice(self.in_osds)
+ self.log("Removing osd %s, in_osds are: %s" %
+ (str(osd), str(self.in_osds)))
+ self.ceph_manager.mark_out_osd(osd)
+ self.in_osds.remove(osd)
+ self.out_osds.append(osd)
+
+ def in_osd(self, osd=None):
+ """
+ Mark the osd in (reviving it first if it is currently dead).
+ :param osd: Osd to be marked.
+ """
+ if osd is None:
+ osd = random.choice(self.out_osds)
+ if osd in self.dead_osds:
+ return self.revive_osd(osd)
+ self.log("Adding osd %s" % (str(osd),))
+ self.out_osds.remove(osd)
+ self.in_osds.append(osd)
+ self.ceph_manager.mark_in_osd(osd)
+ self.log("Added osd %s" % (str(osd),))
+
+ def reweight_osd_or_by_util(self, osd=None):
+ """
+ Reweight an osd that is in
+ :param osd: Osd to be marked.
+ """
+ if osd is not None or random.choice([True, False]):
+ if osd is None:
+ osd = random.choice(self.in_osds)
+ val = random.uniform(.1, 1.0)
+ self.log("Reweighting osd %s to %s" % (str(osd), str(val)))
+ self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
+ str(osd), str(val))
+ else:
+ # do it several times, the option space is large
+ for i in range(5):
+ options = {
+ 'max_change': random.choice(['0.05', '1.0', '3.0']),
+ 'overage': random.choice(['110', '1000']),
+ 'type': random.choice([
+ 'reweight-by-utilization',
+ 'test-reweight-by-utilization']),
+ }
+ self.log("Reweighting by: %s"%(str(options),))
+ self.ceph_manager.raw_cluster_cmd(
+ 'osd',
+ options['type'],
+ options['overage'],
+ options['max_change'])
+
+ def primary_affinity(self, osd=None):
+ if osd is None:
+ osd = random.choice(self.in_osds)
+ if random.random() >= .5:
+ pa = random.random()
+ elif random.random() >= .5:
+ pa = 1
+ else:
+ pa = 0
+ self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa))
+ self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity',
+ str(osd), str(pa))
+
+ def thrash_cluster_full(self):
+ """
+ Set and unset cluster full condition
+ """
+ self.log('Setting full ratio to .001')
+ self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.001')
+ time.sleep(1)
+ self.log('Setting full ratio back to .95')
+ self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.95')
+
+ def thrash_pg_upmap(self):
+ """
+ Install or remove random pg_upmap entries in OSDMap
+ """
+ from random import shuffle
+ out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
+ j = json.loads(out)
+ self.log('j is %s' % j)
+ try:
+ if random.random() >= .3:
+ pgs = self.ceph_manager.get_pg_stats()
+ pg = random.choice(pgs)
+ pgid = str(pg['pgid'])
+ poolid = int(pgid.split('.')[0])
+ sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
+ if len(sizes) == 0:
+ return
+ n = sizes[0]
+ osds = self.in_osds + self.out_osds
+ shuffle(osds)
+ osds = osds[0:n]
+ self.log('Setting %s to %s' % (pgid, osds))
+ cmd = ['osd', 'pg-upmap', pgid] + [str(x) for x in osds]
+ self.log('cmd %s' % cmd)
+ self.ceph_manager.raw_cluster_cmd(*cmd)
+ else:
+ m = j['pg_upmap']
+ if len(m) > 0:
+ shuffle(m)
+ pg = m[0]['pgid']
+ self.log('Clearing pg_upmap on %s' % pg)
+ self.ceph_manager.raw_cluster_cmd(
+ 'osd',
+ 'rm-pg-upmap',
+ pg)
+ else:
+ self.log('No pg_upmap entries; doing nothing')
+ except CommandFailedError:
+ self.log('Failed to rm-pg-upmap, ignoring')
+
+ def thrash_pg_upmap_items(self):
+ """
+ Install or remove random pg_upmap_items entries in OSDMap
+ """
+ from random import shuffle
+ out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
+ j = json.loads(out)
+ self.log('j is %s' % j)
+ try:
+ if random.random() >= .3:
+ pgs = self.ceph_manager.get_pg_stats()
+ pg = random.choice(pgs)
+ pgid = str(pg['pgid'])
+ poolid = int(pgid.split('.')[0])
+ sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
+ if len(sizes) == 0:
+ return
+ n = sizes[0]
+ osds = self.in_osds + self.out_osds
+ shuffle(osds)
+ osds = osds[0:n*2]
+ self.log('Setting %s to %s' % (pgid, osds))
+ cmd = ['osd', 'pg-upmap-items', pgid] + [str(x) for x in osds]
+ self.log('cmd %s' % cmd)
+ self.ceph_manager.raw_cluster_cmd(*cmd)
+ else:
+ m = j['pg_upmap_items']
+ if len(m) > 0:
+ shuffle(m)
+ pg = m[0]['pgid']
+ self.log('Clearing pg_upmap on %s' % pg)
+ self.ceph_manager.raw_cluster_cmd(
+ 'osd',
+ 'rm-pg-upmap-items',
+ pg)
+ else:
+ self.log('No pg_upmap entries; doing nothing')
+ except CommandFailedError:
+ self.log('Failed to rm-pg-upmap-items, ignoring')
+
+ def force_recovery(self):
+ """
+ Force recovery on some PGs
+ """
+ backfill = random.random() >= 0.5
+ j = self.ceph_manager.get_pgids_to_force(backfill)
+ if j:
+ try:
+ if backfill:
+ self.ceph_manager.raw_cluster_cmd('pg', 'force-backfill', *j)
+ else:
+ self.ceph_manager.raw_cluster_cmd('pg', 'force-recovery', *j)
+ except CommandFailedError:
+ self.log('Failed to force backfill|recovery, ignoring')
+
+
+ def cancel_force_recovery(self):
+ """
+ Cancel forced recovery on some PGs
+ """
+ backfill = random.random() >= 0.5
+ j = self.ceph_manager.get_pgids_to_cancel_force(backfill)
+ if j:
+ try:
+ if backfill:
+ self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-backfill', *j)
+ else:
+ self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-recovery', *j)
+ except CommandFailedError:
+ self.log('Failed to force backfill|recovery, ignoring')
+
+ def force_cancel_recovery(self):
+ """
+ Force or cancel forcing recovery
+ """
+ if random.random() >= 0.4:
+ self.force_recovery()
+ else:
+ self.cancel_force_recovery()
+
+ def all_up(self):
+ """
+ Make sure all osds are up and not out.
+ """
+ while len(self.dead_osds) > 0:
+ self.log("reviving osd")
+ self.revive_osd()
+ while len(self.out_osds) > 0:
+ self.log("inning osd")
+ self.in_osd()
+
+ def all_up_in(self):
+ """
+ Make sure all osds are up and fully in.
+ """
+ self.all_up()
+ for osd in self.live_osds:
+ self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
+ str(osd), str(1))
+ self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity',
+ str(osd), str(1))
+
+ def do_join(self):
+ """
+ Stop the thrashing threads and wait for them to finish.
+ """
+ self.stopping = True
+ self.thread.get()
+ if self.sighup_delay:
+ self.log("joining the do_sighup greenlet")
+ self.sighup_thread.get()
+ if self.optrack_toggle_delay:
+ self.log("joining the do_optrack_toggle greenlet")
+ self.optrack_toggle_thread.join()
+ if self.dump_ops_enable == "true":
+ self.log("joining the do_dump_ops greenlet")
+ self.dump_ops_thread.join()
+ if self.noscrub_toggle_delay:
+ self.log("joining the do_noscrub_toggle greenlet")
+ self.noscrub_toggle_thread.join()
+
+ def grow_pool(self):
+ """
+ Increase the number of PGs in a pool
+ """
+ pool = self.ceph_manager.get_pool()
+ orig_pg_num = self.ceph_manager.get_pool_pg_num(pool)
+ self.log("Growing pool %s" % (pool,))
+ if self.ceph_manager.expand_pool(pool,
+ self.config.get('pool_grow_by', 10),
+ self.max_pgs):
+ self.pools_to_fix_pgp_num.add(pool)
+
+ def shrink_pool(self):
+ """
+ Decrease the number of PGs in a pool
+ """
+ pool = self.ceph_manager.get_pool()
+ _ = self.ceph_manager.get_pool_pg_num(pool)
+ self.log("Shrinking pool %s" % (pool,))
+ if self.ceph_manager.contract_pool(
+ pool,
+ self.config.get('pool_shrink_by', 10),
+ self.min_pgs):
+ self.pools_to_fix_pgp_num.add(pool)
+
+ def fix_pgp_num(self, pool=None):
+ """
+ Fix number of pgs in pool.
+ """
+ if pool is None:
+ pool = self.ceph_manager.get_pool()
+ force = False
+ else:
+ force = True
+ self.log("fixing pg num pool %s" % (pool,))
+ if self.ceph_manager.set_pool_pgpnum(pool, force):
+ self.pools_to_fix_pgp_num.discard(pool)
+
+ def test_pool_min_size(self):
+ """
+ Kill and revive all osds except one.
+ """
+ self.log("test_pool_min_size")
+ self.all_up()
+ self.ceph_manager.wait_for_recovery(
+ timeout=self.config.get('timeout')
+ )
+ the_one = random.choice(self.in_osds)
+ self.log("Killing everyone but %s", the_one)
+ to_kill = filter(lambda x: x != the_one, self.in_osds)
+ [self.kill_osd(i) for i in to_kill]
+ [self.out_osd(i) for i in to_kill]
+ time.sleep(self.config.get("test_pool_min_size_time", 10))
+ self.log("Killing %s" % (the_one,))
+ self.kill_osd(the_one)
+ self.out_osd(the_one)
+ self.log("Reviving everyone but %s" % (the_one,))
+ [self.revive_osd(i) for i in to_kill]
+ [self.in_osd(i) for i in to_kill]
+ self.log("Revived everyone but %s" % (the_one,))
+ self.log("Waiting for clean")
+ self.ceph_manager.wait_for_recovery(
+ timeout=self.config.get('timeout')
+ )
+
+ def inject_pause(self, conf_key, duration, check_after, should_be_down):
+ """
+ Pause injection testing. Check for osd being down when finished.
+ """
+ the_one = random.choice(self.live_osds)
+ self.log("inject_pause on {osd}".format(osd=the_one))
+ self.log(
+ "Testing {key} pause injection for duration {duration}".format(
+ key=conf_key,
+ duration=duration
+ ))
+ self.log(
+ "Checking after {after}, should_be_down={shouldbedown}".format(
+ after=check_after,
+ shouldbedown=should_be_down
+ ))
+ self.ceph_manager.set_config(the_one, **{conf_key: duration})
+ if not should_be_down:
+ return
+ time.sleep(check_after)
+ status = self.ceph_manager.get_osd_status()
+ assert the_one in status['down']
+ time.sleep(duration - check_after + 20)
+ status = self.ceph_manager.get_osd_status()
+ assert the_one not in status['down']
+
+ def test_backfill_full(self):
+ """
+ Test backfills stopping when the replica fills up.
+
+ First, use injectfull admin command to simulate a now full
+ osd by setting it to 0 on all of the OSDs.
+
+ Second, on a random subset, set
+ osd_debug_skip_full_check_in_backfill_reservation to force
+ the more complicated check in do_scan to be exercised.
+
+ Then, verify that all backfillings stop.
+ """
+ self.log("injecting backfill full")
+ for i in self.live_osds:
+ self.ceph_manager.set_config(
+ i,
+ osd_debug_skip_full_check_in_backfill_reservation=
+ random.choice(['false', 'true']))
+ self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'backfillfull'],
+ check_status=True, timeout=30, stdout=DEVNULL)
+ for i in range(30):
+ status = self.ceph_manager.compile_pg_status()
+ if 'backfilling' not in status.keys():
+ break
+ self.log(
+ "waiting for {still_going} backfillings".format(
+ still_going=status.get('backfilling')))
+ time.sleep(1)
+ assert('backfilling' not in self.ceph_manager.compile_pg_status().keys())
+ for i in self.live_osds:
+ self.ceph_manager.set_config(
+ i,
+ osd_debug_skip_full_check_in_backfill_reservation='false')
+ self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'none'],
+ check_status=True, timeout=30, stdout=DEVNULL)
+
+ def test_map_discontinuity(self):
+ """
+ 1) Allows the osds to recover
+ 2) kills an osd
+ 3) allows the remaining osds to recover
+ 4) waits for some time
+ 5) revives the osd
+ This sequence should cause the revived osd to have to handle
+ a map gap since the mons would have trimmed
+ """
+ while len(self.in_osds) < (self.minin + 1):
+ self.in_osd()
+ self.log("Waiting for recovery")
+ self.ceph_manager.wait_for_all_osds_up(
+ timeout=self.config.get('timeout')
+ )
+ # now we wait 20s for the pg status to change, if it takes longer,
+ # the test *should* fail!
+ time.sleep(20)
+ self.ceph_manager.wait_for_clean(
+ timeout=self.config.get('timeout')
+ )
+
+ # now we wait 20s for the backfill replicas to hear about the clean
+ time.sleep(20)
+ self.log("Recovered, killing an osd")
+ self.kill_osd(mark_down=True, mark_out=True)
+ self.log("Waiting for clean again")
+ self.ceph_manager.wait_for_clean(
+ timeout=self.config.get('timeout')
+ )
+ self.log("Waiting for trim")
+ time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40)))
+ self.revive_osd()
+
+ def choose_action(self):
+ """
+ Random action selector.
+ """
+ chance_down = self.config.get('chance_down', 0.4)
+ chance_test_backfill_full = \
+ self.config.get('chance_test_backfill_full', 0)
+ if isinstance(chance_down, int):
+ chance_down = float(chance_down) / 100
+ minin = self.minin
+ minout = self.config.get("min_out", 0)
+ minlive = self.config.get("min_live", 2)
+ mindead = self.config.get("min_dead", 0)
+
+ self.log('choose_action: min_in %d min_out '
+ '%d min_live %d min_dead %d' %
+ (minin, minout, minlive, mindead))
+ actions = []
+ if len(self.in_osds) > minin:
+ actions.append((self.out_osd, 1.0,))
+ if len(self.live_osds) > minlive and chance_down > 0:
+ actions.append((self.kill_osd, chance_down,))
+ if len(self.out_osds) > minout:
+ actions.append((self.in_osd, 1.7,))
+ if len(self.dead_osds) > mindead:
+ actions.append((self.revive_osd, 1.0,))
+ if self.config.get('thrash_primary_affinity', True):
+ actions.append((self.primary_affinity, 1.0,))
+ actions.append((self.reweight_osd_or_by_util,
+ self.config.get('reweight_osd', .5),))
+ actions.append((self.grow_pool,
+ self.config.get('chance_pgnum_grow', 0),))
+ actions.append((self.shrink_pool,
+ self.config.get('chance_pgnum_shrink', 0),))
+ actions.append((self.fix_pgp_num,
+ self.config.get('chance_pgpnum_fix', 0),))
+ actions.append((self.test_pool_min_size,
+ self.config.get('chance_test_min_size', 0),))
+ actions.append((self.test_backfill_full,
+ chance_test_backfill_full,))
+ if self.chance_thrash_cluster_full > 0:
+ actions.append((self.thrash_cluster_full, self.chance_thrash_cluster_full,))
+ if self.chance_thrash_pg_upmap > 0:
+ actions.append((self.thrash_pg_upmap, self.chance_thrash_pg_upmap,))
+ if self.chance_thrash_pg_upmap_items > 0:
+ actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,))
+ if self.chance_force_recovery > 0:
+ actions.append((self.force_cancel_recovery, self.chance_force_recovery))
+
+ for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
+ for scenario in [
+ (lambda:
+ self.inject_pause(key,
+ self.config.get('pause_short', 3),
+ 0,
+ False),
+ self.config.get('chance_inject_pause_short', 1),),
+ (lambda:
+ self.inject_pause(key,
+ self.config.get('pause_long', 80),
+ self.config.get('pause_check_after', 70),
+ True),
+ self.config.get('chance_inject_pause_long', 0),)]:
+ actions.append(scenario)
+
+ total = sum([y for (x, y) in actions])
+ val = random.uniform(0, total)
+ for (action, prob) in actions:
+ if val < prob:
+ return action
+ val -= prob
+ return None
+
+ def log_exc(func):
+ @wraps(func)
+ def wrapper(self):
+ try:
+ return func(self)
+ except:
+ self.log(traceback.format_exc())
+ raise
+ return wrapper
+
+ @log_exc
+ def do_sighup(self):
+ """
+ Loops and sends signal.SIGHUP to a random live osd.
+
+ Loop delay is controlled by the config value sighup_delay.
+ """
+ delay = float(self.sighup_delay)
+ self.log("starting do_sighup with a delay of {0}".format(delay))
+ while not self.stopping:
+ osd = random.choice(self.live_osds)
+ self.ceph_manager.signal_osd(osd, signal.SIGHUP, silent=True)
+ time.sleep(delay)
+
+ @log_exc
+ def do_optrack_toggle(self):
+ """
+ Loops and toggle op tracking to all osds.
+
+ Loop delay is controlled by the config value optrack_toggle_delay.
+ """
+ delay = float(self.optrack_toggle_delay)
+ osd_state = "true"
+ self.log("starting do_optrack_toggle with a delay of {0}".format(delay))
+ while not self.stopping:
+ if osd_state == "true":
+ osd_state = "false"
+ else:
+ osd_state = "true"
+ try:
+ self.ceph_manager.inject_args('osd', '*',
+ 'osd_enable_op_tracker',
+ osd_state)
+ except CommandFailedError:
+ self.log('Failed to tell all osds, ignoring')
+ gevent.sleep(delay)
+
+ @log_exc
+ def do_dump_ops(self):
+ """
+ Loops and does op dumps on all osds
+ """
+ self.log("starting do_dump_ops")
+ while not self.stopping:
+ for osd in self.live_osds:
+ # Ignore errors because live_osds is in flux
+ self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'],
+ check_status=False, timeout=30, stdout=DEVNULL)
+ self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'],
+ check_status=False, timeout=30, stdout=DEVNULL)
+ self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'],
+ check_status=False, timeout=30, stdout=DEVNULL)
+ gevent.sleep(0)
+
+ @log_exc
+ def do_noscrub_toggle(self):
+ """
+ Loops and toggle noscrub flags
+
+ Loop delay is controlled by the config value noscrub_toggle_delay.
+ """
+ delay = float(self.noscrub_toggle_delay)
+ scrub_state = "none"
+ self.log("starting do_noscrub_toggle with a delay of {0}".format(delay))
+ while not self.stopping:
+ if scrub_state == "none":
+ self.ceph_manager.raw_cluster_cmd('osd', 'set', 'noscrub')
+ scrub_state = "noscrub"
+ elif scrub_state == "noscrub":
+ self.ceph_manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub')
+ scrub_state = "both"
+ elif scrub_state == "both":
+ self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub')
+ scrub_state = "nodeep-scrub"
+ else:
+ self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub')
+ scrub_state = "none"
+ gevent.sleep(delay)
+ self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub')
+ self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub')
+
+ @log_exc
+ def do_thrash(self):
+ """
+ Loop to select random actions to thrash ceph manager with.
+ """
+ cleanint = self.config.get("clean_interval", 60)
+ scrubint = self.config.get("scrub_interval", -1)
+ maxdead = self.config.get("max_dead", 0)
+ delay = self.config.get("op_delay", 5)
+ self.rerrosd = self.live_osds[0]
+ if self.random_eio > 0:
+ self.ceph_manager.inject_args('osd', self.rerrosd,
+ 'filestore_debug_random_read_err',
+ self.random_eio)
+ self.ceph_manager.inject_args('osd', self.rerrosd,
+ 'bluestore_debug_random_read_err',
+ self.random_eio)
+ self.log("starting do_thrash")
+ while not self.stopping:
+ to_log = [str(x) for x in ["in_osds: ", self.in_osds,
+ "out_osds: ", self.out_osds,
+ "dead_osds: ", self.dead_osds,
+ "live_osds: ", self.live_osds]]
+ self.log(" ".join(to_log))
+ if random.uniform(0, 1) < (float(delay) / cleanint):
+ while len(self.dead_osds) > maxdead:
+ self.revive_osd()
+ for osd in self.in_osds:
+ self.ceph_manager.raw_cluster_cmd('osd', 'reweight',
+ str(osd), str(1))
+ if random.uniform(0, 1) < float(
+ self.config.get('chance_test_map_discontinuity', 0)) \
+ and len(self.live_osds) > 5: # avoid m=2,k=2 stall, w/ some buffer for crush being picky
+ self.test_map_discontinuity()
+ else:
+ self.ceph_manager.wait_for_recovery(
+ timeout=self.config.get('timeout')
+ )
+ time.sleep(self.clean_wait)
+ if scrubint > 0:
+ if random.uniform(0, 1) < (float(delay) / scrubint):
+ self.log('Scrubbing while thrashing being performed')
+ Scrubber(self.ceph_manager, self.config)
+ self.choose_action()()
+ time.sleep(delay)
+ self.all_up()
+ if self.random_eio > 0:
+ self.ceph_manager.inject_args('osd', self.rerrosd,
+ 'filestore_debug_random_read_err', '0.0')
+ self.ceph_manager.inject_args('osd', self.rerrosd,
+ 'bluestore_debug_random_read_err', '0.0')
+ for pool in list(self.pools_to_fix_pgp_num):
+ if self.ceph_manager.get_pool_pg_num(pool) > 0:
+ self.fix_pgp_num(pool)
+ self.pools_to_fix_pgp_num.clear()
+ for service, opt, saved_value in self.saved_options:
+ self.ceph_manager.inject_args(service, '*', opt, saved_value)
+ self.saved_options = []
+ self.all_up_in()
+
+
+class ObjectStoreTool:
+
+ def __init__(self, manager, pool, **kwargs):
+ self.manager = manager
+ self.pool = pool
+ self.osd = kwargs.get('osd', None)
+ self.object_name = kwargs.get('object_name', None)
+ self.do_revive = kwargs.get('do_revive', True)
+ if self.osd and self.pool and self.object_name:
+ if self.osd == "primary":
+ self.osd = self.manager.get_object_primary(self.pool,
+ self.object_name)
+ assert self.osd
+ if self.object_name:
+ self.pgid = self.manager.get_object_pg_with_shard(self.pool,
+ self.object_name,
+ self.osd)
+ self.remote = next(iter(self.manager.ctx.\
+ cluster.only('osd.{o}'.format(o=self.osd)).remotes.keys()))
+ path = self.manager.get_filepath().format(id=self.osd)
+ self.paths = ("--data-path {path} --journal-path {path}/journal".
+ format(path=path))
+
+ def build_cmd(self, options, args, stdin):
+ lines = []
+ if self.object_name:
+ lines.append("object=$(sudo adjust-ulimits ceph-objectstore-tool "
+ "{paths} --pgid {pgid} --op list |"
+ "grep '\"oid\":\"{name}\"')".
+ format(paths=self.paths,
+ pgid=self.pgid,
+ name=self.object_name))
+ args = '"$object" ' + args
+ options += " --pgid {pgid}".format(pgid=self.pgid)
+ cmd = ("sudo adjust-ulimits ceph-objectstore-tool {paths} {options} {args}".
+ format(paths=self.paths,
+ args=args,
+ options=options))
+ if stdin:
+ cmd = ("echo {payload} | base64 --decode | {cmd}".
+ format(payload=base64.encode(stdin),
+ cmd=cmd))
+ lines.append(cmd)
+ return "\n".join(lines)
+
+ def run(self, options, args):
+ self.manager.kill_osd(self.osd)
+ cmd = self.build_cmd(options, args, None)
+ self.manager.log(cmd)
+ try:
+ proc = self.remote.run(args=['bash', '-e', '-x', '-c', cmd],
+ check_status=False,
+ stdout=BytesIO(),
+ stderr=BytesIO())
+ proc.wait()
+ if proc.exitstatus != 0:
+ self.manager.log("failed with " + str(proc.exitstatus))
+ error = proc.stdout.getvalue().decode() + " " + \
+ proc.stderr.getvalue().decode()
+ raise Exception(error)
+ finally:
+ if self.do_revive:
+ self.manager.revive_osd(self.osd)
+ self.manager.wait_till_osd_is_up(self.osd, 300)
+
+
+class CephManager:
+ """
+ Ceph manager object.
+ Contains several local functions that form the bulk of this module.
+
+ Note: this class has nothing to do with the Ceph daemon (ceph-mgr) of
+ the same name.
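+
+ A minimal construction sketch (the remote and logger names are
+ illustrative)::
+
+ manager = CephManager(mon_remote, ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ cluster='ceph')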
+ """
+
+ REPLICATED_POOL = 1
+ ERASURE_CODED_POOL = 3
+
+ def __init__(self, controller, ctx=None, config=None, logger=None,
+ cluster='ceph'):
+ self.lock = threading.RLock()
+ self.ctx = ctx
+ self.config = config
+ self.controller = controller
+ self.next_pool_id = 0
+ self.cluster = cluster
+ if (logger):
+ self.log = lambda x: logger.info(x)
+ else:
+ def tmp(x):
+ """
+ implement log behavior.
+ """
+ print(x)
+ self.log = tmp
+ if self.config is None:
+ self.config = dict()
+ pools = self.list_pools()
+ self.pools = {}
+ for pool in pools:
+ # we may race with a pool deletion; ignore failures here
+ try:
+ self.pools[pool] = self.get_pool_property(pool, 'pg_num')
+ except CommandFailedError:
+ self.log('Failed to get pg_num from pool %s, ignoring' % pool)
+
+ def raw_cluster_cmd(self, *args):
+ """
+ Run a ceph CLI command against the cluster and return its stdout.
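+
+ A usage sketch (the arguments are illustrative)::
+
+ osd_dump_json = manager.raw_cluster_cmd('osd', 'dump', '--format=json')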
+ """
+ testdir = teuthology.get_testdir(self.ctx)
+ ceph_args = [
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'timeout',
+ '120',
+ 'ceph',
+ '--cluster',
+ self.cluster,
+ ]
+ ceph_args.extend(args)
+ proc = self.controller.run(
+ args=ceph_args,
+ stdout=StringIO(),
+ )
+ return proc.stdout.getvalue()
+
+ def raw_cluster_cmd_result(self, *args, **kwargs):
+ """
+ Run a ceph CLI command against the cluster and return its exit status.
+ """
+ testdir = teuthology.get_testdir(self.ctx)
+ ceph_args = [
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'timeout',
+ '900',
+ 'ceph',
+ '--cluster',
+ self.cluster,
+ ]
+ ceph_args.extend(args)
+ kwargs['args'] = ceph_args
+ kwargs['check_status'] = False
+ proc = self.controller.run(**kwargs)
+ return proc.exitstatus
+
+ def run_ceph_w(self, watch_channel=None):
+ """
+ Execute "ceph -w" in the background with stdout connected to a BytesIO,
+ and return the RemoteProcess.
+
+ :param watch_channel: Specifies the channel to be watched. This can be
+ 'cluster', 'audit', ...
+ :type watch_channel: str
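+
+ A usage sketch (channel name as described above)::
+
+ proc = manager.run_ceph_w(watch_channel='cluster')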
+ """
+ args = ["sudo",
+ "daemon-helper",
+ "kill",
+ "ceph",
+ '--cluster',
+ self.cluster,
+ "-w"]
+ if watch_channel is not None:
+ args.append("--watch-channel")
+ args.append(watch_channel)
+ return self.controller.run(args=args, wait=False, stdout=StringIO(), stdin=run.PIPE)
+
+ def flush_pg_stats(self, osds, no_wait=None, wait_for_mon=300):
+ """
+ Flush pg stats from a list of OSD ids, ensuring they are reflected
+ all the way to the monitor. Luminous and later only.
+
+ :param osds: list of OSDs to flush
+ :param no_wait: list of OSDs not to wait for seq id. by default, we
+ wait for all specified osds, but some of them could be
+ moved out of osdmap, so we cannot get their updated
+ stat seq from the monitor anymore. In that case, you need
+ to pass a blacklist.
+ :param wait_for_mon: wait for mon to be synced with mgr. 0 to disable
+ it. (5 min by default)
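+
+ A usage sketch (the osd ids are illustrative)::
+
+ manager.flush_pg_stats([0, 1, 2], no_wait=[2])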
+ """
+ seq = {osd: int(self.raw_cluster_cmd('tell', 'osd.%d' % osd, 'flush_pg_stats'))
+ for osd in osds}
+ if not wait_for_mon:
+ return
+ if no_wait is None:
+ no_wait = []
+ for osd, need in seq.items():
+ if osd in no_wait:
+ continue
+ got = 0
+ while wait_for_mon > 0:
+ got = int(self.raw_cluster_cmd('osd', 'last-stat-seq', 'osd.%d' % osd))
+ self.log('need seq {need} got {got} for osd.{osd}'.format(
+ need=need, got=got, osd=osd))
+ if got >= need:
+ break
+ A_WHILE = 1
+ time.sleep(A_WHILE)
+ wait_for_mon -= A_WHILE
+ else:
+ raise Exception('timed out waiting for mon to be updated with '
+ 'osd.{osd}: {got} < {need}'.
+ format(osd=osd, got=got, need=need))
+
+ def flush_all_pg_stats(self):
+ self.flush_pg_stats(range(len(self.get_osd_dump())))
+
+ def do_rados(self, remote, cmd, check_status=True):
+ """
+ Execute a remote rados command.
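+
+ A usage sketch (the pool name is illustrative)::
+
+ manager.do_rados(manager.controller, ['-p', 'mypool', 'ls'])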
+ """
+ testdir = teuthology.get_testdir(self.ctx)
+ pre = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rados',
+ '--cluster',
+ self.cluster,
+ ]
+ pre.extend(cmd)
+ proc = remote.run(
+ args=pre,
+ wait=True,
+ check_status=check_status
+ )
+ return proc
+
+ def rados_write_objects(self, pool, num_objects, size,
+ timelimit, threads, cleanup=False):
+ """
+ Write rados objects
+ Threads not used yet.
+ """
+ args = [
+ '-p', pool,
+ '--num-objects', num_objects,
+ '-b', size,
+ 'bench', timelimit,
+ 'write'
+ ]
+ if not cleanup:
+ args.append('--no-cleanup')
+ return self.do_rados(self.controller, map(str, args))
+
+ def do_put(self, pool, obj, fname, namespace=None):
+ """
+ Implement rados put operation
+ """
+ args = ['-p', pool]
+ if namespace is not None:
+ args += ['-N', namespace]
+ args += [
+ 'put',
+ obj,
+ fname
+ ]
+ return self.do_rados(
+ self.controller,
+ args,
+ check_status=False
+ ).exitstatus
+
+ def do_get(self, pool, obj, fname='/dev/null', namespace=None):
+ """
+ Implement rados get operation
+ """
+ args = ['-p', pool]
+ if namespace is not None:
+ args += ['-N', namespace]
+ args += [
+ 'get',
+ obj,
+ fname
+ ]
+ return self.do_rados(
+ self.controller,
+ args,
+ check_status=False
+ ).exitstatus
+
+ def do_rm(self, pool, obj, namespace=None):
+ """
+ Implement rados rm operation
+ """
+ args = ['-p', pool]
+ if namespace is not None:
+ args += ['-N', namespace]
+ args += [
+ 'rm',
+ obj
+ ]
+ return self.do_rados(
+ self.controller,
+ args,
+ check_status=False
+ ).exitstatus
+
+ def osd_admin_socket(self, osd_id, command, check_status=True, timeout=0, stdout=None):
+ if stdout is None:
+ stdout = StringIO()
+ return self.admin_socket('osd', osd_id, command, check_status, timeout, stdout)
+
+ def find_remote(self, service_type, service_id):
+ """
+ Get the Remote for the host where a particular service runs.
+
+ :param service_type: 'mds', 'osd', 'client'
+ :param service_id: The second part of a role, e.g. '0' for
+ the role 'client.0'
+ :return: a Remote instance for the host where the
+ requested role is placed
+ """
+ return get_remote(self.ctx, self.cluster,
+ service_type, service_id)
+
+ def admin_socket(self, service_type, service_id,
+ command, check_status=True, timeout=0, stdout=None):
+ """
+ Run a command against a daemon's admin socket on its remote host.
+ :param command: a list of words to use as the command
+ to the admin socket
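+
+ A usage sketch (service and command are illustrative)::
+
+ manager.admin_socket('osd', 0, ['dump_ops_in_flight'])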
+ """
+ if stdout is None:
+ stdout = StringIO()
+ testdir = teuthology.get_testdir(self.ctx)
+ remote = self.find_remote(service_type, service_id)
+ args = [
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'timeout',
+ str(timeout),
+ 'ceph',
+ '--cluster',
+ self.cluster,
+ '--admin-daemon',
+ '/var/run/ceph/{cluster}-{type}.{id}.asok'.format(
+ cluster=self.cluster,
+ type=service_type,
+ id=service_id),
+ ]
+ args.extend(command)
+ return remote.run(
+ args=args,
+ stdout=stdout,
+ wait=True,
+ check_status=check_status
+ )
+
+ def objectstore_tool(self, pool, options, args, **kwargs):
+ return ObjectStoreTool(self, pool, **kwargs).run(options, args)
+
+ def get_pgid(self, pool, pgnum):
+ """
+ :param pool: pool name
+ :param pgnum: pg number
+ :returns: a string representing this pg.
+ """
+ poolnum = self.get_pool_num(pool)
+ pg_str = "{poolnum}.{pgnum}".format(
+ poolnum=poolnum,
+ pgnum=pgnum)
+ return pg_str
+
+ def get_pg_replica(self, pool, pgnum):
+ """
+ get replica for pool, pgnum (e.g. (data, 0) -> 0)
+ """
+ pg_str = self.get_pgid(pool, pgnum)
+ output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json')
+ j = json.loads('\n'.join(output.split('\n')[1:]))
+ return int(j['acting'][-1])
+
+ def wait_for_pg_stats(func):
+ # both osd_mon_report_interval and mgr_stats_period are 5 seconds
+ # by default; taking fault-injected delays (in ms) into account, the
+ # retry schedule below leaves more than enough time for stats to land
+ delays = [1, 1, 2, 3, 5, 8, 13, 0]
+ @wraps(func)
+ def wrapper(self, *args, **kwargs):
+ exc = None
+ for delay in delays:
+ try:
+ return func(self, *args, **kwargs)
+ except AssertionError as e:
+ time.sleep(delay)
+ exc = e
+ raise exc
+ return wrapper
+
+ def get_pg_primary(self, pool, pgnum):
+ """
+ get primary for pool, pgnum (e.g. (data, 0) -> 0)
+ """
+ pg_str = self.get_pgid(pool, pgnum)
+ output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json')
+ j = json.loads('\n'.join(output.split('\n')[1:]))
+ return int(j['acting'][0])
+
+ def get_pool_num(self, pool):
+ """
+ get number for pool (e.g., data -> 2)
+ """
+ return int(self.get_pool_dump(pool)['pool'])
+
+ def list_pools(self):
+ """
+ list all pool names
+ """
+ osd_dump = self.get_osd_dump_json()
+ self.log(osd_dump['pools'])
+ return [str(i['pool_name']) for i in osd_dump['pools']]
+
+ def clear_pools(self):
+ """
+ remove all pools
+ """
+ [self.remove_pool(i) for i in self.list_pools()]
+
+ def kick_recovery_wq(self, osdnum):
+ """
+ Run kick_recovery_wq on cluster.
+ """
+ return self.raw_cluster_cmd(
+ 'tell', "osd.%d" % (int(osdnum),),
+ 'debug',
+ 'kick_recovery_wq',
+ '0')
+
+ def wait_run_admin_socket(self, service_type,
+ service_id, args=['version'], timeout=75, stdout=None):
+ """
+ If the admin_socket call succeeds, return its result. Otherwise wait
+ five seconds and try again.
+ """
+ if stdout is None:
+ stdout = StringIO()
+ tries = 0
+ while True:
+ proc = self.admin_socket(service_type, service_id,
+ args, check_status=False, stdout=stdout)
+ if proc.exitstatus == 0:
+ return proc
+ else:
+ tries += 1
+ if (tries * 5) > timeout:
+ raise Exception('timed out waiting for admin_socket '
+ 'to appear after {type}.{id} restart'.
+ format(type=service_type,
+ id=service_id))
+ self.log("waiting on admin_socket for {type}-{id}, "
+ "{command}".format(type=service_type,
+ id=service_id,
+ command=args))
+ time.sleep(5)
+
+ def get_pool_dump(self, pool):
+ """
+ get the osd dump part of a pool
+ """
+ osd_dump = self.get_osd_dump_json()
+ for i in osd_dump['pools']:
+ if i['pool_name'] == pool:
+ return i
+ assert False
+
+ def get_config(self, service_type, service_id, name):
+ """
+ :param service_type: service type, e.g. 'mon'
+ :param service_id: service id, e.g. 'a'
+ :param name: the option name
+ """
+ proc = self.wait_run_admin_socket(service_type, service_id,
+ ['config', 'show'])
+ j = json.loads(proc.stdout.getvalue())
+ return j[name]
+
+ def inject_args(self, service_type, service_id, name, value):
+ whom = '{0}.{1}'.format(service_type, service_id)
+ if isinstance(value, bool):
+ value = 'true' if value else 'false'
+ opt_arg = '--{name}={value}'.format(name=name, value=value)
+ self.raw_cluster_cmd('--', 'tell', whom, 'injectargs', opt_arg)
+
+ def set_config(self, osdnum, **argdict):
+ """
+ :param osdnum: osd number
+ :param argdict: dictionary containing values to set.
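+
+ A usage sketch, mirroring the fault-injection option used elsewhere
+ in this module (the value is illustrative)::
+
+ manager.set_config(0, filestore_debug_random_read_err=0.1)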
+ """
+ for k, v in argdict.items():
+ self.wait_run_admin_socket(
+ 'osd', osdnum,
+ ['config', 'set', str(k), str(v)])
+
+ def raw_cluster_status(self):
+ """
+ Get status from cluster
+ """
+ status = self.raw_cluster_cmd('status', '--format=json-pretty')
+ return json.loads(status)
+
+ def raw_osd_status(self):
+ """
+ Get osd status from cluster
+ """
+ return self.raw_cluster_cmd('osd', 'dump')
+
+ def get_osd_status(self):
+ """
+ Get osd statuses sorted by states that the osds are in.
+ """
+ osd_lines = list(filter(
+ lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)),
+ self.raw_osd_status().split('\n')))
+ self.log(osd_lines)
+ in_osds = [int(i[4:].split()[0])
+ for i in filter(lambda x: " in " in x, osd_lines)]
+ out_osds = [int(i[4:].split()[0])
+ for i in filter(lambda x: " out " in x, osd_lines)]
+ up_osds = [int(i[4:].split()[0])
+ for i in filter(lambda x: " up " in x, osd_lines)]
+ down_osds = [int(i[4:].split()[0])
+ for i in filter(lambda x: " down " in x, osd_lines)]
+ dead_osds = [int(x.id_)
+ for x in filter(lambda x:
+ not x.running(),
+ self.ctx.daemons.
+ iter_daemons_of_role('osd', self.cluster))]
+ live_osds = [int(x.id_) for x in
+ filter(lambda x:
+ x.running(),
+ self.ctx.daemons.iter_daemons_of_role('osd',
+ self.cluster))]
+ return {'in': in_osds, 'out': out_osds, 'up': up_osds,
+ 'down': down_osds, 'dead': dead_osds, 'live': live_osds,
+ 'raw': osd_lines}
+
+ def get_num_pgs(self):
+ """
+ Check cluster status for the number of pgs
+ """
+ status = self.raw_cluster_status()
+ self.log(status)
+ return status['pgmap']['num_pgs']
+
+ def create_erasure_code_profile(self, profile_name, profile):
+ """
+ Create an erasure code profile name that can be used as a parameter
+ when creating an erasure coded pool.
+ """
+ with self.lock:
+ args = cmd_erasure_code_profile(profile_name, profile)
+ self.raw_cluster_cmd(*args)
+
+ def create_pool_with_unique_name(self, pg_num=16,
+ erasure_code_profile_name=None,
+ min_size=None,
+ erasure_code_use_overwrites=False):
+ """
+ Create a pool named unique_pool_X where X is unique.
+ """
+ name = ""
+ with self.lock:
+ name = "unique_pool_%s" % (str(self.next_pool_id),)
+ self.next_pool_id += 1
+ self.create_pool(
+ name,
+ pg_num,
+ erasure_code_profile_name=erasure_code_profile_name,
+ min_size=min_size,
+ erasure_code_use_overwrites=erasure_code_use_overwrites)
+ return name
+
+ @contextlib.contextmanager
+ def pool(self, pool_name, pg_num=16, erasure_code_profile_name=None):
+ self.create_pool(pool_name, pg_num, erasure_code_profile_name)
+ yield
+ self.remove_pool(pool_name)
+
+ def create_pool(self, pool_name, pg_num=16,
+ erasure_code_profile_name=None,
+ min_size=None,
+ erasure_code_use_overwrites=False):
+ """
+ Create a pool with the name given by the pool_name parameter.
+ :param pool_name: name of the pool being created.
+ :param pg_num: initial number of pgs.
+ :param erasure_code_profile_name: if set (not None), create an
+ erasure coded pool using the profile
+ :param erasure_code_use_overwrites: if true, allow overwrites
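+
+ Usage sketches (pool and profile names are illustrative)::
+
+ manager.create_pool('mypool', pg_num=16)
+ manager.create_pool('ecpool', pg_num=16,
+ erasure_code_profile_name='myprofile',
+ erasure_code_use_overwrites=True)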
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ assert isinstance(pg_num, int)
+ assert pool_name not in self.pools
+ self.log("creating pool_name %s" % (pool_name,))
+ if erasure_code_profile_name:
+ self.raw_cluster_cmd('osd', 'pool', 'create',
+ pool_name, str(pg_num), str(pg_num),
+ 'erasure', erasure_code_profile_name)
+ else:
+ self.raw_cluster_cmd('osd', 'pool', 'create',
+ pool_name, str(pg_num))
+ if min_size is not None:
+ self.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool_name,
+ 'min_size',
+ str(min_size))
+ if erasure_code_use_overwrites:
+ self.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool_name,
+ 'allow_ec_overwrites',
+ 'true')
+ self.raw_cluster_cmd(
+ 'osd', 'pool', 'application', 'enable',
+ pool_name, 'rados', '--yes-i-really-mean-it',
+ run.Raw('||'), 'true')
+ self.pools[pool_name] = pg_num
+ time.sleep(1)
+
+ def add_pool_snap(self, pool_name, snap_name):
+ """
+ Add pool snapshot
+ :param pool_name: name of pool to snapshot
+ :param snap_name: name of snapshot to take
+ """
+ self.raw_cluster_cmd('osd', 'pool', 'mksnap',
+ str(pool_name), str(snap_name))
+
+ def remove_pool_snap(self, pool_name, snap_name):
+ """
+ Remove pool snapshot
+ :param pool_name: name of pool to snapshot
+ :param snap_name: name of snapshot to remove
+ """
+ self.raw_cluster_cmd('osd', 'pool', 'rmsnap',
+ str(pool_name), str(snap_name))
+
+ def remove_pool(self, pool_name):
+ """
+ Remove the indicated pool
+ :param pool_name: Pool to be removed
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ assert pool_name in self.pools
+ self.log("removing pool_name %s" % (pool_name,))
+ del self.pools[pool_name]
+ self.raw_cluster_cmd('osd', 'pool', 'rm', pool_name, pool_name,
+ "--yes-i-really-really-mean-it")
+
+ def get_pool(self):
+ """
+ Pick a random pool
+ """
+ with self.lock:
+ return random.sample(self.pools.keys(), 1)[0]
+
+ def get_pool_pg_num(self, pool_name):
+ """
+ Return the number of pgs in the pool specified.
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ if pool_name in self.pools:
+ return self.pools[pool_name]
+ return 0
+
+ def get_pool_property(self, pool_name, prop):
+ """
+ :param pool_name: pool
+ :param prop: property to be checked.
+ :returns: property as an int value.
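+
+ A usage sketch (the pool name is illustrative; ``min_size`` is one of
+ the properties this module sets)::
+
+ min_size = manager.get_pool_property('mypool', 'min_size')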
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ assert isinstance(prop, str)
+ output = self.raw_cluster_cmd(
+ 'osd',
+ 'pool',
+ 'get',
+ pool_name,
+ prop)
+ return int(output.split()[1])
+
+ def set_pool_property(self, pool_name, prop, val):
+ """
+ :param pool_name: pool
+ :param prop: property to be set.
+ :param val: value to set.
+
+ This routine retries if set operation fails.
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ assert isinstance(prop, str)
+ assert isinstance(val, int)
+ tries = 0
+ while True:
+ r = self.raw_cluster_cmd_result(
+ 'osd',
+ 'pool',
+ 'set',
+ pool_name,
+ prop,
+ str(val))
+ if r != 11: # EAGAIN
+ break
+ tries += 1
+ if tries > 50:
+ raise Exception('timed out getting EAGAIN '
+ 'when setting pool property %s %s = %s' %
+ (pool_name, prop, val))
+ self.log('got EAGAIN setting pool property, '
+ 'waiting a few seconds...')
+ time.sleep(2)
+
+ def expand_pool(self, pool_name, by, max_pgs):
+ """
+ Increase the number of pgs in a pool
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ assert isinstance(by, int)
+ assert pool_name in self.pools
+ if self.get_num_creating() > 0:
+ return False
+ if (self.pools[pool_name] + by) > max_pgs:
+ return False
+ self.log("increase pool size by %d" % (by,))
+ new_pg_num = self.pools[pool_name] + by
+ self.set_pool_property(pool_name, "pg_num", new_pg_num)
+ self.pools[pool_name] = new_pg_num
+ return True
+
+ def contract_pool(self, pool_name, by, min_pgs):
+ """
+ Decrease the number of pgs in a pool
+ """
+ with self.lock:
+ self.log('contract_pool %s by %s min %s' % (
+ pool_name, str(by), str(min_pgs)))
+ assert isinstance(pool_name, str)
+ assert isinstance(by, int)
+ assert pool_name in self.pools
+ if self.get_num_creating() > 0:
+ self.log('too many creating')
+ return False
+ proj = self.pools[pool_name] - by
+ if proj < min_pgs:
+ self.log('would drop below min_pgs, proj %d, currently %d' % (proj, self.pools[pool_name],))
+ return False
+ self.log("decrease pool size by %d" % (by,))
+ new_pg_num = self.pools[pool_name] - by
+ self.set_pool_property(pool_name, "pg_num", new_pg_num)
+ self.pools[pool_name] = new_pg_num
+ return True
+
+ def stop_pg_num_changes(self):
+ """
+ Reset all pg_num_targets back to pg_num, canceling splits and merges
+ """
+ self.log('Canceling any pending splits or merges...')
+ osd_dump = self.get_osd_dump_json()
+ for pool in osd_dump['pools']:
+ if 'pg_num_target' not in pool:
+ # mimic does not adjust pg num automatically
+ continue
+ if pool['pg_num'] != pool['pg_num_target']:
+ self.log('Setting pool %s (%d) pg_num %d -> %d' %
+ (pool['pool_name'], pool['pool'],
+ pool['pg_num_target'],
+ pool['pg_num']))
+ self.raw_cluster_cmd('osd', 'pool', 'set', pool['pool_name'],
+ 'pg_num', str(pool['pg_num']))
+
+ def set_pool_pgpnum(self, pool_name, force):
+ """
+ Set pgpnum property of pool_name pool.
+ """
+ with self.lock:
+ assert isinstance(pool_name, str)
+ assert pool_name in self.pools
+ if not force and self.get_num_creating() > 0:
+ return False
+ self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name])
+ return True
+
+ def list_pg_unfound(self, pgid):
+ """
+ return list of unfound pgs with the id specified
+ """
+ r = None
+ offset = {}
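+ # 'pg <pgid> list_unfound' pages its results; keep requesting with the
+ # oid of the last object returned as the offset until 'more' is 0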
+ while True:
+ out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_unfound',
+ json.dumps(offset))
+ j = json.loads(out)
+ if r is None:
+ r = j
+ else:
+ r['objects'].extend(j['objects'])
+ if 'more' not in j:
+ break
+ if j['more'] == 0:
+ break
+ offset = j['objects'][-1]['oid']
+ if 'more' in r:
+ del r['more']
+ return r
+
+ def get_pg_stats(self):
+ """
+ Dump the cluster and get pg stats
+ """
+ out = self.raw_cluster_cmd('pg', 'dump', '--format=json')
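+ # skip the first line of output (it may carry a status/header line)
+ # and parse the rest as JSON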
+ j = json.loads('\n'.join(out.split('\n')[1:]))
+ try:
+ return j['pg_map']['pg_stats']
+ except KeyError:
+ return j['pg_stats']
+
+ def get_pgids_to_force(self, backfill):
+ """
+ Return the randomized list of PGs that can have their recovery/backfill forced
+ """
+ j = self.get_pg_stats()
+ pgids = []
+ if backfill:
+ wanted = ['degraded', 'backfilling', 'backfill_wait']
+ else:
+ wanted = ['recovering', 'degraded', 'recovery_wait']
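+ # randomly pick roughly half of the eligible pgs that are not already
+ # being force-recovered/backfilled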
+ for pg in j:
+ status = pg['state'].split('+')
+ for t in wanted:
+ if random.random() > 0.5 and not ('forced_backfill' in status or 'forced_recovery' in status) and t in status:
+ pgids.append(pg['pgid'])
+ break
+ return pgids
+
+ def get_pgids_to_cancel_force(self, backfill):
+ """
+ Return the randomized list of PGs whose recovery/backfill priority is forced
+ """
+ j = self.get_pg_stats()
+ pgids = []
+ if backfill:
+ wanted = 'forced_backfill'
+ else:
+ wanted = 'forced_recovery'
+ for pg in j:
+ status = pg['state'].split('+')
+ if wanted in status and random.random() > 0.5:
+ pgids.append(pg['pgid'])
+ return pgids
+
+ def compile_pg_status(self):
+ """
+ Return a histogram of pg state values
+ """
+ ret = {}
+ j = self.get_pg_stats()
+ for pg in j:
+ for status in pg['state'].split('+'):
+ if status not in ret:
+ ret[status] = 0
+ ret[status] += 1
+ return ret
+
+ @wait_for_pg_stats
+ def with_pg_state(self, pool, pgnum, check):
+ pgstr = self.get_pgid(pool, pgnum)
+ stats = self.get_single_pg_stats(pgstr)
+ assert(check(stats['state']))
+
+ @wait_for_pg_stats
+ def with_pg(self, pool, pgnum, check):
+ pgstr = self.get_pgid(pool, pgnum)
+ stats = self.get_single_pg_stats(pgstr)
+ return check(stats)
+
+ def get_last_scrub_stamp(self, pool, pgnum):
+ """
+ Get the timestamp of the last scrub.
+ """
+ stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum))
+ return stats["last_scrub_stamp"]
+
+ def do_pg_scrub(self, pool, pgnum, stype):
+ """
+ Scrub pg and wait for scrubbing to finish
+ """
+ init = self.get_last_scrub_stamp(pool, pgnum)
+ RESEND_TIMEOUT = 120 # Must be a multiple of SLEEP_TIME
+ FATAL_TIMEOUT = RESEND_TIMEOUT * 3
+ SLEEP_TIME = 10
+ timer = 0
+ while init == self.get_last_scrub_stamp(pool, pgnum):
+ assert timer < FATAL_TIMEOUT, "fatal timeout trying to " + stype
+ self.log("waiting for scrub type %s" % (stype,))
+ if (timer % RESEND_TIMEOUT) == 0:
+ self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
+ # The first time in this loop is the actual request
+ if timer != 0 and stype == "repair":
+ self.log("WARNING: Resubmitted a non-idempotent repair")
+ time.sleep(SLEEP_TIME)
+ timer += SLEEP_TIME
+
+ def wait_snap_trimming_complete(self, pool):
+ """
+ Wait for snap trimming on pool to end
+ """
+ POLL_PERIOD = 10
+ FATAL_TIMEOUT = 600
+ start = time.time()
+ poolnum = self.get_pool_num(pool)
+ poolnumstr = "%s." % (poolnum,)
+ while (True):
+ now = time.time()
+ if (now - start) > FATAL_TIMEOUT:
+ assert (now - start) < FATAL_TIMEOUT, \
+ 'failed to complete snap trimming before timeout'
+ all_stats = self.get_pg_stats()
+ trimming = False
+ for pg in all_stats:
+ if (poolnumstr in pg['pgid']) and ('snaptrim' in pg['state']):
+ self.log("pg {pg} in trimming, state: {state}".format(
+ pg=pg['pgid'],
+ state=pg['state']))
+ trimming = True
+ if not trimming:
+ break
+ self.log("{pool} still trimming, waiting".format(pool=pool))
+ time.sleep(POLL_PERIOD)
+
+ def get_single_pg_stats(self, pgid):
+ """
+ Return pg for the pgid specified.
+ """
+ all_stats = self.get_pg_stats()
+
+ for pg in all_stats:
+ if pg['pgid'] == pgid:
+ return pg
+
+ return None
+
+ def get_object_pg_with_shard(self, pool, name, osdid):
+ """
+ """
+ pool_dump = self.get_pool_dump(pool)
+ object_map = self.get_object_map(pool, name)
+ if pool_dump["type"] == CephManager.ERASURE_CODED_POOL:
+ shard = object_map['acting'].index(osdid)
+ return "{pgid}s{shard}".format(pgid=object_map['pgid'],
+ shard=shard)
+ else:
+ return object_map['pgid']
+
+ def get_object_primary(self, pool, name):
+ """
+ """
+ object_map = self.get_object_map(pool, name)
+ return object_map['acting_primary']
+
+ def get_object_map(self, pool, name):
+ """
+ osd map --format=json converted to a python object
+ :returns: the python object
+ """
+ out = self.raw_cluster_cmd('--format=json', 'osd', 'map', pool, name)
+ return json.loads('\n'.join(out.split('\n')[1:]))
+
+ def get_osd_dump_json(self):
+ """
+ osd dump --format=json converted to a python object
+ :returns: the python object
+ """
+ out = self.raw_cluster_cmd('osd', 'dump', '--format=json')
+ return json.loads('\n'.join(out.split('\n')[1:]))
+
+ def get_osd_dump(self):
+ """
+ Dump osds
+ :returns: all osds
+ """
+ return self.get_osd_dump_json()['osds']
+
+ def get_osd_metadata(self):
+ """
+ osd metadata --format=json converted to a python object
+ :returns: the python object containing osd metadata information
+ """
+ out = self.raw_cluster_cmd('osd', 'metadata', '--format=json')
+ return json.loads('\n'.join(out.split('\n')[1:]))
+
+ def get_mgr_dump(self):
+ out = self.raw_cluster_cmd('mgr', 'dump', '--format=json')
+ return json.loads(out)
+
+ def get_stuck_pgs(self, type_, threshold):
+ """
+ :returns: stuck pg information from the cluster
+ """
+ out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold),
+ '--format=json')
+ return json.loads(out).get('stuck_pg_stats',[])
+
+ def get_num_unfound_objects(self):
+ """
+ Check cluster status to get the number of unfound objects
+ """
+ status = self.raw_cluster_status()
+ self.log(status)
+ return status['pgmap'].get('unfound_objects', 0)
+
+ def get_num_creating(self):
+ """
+ Find the number of pgs in creating mode.
+ """
+ pgs = self.get_pg_stats()
+ num = 0
+ for pg in pgs:
+ if 'creating' in pg['state']:
+ num += 1
+ return num
+
+ def get_num_active_clean(self):
+ """
+ Find the number of active and clean pgs.
+ """
+ pgs = self.get_pg_stats()
+ num = 0
+ for pg in pgs:
+ if (pg['state'].count('active') and
+ pg['state'].count('clean') and
+ not pg['state'].count('stale')):
+ num += 1
+ return num
+
+ def get_num_active_recovered(self):
+ """
+ Find the number of active and recovered pgs.
+ """
+ pgs = self.get_pg_stats()
+ num = 0
+ for pg in pgs:
+ if (pg['state'].count('active') and
+ not pg['state'].count('recover') and
+ not pg['state'].count('backfilling') and
+ not pg['state'].count('stale')):
+ num += 1
+ return num
+
+ def get_is_making_recovery_progress(self):
+ """
+ Return whether there is recovery progress discernible in the
+ raw cluster status
+ """
+ status = self.raw_cluster_status()
+ kps = status['pgmap'].get('recovering_keys_per_sec', 0)
+ bps = status['pgmap'].get('recovering_bytes_per_sec', 0)
+ ops = status['pgmap'].get('recovering_objects_per_sec', 0)
+ return kps > 0 or bps > 0 or ops > 0
+
+ def get_num_active(self):
+ """
+ Find the number of active pgs.
+ """
+ pgs = self.get_pg_stats()
+ num = 0
+ for pg in pgs:
+ if pg['state'].count('active') and not pg['state'].count('stale'):
+ num += 1
+ return num
+
+ def get_num_down(self):
+ """
+ Find the number of pgs that are down.
+ """
+ pgs = self.get_pg_stats()
+ num = 0
+ for pg in pgs:
+ if ((pg['state'].count('down') and not
+ pg['state'].count('stale')) or
+ (pg['state'].count('incomplete') and not
+ pg['state'].count('stale'))):
+ num += 1
+ return num
+
+ def get_num_active_down(self):
+ """
+ Find the number of pgs that are either active or down.
+ """
+ pgs = self.get_pg_stats()
+ num = 0
+ for pg in pgs:
+ if ((pg['state'].count('active') and not
+ pg['state'].count('stale')) or
+ (pg['state'].count('down') and not
+ pg['state'].count('stale')) or
+ (pg['state'].count('incomplete') and not
+ pg['state'].count('stale'))):
+ num += 1
+ return num
+
+ def is_clean(self):
+ """
+ True if all pgs are clean
+ """
+ return self.get_num_active_clean() == self.get_num_pgs()
+
+ def is_recovered(self):
+ """
+ True if all pgs have recovered
+ """
+ return self.get_num_active_recovered() == self.get_num_pgs()
+
+ def is_active_or_down(self):
+ """
+ True if all pgs are active or down
+ """
+ return self.get_num_active_down() == self.get_num_pgs()
+
+ def wait_for_clean(self, timeout=1200):
+ """
+ Returns true when all pgs are clean.
+ """
+ self.log("waiting for clean")
+ start = time.time()
+ num_active_clean = self.get_num_active_clean()
+ while not self.is_clean():
+ if timeout is not None:
+ if self.get_is_making_recovery_progress():
+ self.log("making progress, resetting timeout")
+ start = time.time()
+ else:
+ self.log("no progress seen, keeping timeout for now")
+ if time.time() - start >= timeout:
+ self.log('dumping pgs')
+ out = self.raw_cluster_cmd('pg', 'dump')
+ self.log(out)
+ assert time.time() - start < timeout, \
+ 'failed to become clean before timeout expired'
+ cur_active_clean = self.get_num_active_clean()
+ if cur_active_clean != num_active_clean:
+ start = time.time()
+ num_active_clean = cur_active_clean
+ time.sleep(3)
+ self.log("clean!")
+
+ def are_all_osds_up(self):
+ """
+ Returns true if all osds are up.
+ """
+ x = self.get_osd_dump()
+ return (len(x) == sum([(y['up'] > 0) for y in x]))
+
+ def wait_for_all_osds_up(self, timeout=None):
+ """
+ When this exits, either the timeout has expired, or all
+ osds are up.
+ """
+ self.log("waiting for all up")
+ start = time.time()
+ while not self.are_all_osds_up():
+ if timeout is not None:
+ assert time.time() - start < timeout, \
+ 'timeout expired in wait_for_all_osds_up'
+ time.sleep(3)
+ self.log("all up!")
+
+ def pool_exists(self, pool):
+ if pool in self.list_pools():
+ return True
+ return False
+
+ def wait_for_pool(self, pool, timeout=300):
+ """
+ Wait for a pool to exist
+ """
+ self.log('waiting for pool %s to exist' % pool)
+ start = time.time()
+ while not self.pool_exists(pool):
+ if timeout is not None:
+ assert time.time() - start < timeout, \
+ 'timeout expired in wait_for_pool'
+ time.sleep(3)
+
+ def wait_for_pools(self, pools):
+ for pool in pools:
+ self.wait_for_pool(pool)
+
+ def is_mgr_available(self):
+ x = self.get_mgr_dump()
+ return x.get('available', False)
+
+ def wait_for_mgr_available(self, timeout=None):
+ self.log("waiting for mgr available")
+ start = time.time()
+ while not self.is_mgr_available():
+ if timeout is not None:
+ assert time.time() - start < timeout, \
+ 'timeout expired in wait_for_mgr_available'
+ time.sleep(3)
+ self.log("mgr available!")
+
+ def wait_for_recovery(self, timeout=None):
+ """
+ Wait for recovery. When this exits, recovery is complete.
+ """
+ self.log("waiting for recovery to complete")
+ start = time.time()
+ num_active_recovered = self.get_num_active_recovered()
+ while not self.is_recovered():
+ now = time.time()
+ if timeout is not None:
+ if self.get_is_making_recovery_progress():
+ self.log("making progress, resetting timeout")
+ start = time.time()
+ else:
+ self.log("no progress seen, keeping timeout for now")
+ if now - start >= timeout:
+ if self.is_recovered():
+ break
+ self.log('dumping pgs')
+ out = self.raw_cluster_cmd('pg', 'dump')
+ self.log(out)
+ assert now - start < timeout, \
+ 'failed to recover before timeout expired'
+ cur_active_recovered = self.get_num_active_recovered()
+ if cur_active_recovered != num_active_recovered:
+ start = time.time()
+ num_active_recovered = cur_active_recovered
+ time.sleep(3)
+ self.log("recovered!")
+
+ def wait_for_active(self, timeout=None):
+ """
+ Wait for peering. When this exits, all pgs are active.
+ """
+ self.log("waiting for peering to complete")
+ start = time.time()
+ num_active = self.get_num_active()
+ while not self.is_active():
+ if timeout is not None:
+ if time.time() - start >= timeout:
+ self.log('dumping pgs')
+ out = self.raw_cluster_cmd('pg', 'dump')
+ self.log(out)
+ assert time.time() - start < timeout, \
+ 'failed to recover before timeout expired'
+ cur_active = self.get_num_active()
+ if cur_active != num_active:
+ start = time.time()
+ num_active = cur_active
+ time.sleep(3)
+ self.log("active!")
+
+ def wait_for_active_or_down(self, timeout=None):
+ """
+ Wait for peering. When this exits, every pg is either
+ active or down.
+ """
+ self.log("waiting for peering to complete or become blocked")
+ start = time.time()
+ num_active_down = self.get_num_active_down()
+ while not self.is_active_or_down():
+ if timeout is not None:
+ if time.time() - start >= timeout:
+ self.log('dumping pgs')
+ out = self.raw_cluster_cmd('pg', 'dump')
+ self.log(out)
+ assert time.time() - start < timeout, \
+ 'failed to recover before timeout expired'
+ cur_active_down = self.get_num_active_down()
+ if cur_active_down != num_active_down:
+ start = time.time()
+ num_active_down = cur_active_down
+ time.sleep(3)
+ self.log("active or down!")
+
+ def osd_is_up(self, osd):
+ """
+ Wrapper for osd check
+ """
+ osds = self.get_osd_dump()
+ return osds[osd]['up'] > 0
+
+ def wait_till_osd_is_up(self, osd, timeout=None):
+ """
+ Loop waiting for osd.
+ """
+ self.log('waiting for osd.%d to be up' % osd)
+ start = time.time()
+ while not self.osd_is_up(osd):
+ if timeout is not None:
+ assert time.time() - start < timeout, \
+ 'osd.%d failed to come up before timeout expired' % osd
+ time.sleep(3)
+ self.log('osd.%d is up' % osd)
+
+ def is_active(self):
+ """
+ Wrapper to check if all pgs are active
+ """
+ return self.get_num_active() == self.get_num_pgs()
+
+ def wait_till_active(self, timeout=None):
+ """
+ Wait until all pgs are active.
+ """
+ self.log("waiting till active")
+ start = time.time()
+ while not self.is_active():
+ if timeout is not None:
+ if time.time() - start >= timeout:
+ self.log('dumping pgs')
+ out = self.raw_cluster_cmd('pg', 'dump')
+ self.log(out)
+ assert time.time() - start < timeout, \
+ 'failed to become active before timeout expired'
+ time.sleep(3)
+ self.log("active!")
+
+ def wait_till_pg_convergence(self, timeout=None):
+ start = time.time()
+ old_stats = None
+ active_osds = [osd['osd'] for osd in self.get_osd_dump()
+ if osd['in'] and osd['up']]
+ while True:
+ # strictly speaking, there is no need to wait for the mon. But due
+ # to the "ms inject socket failures" setting, the osdmap could be
+ # delayed, so the mgr is likely to ignore pg-stat messages for pgs
+ # serving newly created pools it does not yet know about. So, to make
+ # sure the mgr is updated with the latest pg-stats, waiting for the
+ # mon/mgr is necessary.
+ self.flush_pg_stats(active_osds)
+ new_stats = dict((stat['pgid'], stat['state'])
+ for stat in self.get_pg_stats())
+ if old_stats == new_stats:
+ return old_stats
+ if timeout is not None:
+ assert time.time() - start < timeout, \
+ 'failed to reach convergence before %d secs' % timeout
+ old_stats = new_stats
+ # longer than mgr_stats_period
+ time.sleep(5 + 1)
+
+ def mark_out_osd(self, osd):
+ """
+ Wrapper to mark osd out.
+ """
+ self.raw_cluster_cmd('osd', 'out', str(osd))
+
+ def kill_osd(self, osd):
+ """
+ Kill osds by either power cycling (if indicated by the config)
+ or by stopping.
+ """
+ if self.config.get('powercycle'):
+ remote = self.find_remote('osd', osd)
+ self.log('kill_osd on osd.{o} '
+ 'doing powercycle of {s}'.format(o=osd, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_off()
+ elif self.config.get('bdev_inject_crash') and self.config.get('bdev_inject_crash_probability'):
+ if random.uniform(0, 1) < self.config.get('bdev_inject_crash_probability', .5):
+ self.inject_args(
+ 'osd', osd,
+ 'bdev-inject-crash', self.config.get('bdev_inject_crash'))
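+ # the osd is expected to crash on its own after the injection;
+ # wait() returning cleanly means it did not fail as expected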
+ try:
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).wait()
+ except Exception:
+ pass
+ else:
+ raise RuntimeError('osd.%s did not fail' % osd)
+ else:
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop()
+ else:
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop()
+
+ @staticmethod
+ def _assert_ipmi(remote):
+ assert remote.console.has_ipmi_credentials, (
+ "powercycling requested but RemoteConsole is not "
+ "initialized. Check ipmi config.")
+
+ def blackhole_kill_osd(self, osd):
+ """
+ Stop osd if nothing else works.
+ """
+ self.inject_args('osd', osd,
+ 'objectstore-blackhole', True)
+ time.sleep(2)
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop()
+
+ def revive_osd(self, osd, timeout=360, skip_admin_check=False):
+ """
+ Revive osds by either power cycling (if indicated by the config)
+ or by restarting.
+ """
+ if self.config.get('powercycle'):
+ remote = self.find_remote('osd', osd)
+ self.log('kill_osd on osd.{o} doing powercycle of {s}'.
+ format(o=osd, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_on()
+ if not remote.console.check_status(300):
+ raise Exception('Failed to revive osd.{o} via ipmi'.
+ format(o=osd))
+ teuthology.reconnect(self.ctx, 60, [remote])
+ mount_osd_data(self.ctx, remote, self.cluster, str(osd))
+ self.make_admin_daemon_dir(remote)
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).reset()
+ self.ctx.daemons.get_daemon('osd', osd, self.cluster).restart()
+
+ if not skip_admin_check:
+ # wait for dump_ops_in_flight; this command doesn't appear
+ # until after the signal handler is installed and it is safe
+ # to stop the osd again without making valgrind leak checks
+ # unhappy. see #5924.
+ self.wait_run_admin_socket('osd', osd,
+ args=['dump_ops_in_flight'],
+ timeout=timeout, stdout=DEVNULL)
+
+ def mark_down_osd(self, osd):
+ """
+ Cluster command wrapper
+ """
+ self.raw_cluster_cmd('osd', 'down', str(osd))
+
+ def mark_in_osd(self, osd):
+ """
+ Cluster command wrapper
+ """
+ self.raw_cluster_cmd('osd', 'in', str(osd))
+
+ def signal_osd(self, osd, sig, silent=False):
+ """
+ Wrapper to local get_daemon call which sends the given
+ signal to the given osd.
+ """
+ self.ctx.daemons.get_daemon('osd', osd,
+ self.cluster).signal(sig, silent=silent)
+
+ ## monitors
+ def signal_mon(self, mon, sig, silent=False):
+ """
+ Wrapper to local get_daemon call
+ """
+ self.ctx.daemons.get_daemon('mon', mon,
+ self.cluster).signal(sig, silent=silent)
+
+ def kill_mon(self, mon):
+ """
+ Kill the monitor by either power cycling (if the config says so),
+ or by doing a stop.
+ """
+ if self.config.get('powercycle'):
+ remote = self.find_remote('mon', mon)
+ self.log('kill_mon on mon.{m} doing powercycle of {s}'.
+ format(m=mon, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_off()
+ else:
+ self.ctx.daemons.get_daemon('mon', mon, self.cluster).stop()
+
+ def revive_mon(self, mon):
+ """
+ Restart by either power cycling (if the config says so),
+ or by doing a normal restart.
+ """
+ if self.config.get('powercycle'):
+ remote = self.find_remote('mon', mon)
+ self.log('revive_mon on mon.{m} doing powercycle of {s}'.
+ format(m=mon, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_on()
+ self.make_admin_daemon_dir(remote)
+ self.ctx.daemons.get_daemon('mon', mon, self.cluster).restart()
+
+ def revive_mgr(self, mgr):
+ """
+ Restart by either power cycling (if the config says so),
+ or by doing a normal restart.
+ """
+ if self.config.get('powercycle'):
+ remote = self.find_remote('mgr', mgr)
+ self.log('revive_mgr on mgr.{m} doing powercycle of {s}'.
+ format(m=mgr, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_on()
+ self.make_admin_daemon_dir(remote)
+ self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart()
+
+ def get_mon_status(self, mon):
+ """
+ Extract all the monitor status information from the cluster
+ """
+ addr = self.ctx.ceph[self.cluster].mons['mon.%s' % mon]
+ out = self.raw_cluster_cmd('-m', addr, 'mon_status')
+ return json.loads(out)
+
+ def get_mon_quorum(self):
+ """
+ Extract monitor quorum information from the cluster
+ """
+ out = self.raw_cluster_cmd('quorum_status')
+ j = json.loads(out)
+ self.log('quorum_status is %s' % out)
+ return j['quorum']
+
+ def wait_for_mon_quorum_size(self, size, timeout=300):
+ """
+ Loop until quorum size is reached.
+ """
+ self.log('waiting for quorum size %d' % size)
+ start = time.time()
+ while not len(self.get_mon_quorum()) == size:
+ if timeout is not None:
+ assert time.time() - start < timeout, \
+ ('failed to reach quorum size %d '
+ 'before timeout expired' % size)
+ time.sleep(3)
+ self.log("quorum is size %d" % size)
+
+ def get_mon_health(self, debug=False):
+ """
+ Extract all the monitor health information.
+ """
+ out = self.raw_cluster_cmd('health', '--format=json')
+ if debug:
+ self.log('health:\n{h}'.format(h=out))
+ return json.loads(out)
+
+ def get_filepath(self):
+ """
+ Return path to osd data with {id} needing to be replaced
+ """
+ return '/var/lib/ceph/osd/' + self.cluster + '-{id}'
+
+ def make_admin_daemon_dir(self, remote):
+ """
+ Create /var/run/ceph directory on remote site.
+
+ :param ctx: Context
+ :param remote: Remote site
+ """
+ remote.run(args=['sudo',
+ 'install', '-d', '-m0777', '--', '/var/run/ceph', ], )
+
+ def get_service_task_status(self, service, status_key):
+ """
+ Return daemon task status for a given ceph service.
+
+ :param service: ceph service (mds, osd, etc...)
+ :param status_key: matching task status key
+ """
+ task_status = {}
+ status = self.raw_cluster_status()
+ try:
+ for k,v in status['servicemap']['services'][service]['daemons'].items():
+ ts = dict(v).get('task_status', None)
+ if ts:
+ task_status[k] = ts[status_key]
+ except KeyError: # catches missing service and status key
+ return {}
+ self.log(task_status)
+ return task_status
+
+def utility_task(name):
+ """
+ Generate ceph_manager subtask corresponding to ceph_manager
+ method name
+ """
+ def task(ctx, config):
+ if config is None:
+ config = {}
+ args = config.get('args', [])
+ kwargs = config.get('kwargs', {})
+ cluster = config.get('cluster', 'ceph')
+ fn = getattr(ctx.managers[cluster], name)
+ fn(*args, **kwargs)
+ return task
+
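+# Expose selected CephManager methods as standalone teuthology tasks.
+# For example (illustrative only -- the exact yaml spelling depends on how
+# this module is referenced in the job), a fragment like
+#   tasks:
+#   - ceph_manager.wait_for_clean: {}
+# ends up calling ctx.managers['ceph'].wait_for_clean().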
+revive_osd = utility_task("revive_osd")
+revive_mon = utility_task("revive_mon")
+kill_osd = utility_task("kill_osd")
+kill_mon = utility_task("kill_mon")
+create_pool = utility_task("create_pool")
+remove_pool = utility_task("remove_pool")
+wait_for_clean = utility_task("wait_for_clean")
+flush_all_pg_stats = utility_task("flush_all_pg_stats")
+set_pool_property = utility_task("set_pool_property")
+do_pg_scrub = utility_task("do_pg_scrub")
+wait_for_pool = utility_task("wait_for_pool")
+wait_for_pools = utility_task("wait_for_pools")
diff --git a/qa/tasks/ceph_objectstore_tool.py b/qa/tasks/ceph_objectstore_tool.py
new file mode 100644
index 00000000..2199266e
--- /dev/null
+++ b/qa/tasks/ceph_objectstore_tool.py
@@ -0,0 +1,663 @@
+"""
+ceph_objectstore_tool - Simple test of ceph-objectstore-tool utility
+"""
+from io import BytesIO
+
+import contextlib
+import json
+import logging
+import os
+import six
+import sys
+import tempfile
+import time
+from tasks import ceph_manager
+from tasks.util.rados import (rados, create_replicated_pool, create_ec_pool)
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+from teuthology.exceptions import CommandFailedError
+
+# from util.rados import (rados, create_ec_pool,
+# create_replicated_pool,
+# create_cache_pool)
+
+log = logging.getLogger(__name__)
+
+# Should get cluster name "ceph" from somewhere
+# and normal path from osd_data and osd_journal in conf
+FSPATH = "/var/lib/ceph/osd/ceph-{id}"
+JPATH = "/var/lib/ceph/osd/ceph-{id}/journal"
+
+
+def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR,
+ BASE_NAME, DATALINECOUNT):
+ objects = range(1, NUM_OBJECTS + 1)
+ for i in objects:
+ NAME = BASE_NAME + "{num}".format(num=i)
+ LOCALNAME = os.path.join(DATADIR, NAME)
+
+ dataline = range(DATALINECOUNT)
+ fd = open(LOCALNAME, "w")
+ data = "This is the data for " + NAME + "\n"
+ for _ in dataline:
+ fd.write(data)
+ fd.close()
+
+
+def cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
+ BASE_NAME, DATALINECOUNT):
+
+ objects = range(1, NUM_OBJECTS + 1)
+ for i in objects:
+ NAME = BASE_NAME + "{num}".format(num=i)
+ DDNAME = os.path.join(DATADIR, NAME)
+
+ remote.run(args=['rm', '-f', DDNAME])
+
+ dataline = range(DATALINECOUNT)
+ data = "This is the data for " + NAME + "\n"
+ DATA = ""
+ for _ in dataline:
+ DATA += data
+ teuthology.write_file(remote, DDNAME, DATA)
+
+
+def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR,
+ BASE_NAME, DATALINECOUNT, POOL, db, ec):
+ ERRORS = 0
+ log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS))
+
+ objects = range(1, NUM_OBJECTS + 1)
+ for i in objects:
+ NAME = BASE_NAME + "{num}".format(num=i)
+ DDNAME = os.path.join(DATADIR, NAME)
+
+ proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME],
+ wait=False)
+ # proc = remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME])
+ ret = proc.wait()
+ if ret != 0:
+ log.critical("Rados put failed with status {ret}".
+ format(ret=proc.exitstatus))
+ sys.exit(1)
+
+ db[NAME] = {}
+
+ keys = range(i)
+ db[NAME]["xattr"] = {}
+ for k in keys:
+ if k == 0:
+ continue
+ mykey = "key{i}-{k}".format(i=i, k=k)
+ myval = "val{i}-{k}".format(i=i, k=k)
+ proc = remote.run(args=['rados', '-p', POOL, 'setxattr',
+ NAME, mykey, myval])
+ ret = proc.wait()
+ if ret != 0:
+ log.error("setxattr failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ db[NAME]["xattr"][mykey] = myval
+
+ # Erasure coded pools don't support omap
+ if ec:
+ continue
+
+ # Create omap header in all objects but REPobject1
+ if i != 1:
+ myhdr = "hdr{i}".format(i=i)
+ proc = remote.run(args=['rados', '-p', POOL, 'setomapheader',
+ NAME, myhdr])
+ ret = proc.wait()
+ if ret != 0:
+ log.critical("setomapheader failed with {ret}".format(ret=ret))
+ ERRORS += 1
+ db[NAME]["omapheader"] = myhdr
+
+ db[NAME]["omap"] = {}
+ for k in keys:
+ if k == 0:
+ continue
+ mykey = "okey{i}-{k}".format(i=i, k=k)
+ myval = "oval{i}-{k}".format(i=i, k=k)
+ proc = remote.run(args=['rados', '-p', POOL, 'setomapval',
+ NAME, mykey, myval])
+ ret = proc.wait()
+ if ret != 0:
+ log.critical("setomapval failed with {ret}".format(ret=ret))
+ db[NAME]["omap"][mykey] = myval
+
+ return ERRORS
+
+
+def get_lines(filename):
+ tmpfd = open(filename, "r")
+ line = True
+ lines = []
+ while line:
+ line = tmpfd.readline().rstrip('\n')
+ if line:
+ lines += [line]
+ tmpfd.close()
+ os.unlink(filename)
+ return lines
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run ceph_objectstore_tool test
+
+ The config should be as follows::
+
+ ceph_objectstore_tool:
+ objects: 20 # <number of objects>
+ pgnum: 12
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'ceph_objectstore_tool task only accepts a dict for configuration'
+
+ log.info('Beginning ceph_objectstore_tool...')
+
+ log.debug(config)
+ log.debug(ctx)
+ clients = ctx.cluster.only(teuthology.is_type('client'))
+ assert len(clients.remotes) > 0, 'Must specify at least 1 client'
+ (cli_remote, _) = clients.remotes.popitem()
+ log.debug(cli_remote)
+
+ # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys()))
+ # client = clients.popitem()
+ # log.info(client)
+ osds = ctx.cluster.only(teuthology.is_type('osd'))
+ log.info("OSDS")
+ log.info(osds)
+ log.info(osds.remotes)
+
+ manager = ctx.managers['ceph']
+ while (len(manager.get_osd_status()['up']) !=
+ len(manager.get_osd_status()['raw'])):
+ time.sleep(10)
+ while (len(manager.get_osd_status()['in']) !=
+ len(manager.get_osd_status()['up'])):
+ time.sleep(10)
+ manager.raw_cluster_cmd('osd', 'set', 'noout')
+ manager.raw_cluster_cmd('osd', 'set', 'nodown')
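+ # noout/nodown keep the cluster from reacting while we stop OSDs and
+ # operate on their object stores offline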
+
+ PGNUM = config.get('pgnum', 12)
+ log.info("pgnum: {num}".format(num=PGNUM))
+
+ ERRORS = 0
+
+ REP_POOL = "rep_pool"
+ REP_NAME = "REPobject"
+ create_replicated_pool(cli_remote, REP_POOL, PGNUM)
+ ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME)
+
+ EC_POOL = "ec_pool"
+ EC_NAME = "ECobject"
+ create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM)
+ ERRORS += test_objectstore(ctx, config, cli_remote,
+ EC_POOL, EC_NAME, ec=True)
+
+ if ERRORS == 0:
+ log.info("TEST PASSED")
+ else:
+ log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS))
+
+ assert ERRORS == 0
+
+ try:
+ yield
+ finally:
+ log.info('Ending ceph_objectstore_tool')
+
+
+def test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME, ec=False):
+ manager = ctx.managers['ceph']
+
+ osds = ctx.cluster.only(teuthology.is_type('osd'))
+
+ TEUTHDIR = teuthology.get_testdir(ctx)
+ DATADIR = os.path.join(TEUTHDIR, "ceph.data")
+ DATALINECOUNT = 10000
+ ERRORS = 0
+ NUM_OBJECTS = config.get('objects', 10)
+ log.info("objects: {num}".format(num=NUM_OBJECTS))
+
+ pool_dump = manager.get_pool_dump(REP_POOL)
+ REPID = pool_dump['pool']
+
+ log.debug("repid={num}".format(num=REPID))
+
+ db = {}
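+ # db maps object name -> expected xattrs/omap entries/omap header and,
+ # later, the per-pg json descriptors reported by '--op list'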
+
+ LOCALDIR = tempfile.mkdtemp("cod")
+
+ cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR,
+ REP_NAME, DATALINECOUNT)
+ allremote = []
+ allremote.append(cli_remote)
+ allremote += list(osds.remotes.keys())
+ allremote = list(set(allremote))
+ for remote in allremote:
+ cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR,
+ REP_NAME, DATALINECOUNT)
+
+ ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR,
+ REP_NAME, DATALINECOUNT, REP_POOL, db, ec)
+
+ pgs = {}
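+ # map each osd id to the pgs it hosts for this pool (with an 's<shard>'
+ # suffix appended for erasure coded pools)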
+ for stats in manager.get_pg_stats():
+ if stats["pgid"].find(str(REPID) + ".") != 0:
+ continue
+ if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
+ for osd in stats["acting"]:
+ pgs.setdefault(osd, []).append(stats["pgid"])
+ elif pool_dump["type"] == ceph_manager.CephManager.ERASURE_CODED_POOL:
+ shard = 0
+ for osd in stats["acting"]:
+ pgs.setdefault(osd, []).append("{pgid}s{shard}".
+ format(pgid=stats["pgid"],
+ shard=shard))
+ shard += 1
+ else:
+ raise Exception("{pool} has an unexpected type {type}".
+ format(pool=REP_POOL, type=pool_dump["type"]))
+
+ log.info(pgs)
+ log.info(db)
+
+ for osd in manager.get_osd_status()['up']:
+ manager.kill_osd(osd)
+ time.sleep(5)
+
+ pgswithobjects = set()
+ objsinpg = {}
+
+ # Test --op list and generate json for all objects
+ log.info("Test --op list by generating json for all objects")
+ prefix = ("sudo ceph-objectstore-tool "
+ "--data-path {fpath} "
+ "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH)
+ for remote in osds.remotes.keys():
+ log.debug(remote)
+ log.debug(osds.remotes[remote])
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ log.info("process osd.{id} on {remote}".
+ format(id=osdid, remote=remote))
+ cmd = (prefix + "--op list").format(id=osdid)
+ try:
+ lines = remote.sh(cmd, check_status=False).splitlines()
+ for pgline in lines:
+ if not pgline:
+ continue
+ (pg, obj) = json.loads(pgline)
+ name = obj['oid']
+ if name in db:
+ pgswithobjects.add(pg)
+ objsinpg.setdefault(pg, []).append(name)
+ db[name].setdefault("pg2json",
+ {})[pg] = json.dumps(obj)
+ except CommandFailedError as e:
+ log.error("Bad exit status {ret} from --op list request".
+ format(ret=e.exitstatus))
+ ERRORS += 1
+
+ log.info(db)
+ log.info(pgswithobjects)
+ log.info(objsinpg)
+
+ if pool_dump["type"] == ceph_manager.CephManager.REPLICATED_POOL:
+ # Test get-bytes
+ log.info("Test get-bytes and set-bytes")
+ for basename in db.keys():
+ file = os.path.join(DATADIR, basename)
+ GETNAME = os.path.join(DATADIR, "get")
+ SETNAME = os.path.join(DATADIR, "set")
+
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg, JSON in db[basename]["pg2json"].items():
+ if pg in pgs[osdid]:
+ cmd = ((prefix + "--pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ cmd.append(run.Raw("'{json}'".format(json=JSON)))
+ cmd += ("get-bytes {fname}".
+ format(fname=GETNAME).split())
+ proc = remote.run(args=cmd, check_status=False)
+ if proc.exitstatus != 0:
+ remote.run(args="rm -f {getfile}".
+ format(getfile=GETNAME).split())
+ log.error("Bad exit status {ret}".
+ format(ret=proc.exitstatus))
+ ERRORS += 1
+ continue
+ cmd = ("diff -q {file} {getfile}".
+ format(file=file, getfile=GETNAME))
+ proc = remote.run(args=cmd.split())
+ if proc.exitstatus != 0:
+ log.error("Data from get-bytes differ")
+ # log.debug("Got:")
+ # cat_file(logging.DEBUG, GETNAME)
+ # log.debug("Expected:")
+ # cat_file(logging.DEBUG, file)
+ ERRORS += 1
+ remote.run(args="rm -f {getfile}".
+ format(getfile=GETNAME).split())
+
+ data = ("put-bytes going into {file}\n".
+ format(file=file))
+ teuthology.write_file(remote, SETNAME, data)
+ cmd = ((prefix + "--pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ cmd.append(run.Raw("'{json}'".format(json=JSON)))
+ cmd += ("set-bytes {fname}".
+ format(fname=SETNAME).split())
+ proc = remote.run(args=cmd, check_status=False)
+ proc.wait()
+ if proc.exitstatus != 0:
+ log.info("set-bytes failed for object {obj} "
+ "in pg {pg} osd.{id} ret={ret}".
+ format(obj=basename, pg=pg,
+ id=osdid, ret=proc.exitstatus))
+ ERRORS += 1
+
+ cmd = ((prefix + "--pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ cmd.append(run.Raw("'{json}'".format(json=JSON)))
+ cmd += "get-bytes -".split()
+ try:
+ output = remote.sh(cmd, wait=True)
+ if data != output:
+ log.error("Data inconsistent after "
+ "set-bytes, got:")
+ log.error(output)
+ ERRORS += 1
+ except CommandFailedError as e:
+ log.error("get-bytes after "
+ "set-bytes ret={ret}".
+ format(ret=e.exitstatus))
+ ERRORS += 1
+
+ cmd = ((prefix + "--pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ cmd.append(run.Raw("'{json}'".format(json=JSON)))
+ cmd += ("set-bytes {fname}".
+ format(fname=file).split())
+ proc = remote.run(args=cmd, check_status=False)
+ proc.wait()
+ if proc.exitstatus != 0:
+ log.info("set-bytes failed for object {obj} "
+ "in pg {pg} osd.{id} ret={ret}".
+ format(obj=basename, pg=pg,
+ id=osdid, ret=proc.exitstatus))
+ ERRORS += 1
+
+ log.info("Test list-attrs get-attr")
+ for basename in db.keys():
+ file = os.path.join(DATADIR, basename)
+ GETNAME = os.path.join(DATADIR, "get")
+ SETNAME = os.path.join(DATADIR, "set")
+
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg, JSON in db[basename]["pg2json"].items():
+ if pg in pgs[osdid]:
+ cmd = ((prefix + "--pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ cmd.append(run.Raw("'{json}'".format(json=JSON)))
+ cmd += ["list-attrs"]
+ try:
+ keys = remote.sh(cmd, wait=True, stderr=BytesIO()).split()
+ except CommandFailedError as e:
+ log.error("Bad exit status {ret}".
+ format(ret=e.exitstatus))
+ ERRORS += 1
+ continue
+ values = dict(db[basename]["xattr"])
+
+ for key in keys:
+ if (key == "_" or
+ key == "snapset" or
+ key == "hinfo_key"):
+ continue
+ key = key.strip("_")
+ if key not in values:
+ log.error("The key {key} should be present".
+ format(key=key))
+ ERRORS += 1
+ continue
+ exp = values.pop(key)
+ cmd = ((prefix + "--pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ cmd.append(run.Raw("'{json}'".format(json=JSON)))
+ cmd += ("get-attr {key}".
+ format(key="_" + key).split())
+ try:
+ val = remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ log.error("get-attr failed with {ret}".
+ format(ret=e.exitstatus))
+ ERRORS += 1
+ continue
+ if exp != val:
+ log.error("For key {key} got value {got} "
+ "instead of {expected}".
+ format(key=key, got=val,
+ expected=exp))
+ ERRORS += 1
+ if "hinfo_key" in keys:
+ cmd_prefix = prefix.format(id=osdid)
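+ # round-trip test: stash the original hinfo_key value, overwrite
+ # it with a placeholder via set-attr, then restore it and verify
+ # that get-attr returns the original (base64 keeps the binary
+ # value shell-safe)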
+ cmd = """
+ expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64)
+ echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} -
+ test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder
+ echo $expected | base64 --decode | \
+ {prefix} --pgid {pg} '{json}' set-attr {key} -
+ test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected
+ """.format(prefix=cmd_prefix, pg=pg, json=JSON,
+ key="hinfo_key")
+ log.debug(cmd)
+ proc = remote.run(args=['bash', '-e', '-x',
+ '-c', cmd],
+ check_status=False,
+ stdout=BytesIO(),
+ stderr=BytesIO())
+ proc.wait()
+ if proc.exitstatus != 0:
+ log.error("failed with " +
+ str(proc.exitstatus))
+ log.error(" ".join([
+ six.ensure_str(proc.stdout.getvalue()),
+ six.ensure_str(proc.stderr.getvalue()),
+ ]))
+ ERRORS += 1
+
+ if len(values) != 0:
+ log.error("Not all keys found, remaining keys:")
+ log.error(values)
+
+ log.info("Test pg info")
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg in pgs[osdid]:
+ cmd = ((prefix + "--op info --pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ try:
+ info = remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ log.error("Failure of --op info command with {ret}".
+ format(e.exitstatus))
+ ERRORS += 1
+ continue
+ if str(pg) not in info:
+ log.error("Bad data from info: {info}".format(info=info))
+ ERRORS += 1
+
+ log.info("Test pg logging")
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg in pgs[osdid]:
+ cmd = ((prefix + "--op log --pgid {pg}").
+ format(id=osdid, pg=pg).split())
+ try:
+ output = remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ log.error("Getting log failed for pg {pg} "
+ "from osd.{id} with {ret}".
+ format(pg=pg, id=osdid, ret=e.exitstatus))
+ ERRORS += 1
+ continue
+ HASOBJ = pg in pgswithobjects
+ MODOBJ = "modify" in output
+ if HASOBJ != MODOBJ:
+ log.error("Bad log for pg {pg} from osd.{id}".
+ format(pg=pg, id=osdid))
+ MSG = (HASOBJ and [""] or ["NOT "])[0]
+ log.error("Log should {msg}have a modify entry".
+ format(msg=MSG))
+ ERRORS += 1
+
+ log.info("Test pg export")
+ EXP_ERRORS = 0
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg in pgs[osdid]:
+ fpath = os.path.join(DATADIR, "osd{id}.{pg}".
+ format(id=osdid, pg=pg))
+
+ cmd = ((prefix + "--op export --pgid {pg} --file {file}").
+ format(id=osdid, pg=pg, file=fpath))
+ try:
+ remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ log.error("Exporting failed for pg {pg} "
+ "on osd.{id} with {ret}".
+ format(pg=pg, id=osdid, ret=e.exitstatus))
+ EXP_ERRORS += 1
+
+ ERRORS += EXP_ERRORS
+
+ log.info("Test pg removal")
+ RM_ERRORS = 0
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg in pgs[osdid]:
+ cmd = ((prefix + "--force --op remove --pgid {pg}").
+ format(pg=pg, id=osdid))
+ try:
+ remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ log.error("Removing failed for pg {pg} "
+ "on osd.{id} with {ret}".
+ format(pg=pg, id=osdid, ret=e.exitstatus))
+ RM_ERRORS += 1
+
+ ERRORS += RM_ERRORS
+
+ IMP_ERRORS = 0
+ if EXP_ERRORS == 0 and RM_ERRORS == 0:
+ log.info("Test pg import")
+
+ for remote in osds.remotes.keys():
+ for role in osds.remotes[remote]:
+ if not role.startswith("osd."):
+ continue
+ osdid = int(role.split('.')[1])
+ if osdid not in pgs:
+ continue
+
+ for pg in pgs[osdid]:
+ fpath = os.path.join(DATADIR, "osd{id}.{pg}".
+ format(id=osdid, pg=pg))
+
+ cmd = ((prefix + "--op import --file {file}").
+ format(id=osdid, file=fpath))
+ try:
+ remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ log.error("Import failed from {file} with {ret}".
+ format(file=fpath, ret=e.exitstatus))
+ IMP_ERRORS += 1
+ else:
+ log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES")
+
+ ERRORS += IMP_ERRORS
+
+ if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0:
+ log.info("Restarting OSDs....")
+ # They will still appear to be up because nodown is set
+ for osd in manager.get_osd_status()['up']:
+ manager.revive_osd(osd)
+ # Wait for health?
+ time.sleep(5)
+ # Let scrub after test runs verify consistency of all copies
+ log.info("Verify replicated import data")
+ objects = range(1, NUM_OBJECTS + 1)
+ for i in objects:
+ NAME = REP_NAME + "{num}".format(num=i)
+ TESTNAME = os.path.join(DATADIR, "gettest")
+ REFNAME = os.path.join(DATADIR, NAME)
+
+ proc = rados(ctx, cli_remote,
+ ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False)
+
+ ret = proc.wait()
+ if ret != 0:
+ log.error("After import, rados get failed with {ret}".
+ format(ret=proc.exitstatus))
+ ERRORS += 1
+ continue
+
+ cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME,
+ ref=REFNAME)
+ proc = cli_remote.run(args=cmd, check_status=False)
+ proc.wait()
+ if proc.exitstatus != 0:
+ log.error("Data comparison failed for {obj}".format(obj=NAME))
+ ERRORS += 1
+
+ return ERRORS
diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py
new file mode 100644
index 00000000..9e26439e
--- /dev/null
+++ b/qa/tasks/ceph_test_case.py
@@ -0,0 +1,203 @@
+import unittest
+from unittest import case
+import time
+import logging
+
+from teuthology.orchestra.run import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+class TestTimeoutError(RuntimeError):
+ pass
+
+class CephTestCase(unittest.TestCase):
+ """
+ For test tasks that want to define a structured set of
+ tests implemented in python. Subclass this with appropriate
+ helpers for the subsystem you're testing.
+ """
+
+ # Environment references
+ mounts = None
+ fs = None
+ recovery_fs = None
+ ceph_cluster = None
+ mds_cluster = None
+ mgr_cluster = None
+ ctx = None
+
+ mon_manager = None
+
+ # Declarative test requirements: subclasses should override these to indicate
+ # their special needs. If not met, tests will be skipped.
+ REQUIRE_MEMSTORE = False
+
+ def setUp(self):
+ self._mon_configs_set = set()
+
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
+ "Starting test {0}".format(self.id()))
+
+ if self.REQUIRE_MEMSTORE:
+ objectstore = self.ceph_cluster.get_config("osd_objectstore", "osd")
+ if objectstore != "memstore":
+ # You certainly *could* run this on a real OSD, but you don't want to sit
+ # here for hours waiting for the test to fill up a 1TB drive!
+ raise case.SkipTest("Require `memstore` OSD backend (test " \
+ "would take too long on full sized OSDs")
+
+ def tearDown(self):
+ self.config_clear()
+
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
+ "Ended test {0}".format(self.id()))
+
+ def config_clear(self):
+ for section, key in self._mon_configs_set:
+ self.config_rm(section, key)
+ self._mon_configs_set.clear()
+
+ def _fix_key(self, key):
+ return str(key).replace(' ', '_')
+
+ def config_get(self, section, key):
+ key = self._fix_key(key)
+ return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "get", section, key).strip()
+
+ def config_show(self, entity, key):
+ key = self._fix_key(key)
+ return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "show", entity, key).strip()
+
+ def config_minimal(self):
+ return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "generate-minimal-conf").strip()
+
+ def config_rm(self, section, key):
+ key = self._fix_key(key)
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "rm", section, key)
+ # simplification: skip removing from _mon_configs_set;
+ # let tearDown clear everything again
+
+ def config_set(self, section, key, value):
+ key = self._fix_key(key)
+ self._mon_configs_set.add((section, key))
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value))
+
+ def assert_cluster_log(self, expected_pattern, invert_match=False,
+ timeout=10, watch_channel=None):
+ """
+ Context manager. Assert that during execution, or within a short grace period afterwards,
+ the Ceph cluster log emits a message matching the expected pattern.
+
+ :param expected_pattern: A string that you expect to see in the log output
+ :type expected_pattern: str
+ :param watch_channel: Specifies the channel to be watched. This can be
+ 'cluster', 'audit', ...
+ :type watch_channel: str
+ """
+
+ ceph_manager = self.ceph_cluster.mon_manager
+
+ class ContextManager(object):
+ def match(self):
+ found = expected_pattern in self.watcher_process.stdout.getvalue()
+ if invert_match:
+ return not found
+
+ return found
+
+ def __enter__(self):
+ self.watcher_process = ceph_manager.run_ceph_w(watch_channel)
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if not self.watcher_process.finished:
+ # Check if we got an early match, wait a bit if we didn't
+ if self.match():
+ return
+ else:
+ log.debug("No log hits yet, waiting...")
+ # Default monc tick interval is 10s, so wait that long and
+ # then some grace
+ time.sleep(5 + timeout)
+
+ self.watcher_process.stdin.close()
+ try:
+ self.watcher_process.wait()
+ except CommandFailedError:
+ pass
+
+ if not self.match():
+ log.error("Log output: \n{0}\n".format(self.watcher_process.stdout.getvalue()))
+ raise AssertionError("Expected log message not found: '{0}'".format(expected_pattern))
+
+ return ContextManager()
+
+ def wait_for_health(self, pattern, timeout):
+ """
+ Wait until 'ceph health' contains messages matching the pattern
+ """
+ def seen_health_warning():
+ health = self.ceph_cluster.mon_manager.get_mon_health()
+ codes = [s for s in health['checks']]
+ summary_strings = [s[1]['summary']['message'] for s in health['checks'].items()]
+ if len(summary_strings) == 0:
+ log.debug("Not expected number of summary strings ({0})".format(summary_strings))
+ return False
+ else:
+ for ss in summary_strings:
+ if pattern in ss:
+ return True
+ if pattern in codes:
+ return True
+
+ log.debug("Not found expected summary strings yet ({0})".format(summary_strings))
+ return False
+
+ self.wait_until_true(seen_health_warning, timeout)
+
+ def wait_for_health_clear(self, timeout):
+ """
+ Wait until `ceph health` returns no messages
+ """
+ def is_clear():
+ health = self.ceph_cluster.mon_manager.get_mon_health()
+ return len(health['checks']) == 0
+
+ self.wait_until_true(is_clear, timeout)
+
+ def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None):
+ period = 5
+ elapsed = 0
+ while True:
+ val = get_fn()
+ if val == expect_val:
+ return
+ elif reject_fn and reject_fn(val):
+ raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val))
+ else:
+ if elapsed >= timeout:
+ raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format(
+ elapsed, expect_val, val
+ ))
+ else:
+ log.debug("wait_until_equal: {0} != {1}, waiting...".format(val, expect_val))
+ time.sleep(period)
+ elapsed += period
+
+ log.debug("wait_until_equal: success")
+
+ @classmethod
+ def wait_until_true(cls, condition, timeout, period=5):
+ elapsed = 0
+ while True:
+ if condition():
+ log.debug("wait_until_true: success in {0}s".format(elapsed))
+ return
+ else:
+ if elapsed >= timeout:
+ raise TestTimeoutError("Timed out after {0}s".format(elapsed))
+ else:
+ log.debug("wait_until_true: waiting...")
+ time.sleep(period)
+ elapsed += period
+
+
diff --git a/qa/tasks/cephfs/__init__.py b/qa/tasks/cephfs/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/qa/tasks/cephfs/__init__.py
diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
new file mode 100644
index 00000000..f901f44b
--- /dev/null
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -0,0 +1,324 @@
+import time
+import json
+import logging
+from unittest import case
+from tasks.ceph_test_case import CephTestCase
+import os
+import re
+
+from tasks.cephfs.fuse_mount import FuseMount
+
+from teuthology.orchestra import run
+from teuthology.orchestra.run import CommandFailedError
+from teuthology.contextutil import safe_while
+
+
+log = logging.getLogger(__name__)
+
+
+def for_teuthology(f):
+ """
+ Decorator that adds an "is_for_teuthology" attribute to the wrapped function
+ """
+ f.is_for_teuthology = True
+ return f
+
+
+def needs_trimming(f):
+ """
+ Mark fn as requiring a client capable of trimming its cache (i.e. for ceph-fuse
+ this means it needs to be able to run as root, currently)
+ """
+ f.needs_trimming = True
+ return f
+
+
+class CephFSTestCase(CephTestCase):
+ """
+ Test case for Ceph FS, requires caller to populate Filesystem and Mounts,
+ into the fs, mount_a, mount_b class attributes (setting mount_b is optional)
+
+ Handles resetting the cluster under test between tests.
+ """
+
+ # FIXME weird explicit naming
+ mount_a = None
+ mount_b = None
+ recovery_mount = None
+
+ # Declarative test requirements: subclasses should override these to indicate
+ # their special needs. If not met, tests will be skipped.
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+ REQUIRE_KCLIENT_REMOTE = False
+ REQUIRE_ONE_CLIENT_REMOTE = False
+
+ # Whether to create the default filesystem during setUp
+ REQUIRE_FILESYSTEM = True
+
+ # requires REQUIRE_FILESYSTEM = True
+ REQUIRE_RECOVERY_FILESYSTEM = False
+
+ LOAD_SETTINGS = []
+
+ def setUp(self):
+ super(CephFSTestCase, self).setUp()
+
+ self.config_set('mon', 'mon_allow_pool_delete', True)
+
+ if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED:
+ raise case.SkipTest("Only have {0} MDSs, require {1}".format(
+ len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED
+ ))
+
+ if len(self.mounts) < self.CLIENTS_REQUIRED:
+ raise case.SkipTest("Only have {0} clients, require {1}".format(
+ len(self.mounts), self.CLIENTS_REQUIRED
+ ))
+
+ if self.REQUIRE_KCLIENT_REMOTE:
+ if not isinstance(self.mounts[0], FuseMount) or not isinstance(self.mounts[1], FuseMount):
+ # kclient kill() power cycles nodes, so requires clients to each be on
+ # their own node
+ if self.mounts[0].client_remote.hostname == self.mounts[1].client_remote.hostname:
+ raise case.SkipTest("kclient clients must be on separate nodes")
+
+ if self.REQUIRE_ONE_CLIENT_REMOTE:
+ if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames():
+ raise case.SkipTest("Require first client to be on separate server from MDSs")
+
+ # Create friendly mount_a, mount_b attrs
+ for i in range(0, self.CLIENTS_REQUIRED):
+ setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i])
+
+ self.mds_cluster.clear_firewall()
+
+ # Unmount all clients, we are about to blow away the filesystem
+ for mount in self.mounts:
+ if mount.is_mounted():
+ mount.umount_wait(force=True)
+
+ # To avoid any issues with e.g. unlink bugs, we destroy and recreate
+ # the filesystem rather than just doing a rm -rf of files
+ self.mds_cluster.delete_all_filesystems()
+ self.mds_cluster.mds_restart() # to reset any run-time configs, etc.
+ self.fs = None # is now invalid!
+ self.recovery_fs = None
+
+ # In case anything is in the OSD blacklist, clear it out. This is to avoid
+ # the OSD map changing in the background (due to blacklist expiry) while tests run.
+ try:
+ self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "clear")
+ except CommandFailedError:
+ # Fallback for older Ceph cluster
+ blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd",
+ "dump", "--format=json-pretty"))['blacklist']
+ log.info("Removing {0} blacklist entries".format(len(blacklist)))
+ for addr, blacklisted_at in blacklist.items():
+ self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr)
+
+ client_mount_ids = [m.client_id for m in self.mounts]
+ # In case the test changes the IDs of clients, stash them so that we can
+ # reset in tearDown
+ self._original_client_ids = client_mount_ids
+ log.info(client_mount_ids)
+
+ # In case there were any extra auth identities around from a previous
+ # test, delete them
+ for entry in self.auth_list():
+ ent_type, ent_id = entry['entity'].split(".")
+ if ent_type == "client" and ent_id not in client_mount_ids and ent_id != "admin":
+ self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity'])
+
+ if self.REQUIRE_FILESYSTEM:
+ self.fs = self.mds_cluster.newfs(create=True)
+
+ # In case some test messed with auth caps, reset them
+ for client_id in client_mount_ids:
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(client_id),
+ 'mds', 'allow',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}'.format(self.fs.get_data_pool_name()))
+
+ # wait for ranks to become active
+ self.fs.wait_for_daemons()
+
+ # Mount the requested number of clients
+ for i in range(0, self.CLIENTS_REQUIRED):
+ self.mounts[i].mount()
+ self.mounts[i].wait_until_mounted()
+
+ if self.REQUIRE_RECOVERY_FILESYSTEM:
+ if not self.REQUIRE_FILESYSTEM:
+ raise case.SkipTest("Recovery filesystem requires a primary filesystem as well")
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set',
+ 'enable_multiple', 'true',
+ '--yes-i-really-mean-it')
+ self.recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False)
+ self.recovery_fs.set_metadata_overlay(True)
+ self.recovery_fs.set_data_pool_name(self.fs.get_data_pool_name())
+ self.recovery_fs.create()
+ self.recovery_fs.getinfo(refresh=True)
+ self.recovery_fs.mds_restart()
+ self.recovery_fs.wait_for_daemons()
+
+ # Load any config settings of interest
+ for setting in self.LOAD_SETTINGS:
+ setattr(self, setting, float(self.fs.mds_asok(
+ ['config', 'get', setting], list(self.mds_cluster.mds_ids)[0]
+ )[setting]))
+
+ self.configs_set = set()
+
+ def tearDown(self):
+ self.mds_cluster.clear_firewall()
+ for m in self.mounts:
+ m.teardown()
+
+ for i, m in enumerate(self.mounts):
+ m.client_id = self._original_client_ids[i]
+
+ for subsys, key in self.configs_set:
+ self.mds_cluster.clear_ceph_conf(subsys, key)
+
+ return super(CephFSTestCase, self).tearDown()
+
+ def set_conf(self, subsys, key, value):
+ self.configs_set.add((subsys, key))
+ self.mds_cluster.set_ceph_conf(subsys, key, value)
+
+ def auth_list(self):
+ """
+ Convenience wrapper on "ceph auth ls"
+ """
+ return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd(
+ "auth", "ls", "--format=json-pretty"
+ ))['auth_dump']
+
+ def assert_session_count(self, expected, ls_data=None, mds_id=None):
+ if ls_data is None:
+ ls_data = self.fs.mds_asok(['session', 'ls'], mds_id=mds_id)
+
+ alive_count = len([s for s in ls_data if s['state'] != 'killing'])
+
+ self.assertEqual(expected, alive_count, "Expected {0} sessions, found {1}".format(
+ expected, alive_count
+ ))
+
+ def assert_session_state(self, client_id, expected_state):
+ self.assertEqual(
+ self._session_by_id(
+ self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'],
+ expected_state)
+
+ def get_session_data(self, client_id):
+ return self.get_session(client_id)
+
+ def _session_list(self):
+ ls_data = self.fs.mds_asok(['session', 'ls'])
+ ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']]
+ return ls_data
+
+ def get_session(self, client_id, session_ls=None):
+ if session_ls is None:
+ session_ls = self.fs.mds_asok(['session', 'ls'])
+
+ return self._session_by_id(session_ls)[client_id]
+
+ def _session_by_id(self, session_ls):
+ return dict([(s['id'], s) for s in session_ls])
+
+ def perf_dump(self, rank=0, status=None):
+ return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
+
+ def wait_until_evicted(self, client_id, timeout=30):
+ def is_client_evicted():
+ ls = self._session_list()
+ for s in ls:
+ if s['id'] == client_id:
+ return False
+ return True
+ self.wait_until_true(is_client_evicted, timeout)
+
+ def wait_for_daemon_start(self, daemon_ids=None):
+ """
+ Wait until all the daemons appear in the FSMap, either assigned
+ MDS ranks or in the list of standbys
+ """
+ def get_daemon_names():
+ return [info['name'] for info in self.mds_cluster.status().get_all()]
+
+ if daemon_ids is None:
+ daemon_ids = self.mds_cluster.mds_ids
+
+ try:
+ self.wait_until_true(
+ lambda: set(daemon_ids) & set(get_daemon_names()) == set(daemon_ids),
+ timeout=30
+ )
+ except RuntimeError:
+ log.warning("Timeout waiting for daemons {0}, while we have {1}".format(
+ daemon_ids, get_daemon_names()
+ ))
+ raise
+
+ def delete_mds_coredump(self, daemon_id):
+ # delete coredump file, otherwise teuthology.internal.coredump will
+ # catch it later and treat it as a failure.
+ core_pattern = self.mds_cluster.mds_daemons[daemon_id].remote.sh(
+ "sudo sysctl -n kernel.core_pattern")
+ core_dir = os.path.dirname(core_pattern.strip())
+ if core_dir: # Non-default core_pattern with a directory in it
+ # We have seen a core_pattern that looks like it's from teuthology's coredump
+ # task, so proceed to clear out the core file
+ if core_dir[0] == '|':
+ log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:]))
+ return
+
+ log.info("Clearing core from directory: {0}".format(core_dir))
+
+ # Verify that we see the expected single coredump
+ ls_output = self.mds_cluster.mds_daemons[daemon_id].remote.sh([
+ "cd", core_dir, run.Raw('&&'),
+ "sudo", "ls", run.Raw('|'), "sudo", "xargs", "file"
+ ])
+ cores = [l.partition(":")[0]
+ for l in ls_output.strip().split("\n")
+ if re.match(r'.*ceph-mds.* -i +{0}'.format(daemon_id), l)]
+
+ log.info("Enumerated cores: {0}".format(cores))
+ self.assertEqual(len(cores), 1)
+
+ log.info("Found core file {0}, deleting it".format(cores[0]))
+
+ self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[
+ "cd", core_dir, run.Raw('&&'), "sudo", "rm", "-f", cores[0]
+ ])
+ else:
+ log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)")
+
+ def _wait_subtrees(self, status, rank, test):
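+ """
+ Poll the subtree map of the given rank until the exported subtrees match
+ `test`, a list of (path, auth rank) tuples, for example (illustrative)
+ [('/dir1', 0), ('/dir2', 1)]. Raises RuntimeError on timeout.
+ """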
+ timeout = 30
+ pause = 2
+ test = sorted(test)
+ for i in range(timeout // pause):
+ subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name'])
+ subtrees = list(filter(lambda s: s['dir']['path'].startswith('/'), subtrees))
+ filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees])
+ log.info("%s =?= %s", filtered, test)
+ if filtered == test:
+ # Confirm export_pin in output is correct:
+ for s in subtrees:
+ self.assertTrue(s['export_pin'] == s['auth_first'])
+ return subtrees
+ time.sleep(pause)
+ raise RuntimeError("rank {0} failed to reach desired subtree state", rank)
+
+ def _wait_until_scrub_complete(self, path="/", recursive=True):
+ out_json = self.fs.rank_tell(["scrub", "start", path] + ["recursive"] if recursive else [])
+ with safe_while(sleep=10, tries=10) as proceed:
+ while proceed():
+ out_json = self.fs.rank_tell(["scrub", "status"])
+ if out_json['status'] == "no active scrubs running":
+ break;
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
new file mode 100644
index 00000000..c5531f94
--- /dev/null
+++ b/qa/tasks/cephfs/filesystem.py
@@ -0,0 +1,1386 @@
+
+import json
+import logging
+from gevent import Greenlet
+import os
+import time
+import datetime
+import re
+import errno
+import random
+import traceback
+
+from io import BytesIO
+from io import StringIO
+
+from teuthology.exceptions import CommandFailedError
+from teuthology import misc
+from teuthology.nuke import clear_firewall
+from teuthology.parallel import parallel
+from tasks.ceph_manager import write_conf
+from tasks import ceph_manager
+
+
+log = logging.getLogger(__name__)
+
+
+DAEMON_WAIT_TIMEOUT = 120
+ROOT_INO = 1
+
+class FileLayout(object):
+ def __init__(self, pool=None, pool_namespace=None, stripe_unit=None, stripe_count=None, object_size=None):
+ self.pool = pool
+ self.pool_namespace = pool_namespace
+ self.stripe_unit = stripe_unit
+ self.stripe_count = stripe_count
+ self.object_size = object_size
+
+ @classmethod
+ def load_from_ceph(cls, layout_str):
+ # TODO
+ pass
+
+ def items(self):
+ if self.pool is not None:
+ yield ("pool", self.pool)
+ if self.pool_namespace:
+ yield ("pool_namespace", self.pool_namespace)
+ if self.stripe_unit is not None:
+ yield ("stripe_unit", self.stripe_unit)
+ if self.stripe_count is not None:
+ yield ("stripe_count", self.stripe_count)
+ if self.object_size is not None:
+ yield ("object_size", self.stripe_size)
+
+class ObjectNotFound(Exception):
+ def __init__(self, object_name):
+ self._object_name = object_name
+
+ def __str__(self):
+ return "Object not found: '{0}'".format(self._object_name)
+
+class FSStatus(object):
+ """
+ Operations on a snapshot of the FSMap.
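+
+ Example (illustrative):
+     status = FSStatus(mds_cluster.mon_manager)
+     rank0_info = status.get_rank(fs.id, 0)  # mds_info dict for rank 0 of `fs`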
+ """
+ def __init__(self, mon_manager):
+ self.mon = mon_manager
+ self.map = json.loads(self.mon.raw_cluster_cmd("fs", "dump", "--format=json"))
+
+ def __str__(self):
+ return json.dumps(self.map, indent = 2, sort_keys = True)
+
+ # Expose the fsmap for manual inspection.
+ def __getitem__(self, key):
+ """
+ Get a field from the fsmap.
+ """
+ return self.map[key]
+
+ def get_filesystems(self):
+ """
+ Iterator for all filesystems.
+ """
+ for fs in self.map['filesystems']:
+ yield fs
+
+ def get_all(self):
+ """
+ Iterator for all the mds_info components in the FSMap.
+ """
+ for info in self.get_standbys():
+ yield info
+ for fs in self.map['filesystems']:
+ for info in fs['mdsmap']['info'].values():
+ yield info
+
+ def get_standbys(self):
+ """
+ Iterator for all standbys.
+ """
+ for info in self.map['standbys']:
+ yield info
+
+ def get_fsmap(self, fscid):
+ """
+ Get the fsmap for the given FSCID.
+ """
+ for fs in self.map['filesystems']:
+ if fscid is None or fs['id'] == fscid:
+ return fs
+ raise RuntimeError("FSCID {0} not in map".format(fscid))
+
+ def get_fsmap_byname(self, name):
+ """
+ Get the fsmap for the given file system name.
+ """
+ for fs in self.map['filesystems']:
+ if name is None or fs['mdsmap']['fs_name'] == name:
+ return fs
+ raise RuntimeError("FS {0} not in map".format(name))
+
+ def get_replays(self, fscid):
+ """
+ Get the standby:replay MDS for the given FSCID.
+ """
+ fs = self.get_fsmap(fscid)
+ for info in fs['mdsmap']['info'].values():
+ if info['state'] == 'up:standby-replay':
+ yield info
+
+ def get_ranks(self, fscid):
+ """
+ Get the ranks for the given FSCID.
+ """
+ fs = self.get_fsmap(fscid)
+ for info in fs['mdsmap']['info'].values():
+ if info['rank'] >= 0 and info['state'] != 'up:standby-replay':
+ yield info
+
+ def get_rank(self, fscid, rank):
+ """
+ Get the rank for the given FSCID.
+ """
+ for info in self.get_ranks(fscid):
+ if info['rank'] == rank:
+ return info
+ raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank))
+
+ def get_mds(self, name):
+ """
+ Get the info for the given MDS name.
+ """
+ for info in self.get_all():
+ if info['name'] == name:
+ return info
+ return None
+
+ def get_mds_addr(self, name):
+ """
+ Return the instance addr as a string, like "10.214.133.138:6807/10825"
+ """
+ info = self.get_mds(name)
+ if info:
+ return info['addr']
+ else:
+ log.warning(json.dumps(list(self.get_all()), indent=2)) # dump for debugging
+ raise RuntimeError("MDS id '{0}' not found in map".format(name))
+
+class CephCluster(object):
+ @property
+ def admin_remote(self):
+ first_mon = misc.get_first_mon(self._ctx, None)
+ (result,) = self._ctx.cluster.only(first_mon).remotes.keys()
+ return result
+
+ def __init__(self, ctx):
+ self._ctx = ctx
+ self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
+
+ def get_config(self, key, service_type=None):
+ """
+ Get config from mon by default, or a specific service if caller asks for it
+ """
+ if service_type is None:
+ service_type = 'mon'
+
+ service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0]
+ return self.json_asok(['config', 'get', key], service_type, service_id)[key]
+
+ def set_ceph_conf(self, subsys, key, value):
+ if subsys not in self._ctx.ceph['ceph'].conf:
+ self._ctx.ceph['ceph'].conf[subsys] = {}
+ self._ctx.ceph['ceph'].conf[subsys][key] = value
+ write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they
+ # used a different config path this won't work.
+
+ def clear_ceph_conf(self, subsys, key):
+ del self._ctx.ceph['ceph'].conf[subsys][key]
+ write_conf(self._ctx)
+
+ def json_asok(self, command, service_type, service_id, timeout=None):
+ if timeout is None:
+ timeout = 15*60
+ proc = self.mon_manager.admin_socket(service_type, service_id, command, timeout=timeout)
+ response_data = proc.stdout.getvalue()
+ log.info("_json_asok output: {0}".format(response_data))
+ if response_data.strip():
+ return json.loads(response_data)
+ else:
+ return None
+
+
+class MDSCluster(CephCluster):
+ """
+ Collective operations on all the MDS daemons in the Ceph cluster. These
+ daemons may be in use by various Filesystems.
+
+ For the benefit of pre-multi-filesystem tests, this class is also
+ a parent of Filesystem. The correct way to use MDSCluster going forward is
+ as a separate instance outside of your (multiple) Filesystem instances.
+ """
+ def __init__(self, ctx):
+ super(MDSCluster, self).__init__(ctx)
+
+ self.mds_ids = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
+
+ if len(self.mds_ids) == 0:
+ raise RuntimeError("This task requires at least one MDS")
+
+ if hasattr(self._ctx, "daemons"):
+ # Presence of 'daemons' attribute implies ceph task rather than ceph_deploy task
+ self.mds_daemons = dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
+
+ def _one_or_all(self, mds_id, cb, in_parallel=True):
+ """
+ Call a callback for a single named MDS, or for all.
+
+ Note that the parallelism here isn't for performance, it's to avoid being overly kind
+ to the cluster by waiting a graceful ssh-latency of time between doing things, and to
+ avoid being overly kind by executing them in a particular order. However, some actions
+ don't cope with being done in parallel, so it's optional (`in_parallel`)
+
+ :param mds_id: MDS daemon name, or None
+ :param cb: Callback taking single argument of MDS daemon name
+ :param in_parallel: whether to invoke callbacks concurrently (else one after the other)
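+
+ Example (illustrative): self._one_or_all(None, lambda id_: self.mds_daemons[id_].stop())
+ would stop every MDS in parallel, while passing a specific mds_id acts on
+ only that daemon.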
+ """
+ if mds_id is None:
+ if in_parallel:
+ with parallel() as p:
+ for mds_id in self.mds_ids:
+ p.spawn(cb, mds_id)
+ else:
+ for mds_id in self.mds_ids:
+ cb(mds_id)
+ else:
+ cb(mds_id)
+
+ def get_config(self, key, service_type=None):
+ """
+ get_config specialization of service_type="mds"
+ """
+ if service_type != "mds":
+ return super(MDSCluster, self).get_config(key, service_type)
+
+ # Some tests stop MDS daemons, don't send commands to a dead one:
+ running_daemons = [i for i, mds in self.mds_daemons.items() if mds.running()]
+ service_id = random.sample(running_daemons, 1)[0]
+ return self.json_asok(['config', 'get', key], service_type, service_id)[key]
+
+ def mds_stop(self, mds_id=None):
+ """
+ Stop the MDS daemon process(es). If it held a rank, that rank
+ will eventually go laggy.
+ """
+ self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop())
+
+ def mds_fail(self, mds_id=None):
+ """
+ Inform MDSMonitor of the death of the daemon process(es). If it held
+ a rank, that rank will be relinquished.
+ """
+ self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_))
+
+ def mds_restart(self, mds_id=None):
+ self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart())
+
+ def mds_fail_restart(self, mds_id=None):
+ """
+ Variation on restart that includes marking MDSs as failed, so that doing this
+ operation followed by waiting for healthy daemon states guarantees that they
+ have gone down and come up, rather than potentially seeing the healthy states
+ that existed before the restart.
+ """
+ def _fail_restart(id_):
+ self.mds_daemons[id_].stop()
+ self.mon_manager.raw_cluster_cmd("mds", "fail", id_)
+ self.mds_daemons[id_].restart()
+
+ self._one_or_all(mds_id, _fail_restart)
+
+ def mds_signal(self, mds_id, sig, silent=False):
+ """
+ Signal an MDS daemon.
+ """
+ self.mds_daemons[mds_id].signal(sig, silent)
+
+ def newfs(self, name='cephfs', create=True):
+ return Filesystem(self._ctx, name=name, create=create)
+
+ def status(self):
+ return FSStatus(self.mon_manager)
+
+ def delete_all_filesystems(self):
+ """
+ Remove all filesystems that exist, and any pools in use by them.
+ """
+ pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
+ pool_id_name = {}
+ for pool in pools:
+ pool_id_name[pool['pool']] = pool['pool_name']
+
+ # mark cluster down for each fs to prevent churn during deletion
+ status = self.status()
+ for fs in status.get_filesystems():
+ self.mon_manager.raw_cluster_cmd("fs", "fail", str(fs['mdsmap']['fs_name']))
+
+ # get a new copy as actives may have since changed
+ status = self.status()
+ for fs in status.get_filesystems():
+ mdsmap = fs['mdsmap']
+ metadata_pool = pool_id_name[mdsmap['metadata_pool']]
+
+ self.mon_manager.raw_cluster_cmd('fs', 'rm', mdsmap['fs_name'], '--yes-i-really-mean-it')
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
+ metadata_pool, metadata_pool,
+ '--yes-i-really-really-mean-it')
+ for data_pool in mdsmap['data_pools']:
+ data_pool = pool_id_name[data_pool]
+ try:
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
+ data_pool, data_pool,
+ '--yes-i-really-really-mean-it')
+ except CommandFailedError as e:
+ # EBUSY: the data pool is shared with another filesystem and
+ # will be deleted when that filesystem's turn comes around.
+ if e.exitstatus == 16:
+ pass
+ else:
+ raise
+
+ def get_standby_daemons(self):
+ return set([s['name'] for s in self.status().get_standbys()])
+
+ def get_mds_hostnames(self):
+ result = set()
+ for mds_id in self.mds_ids:
+ mds_remote = self.mon_manager.find_remote('mds', mds_id)
+ result.add(mds_remote.hostname)
+
+ return list(result)
+
+ def set_clients_block(self, blocked, mds_id=None):
+ """
+ Block (using iptables) client communications to this MDS. Be careful: if
+ other services are running on this MDS, or other MDSs try to talk to this
+ MDS, their communications may also be blocked as collateral damage.
+
+ :param mds_id: Optional ID of MDS to block, default to all
+ :return:
+ """
+ da_flag = "-A" if blocked else "-D"
+
+ def set_block(_mds_id):
+ remote = self.mon_manager.find_remote('mds', _mds_id)
+ status = self.status()
+
+ addr = status.get_mds_addr(_mds_id)
+ ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups()
+
+ remote.run(
+ args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"])
+ remote.run(
+ args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"])
+
+ self._one_or_all(mds_id, set_block, in_parallel=False)
+
+ def clear_firewall(self):
+ clear_firewall(self._ctx)
+
+ def get_mds_info(self, mds_id):
+ return FSStatus(self.mon_manager).get_mds(mds_id)
+
+ def is_pool_full(self, pool_name):
+ pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
+ for pool in pools:
+ if pool['pool_name'] == pool_name:
+ return 'full' in pool['flags_names'].split(",")
+
+ raise RuntimeError("Pool not found '{0}'".format(pool_name))
+
+class Filesystem(MDSCluster):
+ """
+ This object is for driving a CephFS filesystem. The MDS daemons driven by
+ MDSCluster may be shared with other Filesystems.
+ """
+ def __init__(self, ctx, fs_config=None, fscid=None, name=None, create=False,
+ ec_profile=None):
+ super(Filesystem, self).__init__(ctx)
+
+ self.name = name
+ self.ec_profile = ec_profile
+ self.id = None
+ self.metadata_pool_name = None
+ self.metadata_overlay = False
+ self.data_pool_name = None
+ self.data_pools = None
+ self.fs_config = fs_config
+
+ client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
+ self.client_id = client_list[0]
+ self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]
+
+ if name is not None:
+ if fscid is not None:
+ raise RuntimeError("cannot specify fscid when creating fs")
+ if create and not self.legacy_configured():
+ self.create()
+ else:
+ if fscid is not None:
+ self.id = fscid
+ self.getinfo(refresh = True)
+
+ # Stash a reference to the first created filesystem on ctx, so
+ # that if someone drops to the interactive shell they can easily
+ # poke our methods.
+ if not hasattr(self._ctx, "filesystem"):
+ self._ctx.filesystem = self
+
+ def get_task_status(self, status_key):
+ return self.mon_manager.get_service_task_status("mds", status_key)
+
+ def getinfo(self, refresh = False):
+ status = self.status()
+ if self.id is not None:
+ fsmap = status.get_fsmap(self.id)
+ elif self.name is not None:
+ fsmap = status.get_fsmap_byname(self.name)
+ else:
+ fss = [fs for fs in status.get_filesystems()]
+ if len(fss) == 1:
+ fsmap = fss[0]
+ elif len(fss) == 0:
+ raise RuntimeError("no file system available")
+ else:
+ raise RuntimeError("more than one file system available")
+ self.id = fsmap['id']
+ self.name = fsmap['mdsmap']['fs_name']
+ self.get_pool_names(status = status, refresh = refresh)
+ return status
+
+ def set_metadata_overlay(self, overlay):
+ if self.id is not None:
+ raise RuntimeError("cannot specify fscid when configuring overlay")
+ self.metadata_overlay = overlay
+
+ def deactivate(self, rank):
+ if rank < 0:
+ raise RuntimeError("invalid rank")
+ elif rank == 0:
+ raise RuntimeError("cannot deactivate rank 0")
+ self.mon_manager.raw_cluster_cmd("mds", "deactivate", "%d:%d" % (self.id, rank))
+
+ def reach_max_mds(self):
+ # Try to reach rank count == max_mds, up or down (UPGRADE SENSITIVE!)
+ status = self.getinfo()
+ mds_map = self.get_mds_map(status=status)
+ max_mds = mds_map['max_mds']
+
+ count = len(list(self.get_ranks(status=status)))
+ if count > max_mds:
+ try:
+ # deactivate MDSs in descending rank order
+ status = self.wait_for_daemons(status=status, skip_max_mds_check=True)
+ while count > max_mds:
+ targets = sorted(self.get_ranks(status=status), key=lambda r: r['rank'], reverse=True)
+ target = targets[0]
+ log.debug("deactivating rank %d" % target['rank'])
+ self.deactivate(target['rank'])
+ status = self.wait_for_daemons(skip_max_mds_check=True)
+ count = len(list(self.get_ranks(status=status)))
+ except:
+ # In Mimic, deactivation is done automatically:
+ log.info("Error:\n{}".format(traceback.format_exc()))
+ status = self.wait_for_daemons()
+ else:
+ status = self.wait_for_daemons()
+
+ mds_map = self.get_mds_map(status=status)
+ assert(mds_map['max_mds'] == max_mds)
+ assert(mds_map['in'] == list(range(0, max_mds)))
+
+ def fail(self):
+ self.mon_manager.raw_cluster_cmd("fs", "fail", str(self.name))
+
+ def set_var(self, var, *args):
+ a = map(str, args)
+ self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a)
+
+ def set_down(self, down=True):
+ self.set_var("down", str(down).lower())
+
+ def set_joinable(self, joinable=True):
+ self.set_var("joinable", str(joinable).lower())
+
+ def set_max_mds(self, max_mds):
+ self.set_var("max_mds", "%d" % max_mds)
+
+ def set_session_timeout(self, timeout):
+ self.set_var("session_timeout", "%d" % timeout)
+
+ def set_allow_standby_replay(self, yes):
+ self.set_var("allow_standby_replay", str(yes).lower())
+
+ def set_allow_new_snaps(self, yes):
+ self.set_var("allow_new_snaps", str(yes).lower(), '--yes-i-really-mean-it')
+
+ # In Octopus+, the PG count can be omitted to use the default. We keep the
+ # hard-coded value for deployments of Mimic/Nautilus.
+ pgs_per_fs_pool = 8
+
+ def create(self):
+ if self.name is None:
+ self.name = "cephfs"
+ if self.metadata_pool_name is None:
+ self.metadata_pool_name = "{0}_metadata".format(self.name)
+ if self.data_pool_name is None:
+ data_pool_name = "{0}_data".format(self.name)
+ else:
+ data_pool_name = self.data_pool_name
+
+ log.debug("Creating filesystem '{0}'".format(self.name))
+
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.metadata_pool_name, self.pgs_per_fs_pool.__str__())
+ if self.metadata_overlay:
+ self.mon_manager.raw_cluster_cmd('fs', 'new',
+ self.name, self.metadata_pool_name, data_pool_name,
+ '--allow-dangerous-metadata-overlay')
+ else:
+ if self.ec_profile and 'disabled' not in self.ec_profile:
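+ # ec_profile is expected to be a list of "key=value" tokens that are
+ # passed straight to "osd erasure-code-profile set", e.g. (illustrative)
+ # ["k=2", "m=1", "crush-failure-domain=osd"].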
+ log.debug("EC profile is %s", self.ec_profile)
+ cmd = ['osd', 'erasure-code-profile', 'set', data_pool_name]
+ cmd.extend(self.ec_profile)
+ self.mon_manager.raw_cluster_cmd(*cmd)
+ self.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'create',
+ data_pool_name, self.pgs_per_fs_pool.__str__(), 'erasure',
+ data_pool_name)
+ self.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set',
+ data_pool_name, 'allow_ec_overwrites', 'true')
+ else:
+ self.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'create',
+ data_pool_name, self.pgs_per_fs_pool.__str__())
+ self.mon_manager.raw_cluster_cmd('fs', 'new',
+ self.name,
+ self.metadata_pool_name,
+ data_pool_name,
+ "--force")
+ self.check_pool_application(self.metadata_pool_name)
+ self.check_pool_application(data_pool_name)
+ # Turn off spurious standby count warnings from modifying max_mds in tests.
+ try:
+ self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0')
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise)
+ pass
+ else:
+ raise
+
+ if self.fs_config is not None:
+ max_mds = self.fs_config.get('max_mds', 1)
+ if max_mds > 1:
+ self.set_max_mds(max_mds)
+
+ # If absent will use the default value (60 seconds)
+ session_timeout = self.fs_config.get('session_timeout', 60)
+ if session_timeout != 60:
+ self.set_session_timeout(session_timeout)
+
+ self.getinfo(refresh = True)
+
+
+ def check_pool_application(self, pool_name):
+ osd_map = self.mon_manager.get_osd_dump_json()
+ for pool in osd_map['pools']:
+ if pool['pool_name'] == pool_name:
+ if "application_metadata" in pool:
+ if not "cephfs" in pool['application_metadata']:
+ raise RuntimeError("Pool %p does not name cephfs as application!".\
+ format(pool_name))
+
+
+ def __del__(self):
+ if getattr(self._ctx, "filesystem", None) == self:
+ delattr(self._ctx, "filesystem")
+
+ def exists(self):
+ """
+ Whether a filesystem exists in the mon's filesystem list
+ """
+ fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty'))
+ return self.name in [fs['name'] for fs in fs_list]
+
+ def legacy_configured(self):
+ """
+ Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is
+ the case, the caller should avoid using Filesystem.create
+ """
+ try:
+ out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools')
+ pools = json.loads(out_text)
+ metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools]
+ if metadata_pool_exists:
+ self.metadata_pool_name = 'metadata'
+ except CommandFailedError as e:
+ # For use in upgrade tests, Ceph cuttlefish and earlier don't support
+ # structured output (--format) from the CLI.
+ if e.exitstatus == 22:
+ metadata_pool_exists = True
+ else:
+ raise
+
+ return metadata_pool_exists
+
+ def _df(self):
+ return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty"))
+
+ def get_mds_map(self, status=None):
+ if status is None:
+ status = self.status()
+ return status.get_fsmap(self.id)['mdsmap']
+
+ def get_var(self, var, status=None):
+ return self.get_mds_map(status=status)[var]
+
+ def set_dir_layout(self, mount, path, layout):
+ for name, value in layout.items():
+ mount.run_shell(args=["setfattr", "-n", "ceph.dir.layout."+name, "-v", str(value), path])
+
+ def add_data_pool(self, name, create=True):
+ if create:
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, self.pgs_per_fs_pool.__str__())
+ self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
+ self.get_pool_names(refresh = True)
+ for poolid, fs_name in self.data_pools.items():
+ if name == fs_name:
+ return poolid
+ raise RuntimeError("could not get just created pool '{0}'".format(name))
+
+ def get_pool_names(self, refresh = False, status = None):
+ if refresh or self.metadata_pool_name is None or self.data_pools is None:
+ if status is None:
+ status = self.status()
+ fsmap = status.get_fsmap(self.id)
+
+ osd_map = self.mon_manager.get_osd_dump_json()
+ id_to_name = {}
+ for p in osd_map['pools']:
+ id_to_name[p['pool']] = p['pool_name']
+
+ self.metadata_pool_name = id_to_name[fsmap['mdsmap']['metadata_pool']]
+ self.data_pools = {}
+ for data_pool in fsmap['mdsmap']['data_pools']:
+ self.data_pools[data_pool] = id_to_name[data_pool]
+
+ def get_data_pool_name(self, refresh = False):
+ if refresh or self.data_pools is None:
+ self.get_pool_names(refresh = True)
+ assert(len(self.data_pools) == 1)
+ return next(iter(self.data_pools.values()))
+
+ def get_data_pool_id(self, refresh = False):
+ """
+ Don't call this if you have multiple data pools
+ :return: integer
+ """
+ if refresh or self.data_pools is None:
+ self.get_pool_names(refresh = True)
+ assert(len(self.data_pools) == 1)
+ return next(iter(self.data_pools.keys()))
+
+ def get_data_pool_names(self, refresh = False):
+ if refresh or self.data_pools is None:
+ self.get_pool_names(refresh = True)
+ return list(self.data_pools.values())
+
+ def get_metadata_pool_name(self):
+ return self.metadata_pool_name
+
+ def set_data_pool_name(self, name):
+ if self.id is not None:
+ raise RuntimeError("can't set filesystem name if its fscid is set")
+ self.data_pool_name = name
+
+ def get_namespace_id(self):
+ return self.id
+
+ def get_pool_df(self, pool_name):
+ """
+ Return a dict like:
+ {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0}
+ """
+ for pool_df in self._df()['pools']:
+ if pool_df['name'] == pool_name:
+ return pool_df['stats']
+
+ raise RuntimeError("Pool name '{0}' not found".format(pool_name))
+
+ def get_usage(self):
+ return self._df()['stats']['total_used_bytes']
+
+ def are_daemons_healthy(self, status=None, skip_max_mds_check=False):
+ """
+ Return true if all daemons are in one of active, standby, standby-replay, and
+ at least max_mds daemons are in 'active'.
+
+ Unlike most of Filesystem, this function is tolerant of new-style `fs`
+ commands being missing: it runs as part of the ceph installation process
+ during upgrade suites, so it falls back to old-style commands when a
+ new-style command returns EINVAL.
+
+ :return:
+ """
+ # First, check to see that processes haven't exited with an error code
+ for mds in self._ctx.daemons.iter_daemons_of_role('mds'):
+ mds.check_status()
+
+ active_count = 0
+ try:
+ mds_map = self.get_mds_map(status=status)
+ except CommandFailedError as cfe:
+ # Old version, fall back to non-multi-fs commands
+ if cfe.exitstatus == errno.EINVAL:
+ mds_map = json.loads(
+ self.mon_manager.raw_cluster_cmd('mds', 'dump', '--format=json'))
+ else:
+ raise
+
+ log.debug("are_daemons_healthy: mds map: {0}".format(mds_map))
+
+ for mds_id, mds_status in mds_map['info'].items():
+ if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
+ log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
+ return False
+ elif mds_status['state'] == 'up:active':
+ active_count += 1
+
+ log.debug("are_daemons_healthy: {0}/{1}".format(
+ active_count, mds_map['max_mds']
+ ))
+
+ if not skip_max_mds_check:
+ if active_count > mds_map['max_mds']:
+ log.debug("are_daemons_healthy: number of actives is greater than max_mds: {0}".format(mds_map))
+ return False
+ elif active_count == mds_map['max_mds']:
+ # The MDSMap says these guys are active, but let's check they really are
+ for mds_id, mds_status in mds_map['info'].items():
+ if mds_status['state'] == 'up:active':
+ try:
+ daemon_status = self.mds_asok(["status"], mds_id=mds_status['name'])
+ except CommandFailedError as cfe:
+ if cfe.exitstatus == errno.EINVAL:
+ # Old version, can't do this check
+ continue
+ else:
+ # MDS not even running
+ return False
+
+ if daemon_status['state'] != 'up:active':
+ # MDS hasn't taken the latest map yet
+ return False
+
+ return True
+ else:
+ return False
+ else:
+ log.debug("are_daemons_healthy: skipping max_mds check")
+ return True
+
+ def get_daemon_names(self, state=None, status=None):
+ """
+ Return MDS daemon names of those daemons in the given state
+ :param state:
+ :return:
+ """
+ mdsmap = self.get_mds_map(status)
+ result = []
+ for mds_status in sorted(mdsmap['info'].values(),
+ key=lambda _: _['rank']):
+ if mds_status['state'] == state or state is None:
+ result.append(mds_status['name'])
+
+ return result
+
+ def get_active_names(self):
+ """
+ Return MDS daemon names of those daemons holding ranks
+ in state up:active
+
+ :return: list of strings like ['a', 'b'], sorted by rank
+ """
+ return self.get_daemon_names("up:active")
+
+ def get_all_mds_rank(self, status=None):
+ mdsmap = self.get_mds_map(status)
+ result = []
+ for mds_status in sorted(mdsmap['info'].values(),
+ key=lambda _: _['rank']):
+ if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
+ result.append(mds_status['rank'])
+
+ return result
+
+ def get_rank(self, rank=0, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_rank(self.id, rank)
+
+ def rank_restart(self, rank=0, status=None):
+ name = self.get_rank(rank=rank, status=status)['name']
+ self.mds_restart(mds_id=name)
+
+ def rank_signal(self, signal, rank=0, status=None):
+ name = self.get_rank(rank=rank, status=status)['name']
+ self.mds_signal(name, signal)
+
+ def rank_freeze(self, yes, rank=0):
+ self.mon_manager.raw_cluster_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower())
+
+ def rank_fail(self, rank=0):
+ self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank))
+
+ def get_ranks(self, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_ranks(self.id)
+
+ def get_replays(self, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_replays(self.id)
+
+ def get_replay(self, rank=0, status=None):
+ for replay in self.get_replays(status=status):
+ if replay['rank'] == rank:
+ return replay
+ return None
+
+ def get_rank_names(self, status=None):
+ """
+ Return MDS daemon names of those daemons holding a rank,
+ sorted by rank. This includes e.g. up:replay/reconnect
+ as well as active, but does not include standby or
+ standby-replay.
+ """
+ mdsmap = self.get_mds_map(status)
+ result = []
+ for mds_status in sorted(mdsmap['info'].values(),
+ key=lambda _: _['rank']):
+ if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
+ result.append(mds_status['name'])
+
+ return result
+
+ def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None):
+ """
+ Wait until all daemons are healthy
+ :return:
+ """
+
+ if timeout is None:
+ timeout = DAEMON_WAIT_TIMEOUT
+
+ if status is None:
+ status = self.status()
+
+ elapsed = 0
+ while True:
+ if self.are_daemons_healthy(status=status, skip_max_mds_check=skip_max_mds_check):
+ return status
+ else:
+ time.sleep(1)
+ elapsed += 1
+
+ if elapsed > timeout:
+ log.debug("status = {0}".format(status))
+ raise RuntimeError("Timed out waiting for MDS daemons to become healthy")
+
+ status = self.status()
+
+ def get_lone_mds_id(self):
+ """
+ Get a single MDS ID: the only one if there is only one
+ configured, else the only one currently holding a rank,
+ else raise an error.
+ """
+ if len(self.mds_ids) != 1:
+ alive = self.get_rank_names()
+ if len(alive) == 1:
+ return alive[0]
+ else:
+ raise ValueError("Explicit MDS argument required when multiple MDSs in use")
+ else:
+ return self.mds_ids[0]
+
+ def recreate(self):
+ log.info("Creating new filesystem")
+ self.delete_all_filesystems()
+ self.id = None
+ self.create()
+
+ def put_metadata_object_raw(self, object_id, infile):
+ """
+ Save an object to the metadata pool
+ """
+ temp_bin_path = infile
+ self.client_remote.run(args=[
+ 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'put', object_id, temp_bin_path
+ ])
+
+ def get_metadata_object_raw(self, object_id):
+ """
+ Retrieve an object from the metadata pool and store it in a file.
+ """
+ temp_bin_path = '/tmp/' + object_id + '.bin'
+
+ self.client_remote.run(args=[
+ 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'get', object_id, temp_bin_path
+ ])
+
+ return temp_bin_path
+
+ def get_metadata_object(self, object_type, object_id):
+ """
+ Retrieve an object from the metadata pool, pass it through
+ ceph-dencoder to dump it to JSON, and return the decoded object.
+ """
+ temp_bin_path = '/tmp/out.bin'
+
+ self.client_remote.run(args=[
+ 'sudo', os.path.join(self._prefix, 'rados'), '-p', self.metadata_pool_name, 'get', object_id, temp_bin_path
+ ])
+
+ dump_json = self.client_remote.sh([
+ 'sudo', os.path.join(self._prefix, 'ceph-dencoder'), 'type', object_type, 'import', temp_bin_path, 'decode', 'dump_json'
+ ]).strip()
+ try:
+ dump = json.loads(dump_json)
+ except (TypeError, ValueError):
+ log.error("Failed to decode JSON: '{0}'".format(dump_json))
+ raise
+
+ return dump
+
+ def get_journal_version(self):
+ """
+ Read the JournalPointer and Journal::Header objects to learn the version of
+ encoding in use.
+ """
+ journal_pointer_object = '400.00000000'
+ journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object)
+ journal_ino = journal_pointer_dump['journal_pointer']['front']
+
+ journal_header_object = "{0:x}.00000000".format(journal_ino)
+ journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object)
+
+ version = journal_header_dump['journal_header']['stream_format']
+ log.debug("Read journal version {0}".format(version))
+
+ return version
+
+ def mds_asok(self, command, mds_id=None, timeout=None):
+ if mds_id is None:
+ mds_id = self.get_lone_mds_id()
+
+ return self.json_asok(command, 'mds', mds_id, timeout=timeout)
+
+ def rank_asok(self, command, rank=0, status=None, timeout=None):
+ info = self.get_rank(rank=rank, status=status)
+ return self.json_asok(command, 'mds', info['name'], timeout=timeout)
+
+ def rank_tell(self, command, rank=0, status=None):
+ info = self.get_rank(rank=rank, status=status)
+ return json.loads(self.mon_manager.raw_cluster_cmd("tell", 'mds.{0}'.format(info['name']), *command))
+
+ def read_cache(self, path, depth=None):
+ cmd = ["dump", "tree", path]
+ if depth is not None:
+ cmd.append(depth.__str__())
+ result = self.mds_asok(cmd)
+ if len(result) == 0:
+ raise RuntimeError("Path not found in cache: {0}".format(path))
+
+ return result
+
+ def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None):
+ """
+ Block until the MDS reaches a particular state, or a failure condition
+ is met.
+
+ When there are multiple MDSs, succeed when exactly one MDS is in the
+ goal state, or fail when any MDS is in the reject state.
+
+ :param goal_state: Return once the MDS is in this state
+ :param reject: Fail if the MDS enters this state before the goal state
+ :param timeout: Fail if this many seconds pass before reaching goal
+ :return: number of seconds waited, rounded down to integer
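+
+ Example (illustrative): self.fs.wait_for_state('up:active', rank=0, timeout=60)
+ blocks until rank 0 reports up:active, or raises after 60 seconds.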
+ """
+
+ started_at = time.time()
+ while True:
+ status = self.status()
+ if rank is not None:
+ try:
+ mds_info = status.get_rank(self.id, rank)
+ current_state = mds_info['state'] if mds_info else None
+ log.debug("Looked up MDS state for mds.{0}: {1}".format(rank, current_state))
+ except:
+ mdsmap = self.get_mds_map(status=status)
+ if rank in mdsmap['failed']:
+ log.debug("Waiting for rank {0} to come back.".format(rank))
+ current_state = None
+ else:
+ raise
+ elif mds_id is not None:
+ # mds_info is None if no daemon with this ID exists in the map
+ mds_info = status.get_mds(mds_id)
+ current_state = mds_info['state'] if mds_info else None
+ log.debug("Looked up MDS state for {0}: {1}".format(mds_id, current_state))
+ else:
+ # In general, look for a single MDS
+ states = [m['state'] for m in status.get_ranks(self.id)]
+ if [s for s in states if s == goal_state] == [goal_state]:
+ current_state = goal_state
+ elif reject in states:
+ current_state = reject
+ else:
+ current_state = None
+ log.debug("mapped states {0} to {1}".format(states, current_state))
+
+ elapsed = time.time() - started_at
+ if current_state == goal_state:
+ log.debug("reached state '{0}' in {1}s".format(current_state, elapsed))
+ return elapsed
+ elif reject is not None and current_state == reject:
+ raise RuntimeError("MDS in reject state {0}".format(current_state))
+ elif timeout is not None and elapsed > timeout:
+ log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id)))
+ raise RuntimeError(
+ "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format(
+ elapsed, goal_state, current_state
+ ))
+ else:
+ time.sleep(1)
+
+ def _read_data_xattr(self, ino_no, xattr_name, type, pool):
+ mds_id = self.mds_ids[0]
+ remote = self.mds_daemons[mds_id].remote
+ if pool is None:
+ pool = self.get_data_pool_name()
+
+ obj_name = "{0:x}.00000000".format(ino_no)
+
+ args = [
+ os.path.join(self._prefix, "rados"), "-p", pool, "getxattr", obj_name, xattr_name
+ ]
+ try:
+ proc = remote.run(args=args, stdout=BytesIO())
+ except CommandFailedError as e:
+ log.error(e.__str__())
+ raise ObjectNotFound(obj_name)
+
+ data = proc.stdout.getvalue()
+ dump = remote.sh(
+ [os.path.join(self._prefix, "ceph-dencoder"),
+ "type", type,
+ "import", "-",
+ "decode", "dump_json"],
+ stdin=data,
+ stdout=StringIO()
+ )
+
+ return json.loads(dump.strip())
+
+ def _write_data_xattr(self, ino_no, xattr_name, data, pool=None):
+ """
+ Write to an xattr of the 0th data object of an inode. Will
+ succeed whether the object and/or xattr already exist or not.
+
+ :param ino_no: integer inode number
+ :param xattr_name: string name of the xattr
+ :param data: byte array data to write to the xattr
+ :param pool: name of data pool or None to use primary data pool
+ :return: None
+ """
+ remote = self.mds_daemons[self.mds_ids[0]].remote
+ if pool is None:
+ pool = self.get_data_pool_name()
+
+ obj_name = "{0:x}.00000000".format(ino_no)
+ args = [
+ os.path.join(self._prefix, "rados"), "-p", pool, "setxattr",
+ obj_name, xattr_name, data
+ ]
+ remote.sh(args)
+
+ def read_backtrace(self, ino_no, pool=None):
+ """
+ Read the backtrace from the data pool, return a dict in the format
+ given by inode_backtrace_t::dump, which is something like:
+
+ ::
+
+ rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin
+ ceph-dencoder type inode_backtrace_t import out.bin decode dump_json
+
+ { "ino": 1099511627778,
+ "ancestors": [
+ { "dirino": 1,
+ "dname": "blah",
+ "version": 11}],
+ "pool": 1,
+ "old_pools": []}
+
+ :param pool: name of pool to read backtrace from. If omitted, FS must have only
+ one data pool and that will be used.
+ """
+ return self._read_data_xattr(ino_no, "parent", "inode_backtrace_t", pool)
+
+ def read_layout(self, ino_no, pool=None):
+ """
+ Read 'layout' xattr of an inode and parse the result, returning a dict like:
+ ::
+ {
+ "stripe_unit": 4194304,
+ "stripe_count": 1,
+ "object_size": 4194304,
+ "pool_id": 1,
+ "pool_ns": "",
+ }
+
+ :param pool: name of pool to read backtrace from. If omitted, FS must have only
+ one data pool and that will be used.
+ """
+ return self._read_data_xattr(ino_no, "layout", "file_layout_t", pool)
+
+ def _enumerate_data_objects(self, ino, size):
+ """
+ Get the list of expected data objects for a range, and the list of objects
+ that really exist.
+
+ :return a tuple of two lists of strings (expected, actual)
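+
+ For example (illustrative), inode 0x10000000000 with size 9 MiB maps to
+ want_objects 10000000000.00000000, 10000000000.00000001 and
+ 10000000000.00000002, given the 4 MiB stripe size assumed below.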
+ """
+ stripe_size = 1024 * 1024 * 4
+
+ size = max(stripe_size, size)
+
+ want_objects = [
+ "{0:x}.{1:08x}".format(ino, n)
+ for n in range(0, ((size - 1) // stripe_size) + 1)
+ ]
+
+ exist_objects = self.rados(["ls"], pool=self.get_data_pool_name()).split("\n")
+
+ return want_objects, exist_objects
+
+ def data_objects_present(self, ino, size):
+ """
+ Check that *all* the expected data objects for an inode are present in the data pool
+ """
+
+ want_objects, exist_objects = self._enumerate_data_objects(ino, size)
+ missing = set(want_objects) - set(exist_objects)
+
+ if missing:
+ log.debug("Objects missing (ino {0}, size {1}): {2}".format(
+ ino, size, missing
+ ))
+ return False
+ else:
+ log.debug("All objects for ino {0} size {1} found".format(ino, size))
+ return True
+
+ def data_objects_absent(self, ino, size):
+ want_objects, exist_objects = self._enumerate_data_objects(ino, size)
+ present = set(want_objects) & set(exist_objects)
+
+ if present:
+ log.debug("Objects not absent (ino {0}, size {1}): {2}".format(
+ ino, size, present
+ ))
+ return False
+ else:
+ log.debug("All objects for ino {0} size {1} are absent".format(ino, size))
+ return True
+
+ def dirfrag_exists(self, ino, frag):
+ try:
+ self.rados(["stat", "{0:x}.{1:08x}".format(ino, frag)])
+ except CommandFailedError:
+ return False
+ else:
+ return True
+
+ def rados(self, args, pool=None, namespace=None, stdin_data=None,
+ stdin_file=None,
+ stdout_data=None):
+ """
+ Call into the `rados` CLI from an MDS
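+
+ Example (illustrative): self.fs.rados(["listomapkeys", "1.00000000"]) lists
+ the omap keys of the root directory's dirfrag object in the metadata pool
+ (the default pool when none is given).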
+ """
+
+ if pool is None:
+ pool = self.get_metadata_pool_name()
+
+ # Doesn't matter which MDS we use to run rados commands, they all
+ # have access to the pools
+ mds_id = self.mds_ids[0]
+ remote = self.mds_daemons[mds_id].remote
+
+ # NB we could alternatively use librados pybindings for this, but it's a one-liner
+ # using the `rados` CLI
+ args = ([os.path.join(self._prefix, "rados"), "-p", pool] +
+ (["--namespace", namespace] if namespace else []) +
+ args)
+
+ if stdin_file is not None:
+ args = ["bash", "-c", "cat " + stdin_file + " | " + " ".join(args)]
+ if stdout_data is None:
+ stdout_data = StringIO()
+
+ p = remote.run(args=args,
+ stdin=stdin_data,
+ stdout=stdout_data)
+ return p.stdout.getvalue().strip()
+
+ def list_dirfrag(self, dir_ino):
+ """
+ Read the named object and return the list of omap keys
+
+ :return a list of 0 or more strings
+ """
+
+ dirfrag_obj_name = "{0:x}.00000000".format(dir_ino)
+
+ try:
+ key_list_str = self.rados(["listomapkeys", dirfrag_obj_name])
+ except CommandFailedError as e:
+ log.error(e.__str__())
+ raise ObjectNotFound(dirfrag_obj_name)
+
+ return key_list_str.split("\n") if key_list_str else []
+
+ def erase_metadata_objects(self, prefix):
+ """
+ For all objects in the metadata pool matching the prefix,
+ erase them.
+
+ This is O(N) in the number of objects in the pool, so it is only
+ suitable for use on toy test filesystems.
+ """
+ all_objects = self.rados(["ls"]).split("\n")
+ matching_objects = [o for o in all_objects if o.startswith(prefix)]
+ for o in matching_objects:
+ self.rados(["rm", o])
+
+ def erase_mds_objects(self, rank):
+ """
+ Erase all the per-MDS objects for a particular rank. This includes
+ inotable, sessiontable, journal
+ """
+
+ def obj_prefix(multiplier):
+ """
+ MDS object naming conventions like rank 1's
+ journal is at 201.***
+ """
+ return "%x." % (multiplier * 0x100 + rank)
+
+ # MDS_INO_LOG_OFFSET
+ self.erase_metadata_objects(obj_prefix(2))
+ # MDS_INO_LOG_BACKUP_OFFSET
+ self.erase_metadata_objects(obj_prefix(3))
+ # MDS_INO_LOG_POINTER_OFFSET
+ self.erase_metadata_objects(obj_prefix(4))
+ # MDSTables & SessionMap
+ self.erase_metadata_objects("mds{rank:d}_".format(rank=rank))
+
+ @property
+ def _prefix(self):
+ """
+ Override this to set a different install prefix for the Ceph binaries.
+ """
+ return ""
+
+ def _make_rank(self, rank):
+ return "{}:{}".format(self.name, rank)
+
+ def _run_tool(self, tool, args, rank=None, quiet=False):
+ # Tests frequently have [client] configuration that jacks up
+ # the objecter log level (unlikely to be interesting here)
+ # and does not set the mds log level (very interesting here)
+ if quiet:
+ base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1']
+ else:
+ base_args = [os.path.join(self._prefix, tool), '--debug-mds=4', '--debug-objecter=1']
+
+ if rank is not None:
+ base_args.extend(["--rank", "%s" % str(rank)])
+
+ t1 = datetime.datetime.now()
+ r = self.tool_remote.sh(script=base_args + args, stdout=StringIO()).strip()
+ duration = datetime.datetime.now() - t1
+ log.debug("Ran {0} in time {1}, result:\n{2}".format(
+ base_args + args, duration, r
+ ))
+ return r
+
+ @property
+ def tool_remote(self):
+ """
+ An arbitrary remote to use when invoking recovery tools. Use an MDS host because
+ it'll definitely have keys with perms to access cephfs metadata pool. This is public
+ so that tests can use this remote to go get locally written output files from the tools.
+ """
+ mds_id = self.mds_ids[0]
+ return self.mds_daemons[mds_id].remote
+
+ def journal_tool(self, args, rank, quiet=False):
+ """
+ Invoke cephfs-journal-tool with the passed arguments for a rank, and return its stdout
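+
+ Example (illustrative): self.fs.journal_tool(["journal", "inspect"], 0)
+ invokes cephfs-journal-tool with --rank <fs_name>:0 followed by
+ "journal inspect".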
+ """
+ fs_rank = self._make_rank(rank)
+ return self._run_tool("cephfs-journal-tool", args, fs_rank, quiet)
+
+ def table_tool(self, args, quiet=False):
+ """
+ Invoke cephfs-table-tool with the passed arguments, and return its stdout
+ """
+ return self._run_tool("cephfs-table-tool", args, None, quiet)
+
+ def data_scan(self, args, quiet=False, worker_count=1):
+ """
+ Invoke cephfs-data-scan with the passed arguments, and return its stdout
+
+ :param worker_count: if greater than 1, multiple workers will be run
+ in parallel and the return value will be None
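+
+ Example (illustrative, data_pool being a hypothetical pool argument):
+ self.fs.data_scan(["scan_inodes", data_pool], worker_count=4) spawns four
+ workers, each invoked with matching --worker_n/--worker_m arguments.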
+ """
+
+ workers = []
+
+ for n in range(0, worker_count):
+ if worker_count > 1:
+ # data-scan args first token is a command, followed by args to it.
+ # insert worker arguments after the command.
+ cmd = args[0]
+ worker_args = [cmd] + ["--worker_n", n.__str__(), "--worker_m", worker_count.__str__()] + args[1:]
+ else:
+ worker_args = args
+
+ workers.append(Greenlet.spawn(lambda wargs=worker_args:
+ self._run_tool("cephfs-data-scan", wargs, None, quiet)))
+
+ for w in workers:
+ w.get()
+
+ if worker_count == 1:
+ return workers[0].value
+ else:
+ return None
+
+ def is_full(self):
+ return self.is_pool_full(self.get_data_pool_name())
diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py
new file mode 100644
index 00000000..664de4f4
--- /dev/null
+++ b/qa/tasks/cephfs/fuse_mount.py
@@ -0,0 +1,502 @@
+from io import StringIO
+import json
+import time
+import logging
+
+import six
+
+from textwrap import dedent
+
+from teuthology import misc
+from teuthology.contextutil import MaxWhileTries
+from teuthology.orchestra import run
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.mount import CephFSMount
+
+log = logging.getLogger(__name__)
+
+
+class FuseMount(CephFSMount):
+ def __init__(self, ctx, client_config, test_dir, client_id, client_remote):
+ super(FuseMount, self).__init__(ctx, test_dir, client_id, client_remote)
+
+ self.client_config = client_config if client_config else {}
+ self.fuse_daemon = None
+ self._fuse_conn = None
+ self.id = None
+ self.inst = None
+ self.addr = None
+
+ def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+ if mountpoint is not None:
+ self.mountpoint = mountpoint
+ self.setupfs(name=mount_fs_name)
+
+ try:
+ return self._mount(mount_path, mount_fs_name)
+ except RuntimeError:
+ # Catch exceptions raised by the mount() logic (i.e. not remote command
+ # failures) and ensure the mount is not left half-up.
+ # Otherwise we might leave a zombie mount point that anyone
+ # traversing cephtest/ gets hung up on.
+ log.warning("Trying to clean up after failed mount")
+ self.umount_wait(force=True)
+ raise
+
+ def _mount(self, mount_path, mount_fs_name):
+ log.info("Client client.%s config is %s" % (self.client_id, self.client_config))
+
+ daemon_signal = 'kill'
+ if self.client_config.get('coverage') or self.client_config.get('valgrind') is not None:
+ daemon_signal = 'term'
+
+ log.info('Mounting ceph-fuse client.{id} at {remote} {mnt}...'.format(
+ id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
+
+ self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
+ timeout=(15*60), cwd=self.test_dir)
+
+ run_cmd = [
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=self.test_dir),
+ 'daemon-helper',
+ daemon_signal,
+ ]
+
+ fuse_cmd = ['ceph-fuse', "-f"]
+
+ if mount_path is not None:
+ fuse_cmd += ["--client_mountpoint={0}".format(mount_path)]
+
+ if mount_fs_name is not None:
+ fuse_cmd += ["--client_mds_namespace={0}".format(mount_fs_name)]
+
+ fuse_cmd += [
+ '--name', 'client.{id}'.format(id=self.client_id),
+ # TODO ceph-fuse doesn't understand dash dash '--',
+ self.mountpoint,
+ ]
+
+ cwd = self.test_dir
+ if self.client_config.get('valgrind') is not None:
+ run_cmd = misc.get_valgrind_args(
+ self.test_dir,
+ 'client.{id}'.format(id=self.client_id),
+ run_cmd,
+ self.client_config.get('valgrind'),
+ )
+ cwd = None # misc.get_valgrind_args chdirs for us
+
+ run_cmd.extend(fuse_cmd)
+
+ def list_connections():
+
+ conn_dir = "/sys/fs/fuse/connections"
+
+ self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
+ check_status=False)
+ self.client_remote.run(
+ args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
+ check_status=False, timeout=(30))
+
+ try:
+ ls_str = self.client_remote.sh("ls " + conn_dir,
+ stdout=StringIO(),
+ timeout=(15*60)).strip()
+ except CommandFailedError:
+ return []
+
+ if ls_str:
+ return [int(n) for n in ls_str.split("\n")]
+ else:
+ return []
+
+ # Before starting ceph-fuse process, note the contents of
+ # /sys/fs/fuse/connections
+ pre_mount_conns = list_connections()
+ log.info("Pre-mount connections: {0}".format(pre_mount_conns))
+
+ proc = self.client_remote.run(
+ args=run_cmd,
+ cwd=cwd,
+ logger=log.getChild('ceph-fuse.{id}'.format(id=self.client_id)),
+ stdin=run.PIPE,
+ wait=False,
+ )
+ self.fuse_daemon = proc
+
+ # Wait for the connection reference to appear in /sys
+ mount_wait = self.client_config.get('mount_wait', 0)
+ if mount_wait > 0:
+ log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait))
+ time.sleep(mount_wait)
+ timeout = int(self.client_config.get('mount_timeout', 30))
+ waited = 0
+
+ post_mount_conns = list_connections()
+ while len(post_mount_conns) <= len(pre_mount_conns):
+ if self.fuse_daemon.finished:
+ # Did mount fail? Raise the CommandFailedError instead of
+ # hitting the "failed to populate /sys/" timeout
+ self.fuse_daemon.wait()
+ time.sleep(1)
+ waited += 1
+ if waited > timeout:
+ raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format(
+ waited
+ ))
+ else:
+ post_mount_conns = list_connections()
+
+ log.info("Post-mount connections: {0}".format(post_mount_conns))
+
+ # Record our fuse connection number so that we can use it when
+ # forcing an unmount
+ new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
+ if len(new_conns) == 0:
+ raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
+ elif len(new_conns) > 1:
+ raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
+ else:
+ self._fuse_conn = new_conns[0]
+
+ self.gather_mount_info()
+
+ def gather_mount_info(self):
+ status = self.admin_socket(['status'])
+ self.id = status['id']
+ self.client_pid = status['metadata']['pid']
+ try:
+ self.inst = status['inst_str']
+ self.addr = status['addr_str']
+ except KeyError:
+ sessions = self.fs.rank_asok(['session', 'ls'])
+ for s in sessions:
+ if s['id'] == self.id:
+ self.inst = s['inst']
+ self.addr = self.inst.split()[1]
+ if self.inst is None:
+ raise RuntimeError("cannot find client session")
+
+ def is_mounted(self):
+ proc = self.client_remote.run(
+ args=[
+ 'stat',
+ '--file-system',
+ '--printf=%T\n',
+ '--',
+ self.mountpoint,
+ ],
+ cwd=self.test_dir,
+ stdout=StringIO(),
+ stderr=StringIO(),
+ wait=False,
+ timeout=(15*60)
+ )
+ try:
+ proc.wait()
+ except CommandFailedError:
+ error = proc.stderr.getvalue()
+ if ("endpoint is not connected" in error
+ or "Software caused connection abort" in error):
+ # This happens if ceph-fuse is killed without an unmount
+ log.warning("Found stale mount point at {0}".format(self.mountpoint))
+ return True
+ else:
+ # This happens if the mount directory doesn't exist
+ log.info('mount point does not exist: %s', self.mountpoint)
+ return False
+
+ fstype = six.ensure_str(proc.stdout.getvalue()).rstrip('\n')
+ if fstype == 'fuseblk':
+ log.info('ceph-fuse is mounted on %s', self.mountpoint)
+ return True
+ else:
+ log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format(
+ fstype=fstype))
+ return False
+
+ def wait_until_mounted(self):
+ """
+ Check to make sure that fuse is mounted on mountpoint. If not,
+ sleep for 5 seconds and check again.
+ """
+
+ while not self.is_mounted():
+ # Even if it's not mounted, it should at least
+ # be running: catch simple failures where it has terminated.
+ assert not self.fuse_daemon.poll()
+
+ time.sleep(5)
+
+ # Now that we're mounted, set permissions so that the rest of the test will have
+ # unrestricted access to the filesystem mount.
+ try:
+ stderr = StringIO()
+ self.client_remote.run(args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(15*60), cwd=self.test_dir, stderr=stderr)
+ except run.CommandFailedError:
+ stderr = stderr.getvalue()
+ if "Read-only file system".lower() in stderr.lower():
+ pass
+ else:
+ raise
+
+ def _mountpoint_exists(self):
+ return self.client_remote.run(args=["ls", "-d", self.mountpoint], check_status=False, cwd=self.test_dir, timeout=(15*60)).exitstatus == 0
+
+ def umount(self):
+ if not self.is_mounted():
+ return
+
+ try:
+ log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name))
+ self.client_remote.run(
+ args=[
+ 'sudo',
+ 'fusermount',
+ '-u',
+ self.mountpoint,
+ ],
+ cwd=self.test_dir,
+ timeout=(30*60),
+ )
+ except run.CommandFailedError:
+ log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name))
+
+ self.client_remote.run(args=[
+ 'sudo',
+ run.Raw('PATH=/usr/sbin:$PATH'),
+ 'lsof',
+ run.Raw(';'),
+ 'ps',
+ 'auxf',
+ ], timeout=(60*15))
+
+ # abort the fuse mount, killing all hung processes
+ if self._fuse_conn:
+ self.run_python(dedent("""
+ import os
+ path = "/sys/fs/fuse/connections/{0}/abort"
+ if os.path.exists(path):
+ open(path, "w").write("1")
+ """).format(self._fuse_conn))
+ self._fuse_conn = None
+
+ stderr = StringIO()
+ try:
+ # make sure its unmounted
+ self.client_remote.run(
+ args=[
+ 'sudo',
+ 'umount',
+ '-l',
+ '-f',
+ self.mountpoint,
+ ],
+ stderr=stderr,
+ timeout=(60*15)
+ )
+ except CommandFailedError:
+ if self.is_mounted():
+ raise
+
+ assert not self.is_mounted()
+ self._fuse_conn = None
+ self.id = None
+ self.inst = None
+ self.addr = None
+
+ def umount_wait(self, force=False, require_clean=False, timeout=900):
+ """
+ :param force: Complete cleanly even if the MDS is offline
+ """
+ if force:
+ assert not require_clean # mutually exclusive
+
+ # When we expect to be forcing, kill the ceph-fuse process directly.
+ # This should avoid hitting the more aggressive fallback killing
+ # in umount() which can affect other mounts too.
+ self.fuse_daemon.stdin.close()
+
+ # However, we will still hit the aggressive wait if there is an ongoing
+ # mount -o remount (especially if the remount is stuck because MDSs
+ # are unavailable)
+
+ self.umount()
+
+ try:
+ if self.fuse_daemon:
+ # Permit a timeout, so that we do not block forever
+ run.wait([self.fuse_daemon], timeout)
+ except MaxWhileTries:
+ log.error("process failed to terminate after unmount. This probably"
+ " indicates a bug within ceph-fuse.")
+ raise
+ except CommandFailedError:
+ if require_clean:
+ raise
+
+ self.cleanup()
+
+ def cleanup(self):
+ """
+ Remove the mount point.
+
+ Prerequisite: the client is not mounted.
+ """
+ stderr = StringIO()
+ try:
+ self.client_remote.run(
+ args=[
+ 'rmdir',
+ '--',
+ self.mountpoint,
+ ],
+ cwd=self.test_dir,
+ stderr=stderr,
+ timeout=(60*5),
+ check_status=False,
+ )
+ except CommandFailedError:
+ if "No such file or directory" in stderr.getvalue():
+ pass
+ else:
+ raise
+
+ def kill(self):
+ """
+ Terminate the client without removing the mount point.
+ """
+ log.info('Killing ceph-fuse connection on {name}...'.format(name=self.client_remote.name))
+ self.fuse_daemon.stdin.close()
+ try:
+ self.fuse_daemon.wait()
+ except CommandFailedError:
+ pass
+
+ def kill_cleanup(self):
+ """
+ Follow up ``kill`` to get to a clean unmounted state.
+ """
+ log.info('Cleaning up killed ceph-fuse connection')
+ self.umount()
+ self.cleanup()
+
+ def teardown(self):
+ """
+ Whatever the state of the mount, get it gone.
+ """
+ super(FuseMount, self).teardown()
+
+ self.umount()
+
+ if self.fuse_daemon and not self.fuse_daemon.finished:
+ self.fuse_daemon.stdin.close()
+ try:
+ self.fuse_daemon.wait()
+ except CommandFailedError:
+ pass
+
+ # Indiscriminate, unlike the touchier cleanup()
+ self.client_remote.run(
+ args=[
+ 'rm',
+ '-rf',
+ self.mountpoint,
+ ],
+ cwd=self.test_dir,
+ timeout=(60*5)
+ )
+
+ def _asok_path(self):
+ return "/var/run/ceph/ceph-client.{0}.*.asok".format(self.client_id)
+
+ @property
+ def _prefix(self):
+ return ""
+
+ def admin_socket(self, args):
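+        """
+        Run a command against this client's admin socket (via
+        ``ceph --admin-daemon <asok>``) and return the decoded JSON output,
+        e.g. ``self.admin_socket(['status'])``.
+        """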
+ pyscript = """
+import glob
+import re
+import os
+import subprocess
+
+def find_socket(client_name):
+ asok_path = "{asok_path}"
+ files = glob.glob(asok_path)
+
+ # Given a non-glob path, it better be there
+ if "*" not in asok_path:
+ assert(len(files) == 1)
+ return files[0]
+
+ for f in files:
+ pid = re.match(".*\.(\d+)\.asok$", f).group(1)
+ if os.path.exists("/proc/{{0}}".format(pid)):
+ return f
+ raise RuntimeError("Client socket {{0}} not found".format(client_name))
+
+print(find_socket("{client_name}"))
+""".format(
+ asok_path=self._asok_path(),
+ client_name="client.{0}".format(self.client_id))
+
+ # Find the admin socket
+ asok_path = self.client_remote.sh(
+ ['sudo', 'python3', '-c', pyscript],
+ stdout=StringIO(),
+ timeout=(15*60)).strip()
+ log.info("Found client admin socket at {0}".format(asok_path))
+
+        # Run the requested command against the admin socket
+ json_data = self.client_remote.sh(
+ ['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args,
+ stdout=StringIO(),
+ timeout=(15*60))
+ return json.loads(json_data)
+
+ def get_global_id(self):
+ """
+ Look up the CephFS client ID for this mount
+ """
+ return self.admin_socket(['mds_sessions'])['id']
+
+ def get_global_inst(self):
+ """
+ Look up the CephFS client instance for this mount
+ """
+ return self.inst
+
+ def get_global_addr(self):
+ """
+ Look up the CephFS client addr for this mount
+ """
+ return self.addr
+
+ def get_client_pid(self):
+ """
+ return pid of ceph-fuse process
+ """
+ status = self.admin_socket(['status'])
+ return status['metadata']['pid']
+
+ def get_osd_epoch(self):
+ """
+ Return 2-tuple of osd_epoch, osd_epoch_barrier
+ """
+ status = self.admin_socket(['status'])
+ return status['osd_epoch'], status['osd_epoch_barrier']
+
+ def get_dentry_count(self):
+ """
+ Return 2-tuple of dentry_count, dentry_pinned_count
+ """
+ status = self.admin_socket(['status'])
+ return status['dentry_count'], status['dentry_pinned_count']
+
+ def set_cache_size(self, size):
+ return self.admin_socket(['config', 'set', 'client_cache_size', str(size)])
diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py
new file mode 100644
index 00000000..d027bcfc
--- /dev/null
+++ b/qa/tasks/cephfs/kernel_mount.py
@@ -0,0 +1,260 @@
+import json
+import logging
+import time
+from textwrap import dedent
+from teuthology.orchestra.run import CommandFailedError
+from teuthology import misc
+
+from teuthology.orchestra import remote as orchestra_remote
+from teuthology.orchestra import run
+from teuthology.contextutil import MaxWhileTries
+from tasks.cephfs.mount import CephFSMount
+
+log = logging.getLogger(__name__)
+
+
+UMOUNT_TIMEOUT = 300
+
+
+class KernelMount(CephFSMount):
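+    """
+    A CephFS mount driven by the in-kernel client (``mount -t ceph``).
+    There is no userspace daemon to manage; kill() instead power-cycles
+    the whole node over IPMI, hence the IPMI credentials here.
+    """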
+ def __init__(self, ctx, test_dir, client_id, client_remote,
+ ipmi_user, ipmi_password, ipmi_domain):
+ super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote)
+
+ self.mounted = False
+ self.ipmi_user = ipmi_user
+ self.ipmi_password = ipmi_password
+ self.ipmi_domain = ipmi_domain
+
+ def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+ if mountpoint is not None:
+ self.mountpoint = mountpoint
+ self.setupfs(name=mount_fs_name)
+
+ log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
+ id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
+
+ self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
+ timeout=(5*60))
+
+ if mount_path is None:
+ mount_path = "/"
+
+ opts = 'name={id},norequire_active_mds,conf={conf}'.format(id=self.client_id,
+ conf=self.config_path)
+
+ if mount_fs_name is not None:
+ opts += ",mds_namespace={0}".format(mount_fs_name)
+
+ self.client_remote.run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=self.test_dir),
+ '/bin/mount',
+ '-t',
+ 'ceph',
+ ':{mount_path}'.format(mount_path=mount_path),
+ self.mountpoint,
+ '-v',
+ '-o',
+ opts
+ ],
+ timeout=(30*60),
+ )
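+        # The assembled command is roughly equivalent to (illustrative values):
+        #   sudo mount -t ceph :/ <mountpoint> -v \
+        #       -o name=<id>,norequire_active_mds,conf=/etc/ceph/ceph.conf[,mds_namespace=<fs>]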
+
+ self.client_remote.run(
+ args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(5*60))
+
+ self.mounted = True
+
+ def umount(self, force=False):
+ if not self.is_mounted():
+ return
+
+ log.debug('Unmounting client client.{id}...'.format(id=self.client_id))
+
+        cmd = ['sudo', 'umount', self.mountpoint]
+ if force:
+ cmd.append('-f')
+
+ try:
+ self.client_remote.run(args=cmd, timeout=(15*60))
+ except Exception as e:
+ self.client_remote.run(args=[
+ 'sudo',
+ run.Raw('PATH=/usr/sbin:$PATH'),
+ 'lsof',
+ run.Raw(';'),
+ 'ps', 'auxf',
+ ], timeout=(15*60))
+ raise e
+
+ rproc = self.client_remote.run(
+ args=[
+ 'rmdir',
+ '--',
+ self.mountpoint,
+ ],
+ wait=False
+ )
+ run.wait([rproc], UMOUNT_TIMEOUT)
+ self.mounted = False
+
+ def cleanup(self):
+ pass
+
+ def umount_wait(self, force=False, require_clean=False, timeout=900):
+ """
+ Unlike the fuse client, the kernel client's umount is immediate
+ """
+ if not self.is_mounted():
+ return
+
+ try:
+ self.umount(force)
+ except (CommandFailedError, MaxWhileTries):
+ if not force:
+ raise
+
+ self.kill()
+ self.kill_cleanup()
+
+ self.mounted = False
+
+ def is_mounted(self):
+ return self.mounted
+
+ def wait_until_mounted(self):
+ """
+ Unlike the fuse client, the kernel client is up and running as soon
+ as the initial mount() function returns.
+ """
+ assert self.mounted
+
+ def teardown(self):
+ super(KernelMount, self).teardown()
+ if self.mounted:
+ self.umount()
+
+ def kill(self):
+ """
+ The Ceph kernel client doesn't have a mechanism to kill itself (doing
+        that inside the kernel would be weird anyway), so we reboot the whole node
+ to get the same effect.
+
+ We use IPMI to reboot, because we don't want the client to send any
+ releases of capabilities.
+ """
+
+ con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
+ self.ipmi_user,
+ self.ipmi_password,
+ self.ipmi_domain)
+ con.hard_reset(wait_for_login=False)
+
+ self.mounted = False
+
+ def kill_cleanup(self):
+ assert not self.mounted
+
+        # We need to sleep here because we don't know how long it will
+        # take for the hard reset to take effect.
+ time.sleep(30)
+
+ try:
+ # Wait for node to come back up after reboot
+ misc.reconnect(None, 300, [self.client_remote])
+ except:
+ # attempt to get some useful debug output:
+ con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
+ self.ipmi_user,
+ self.ipmi_password,
+ self.ipmi_domain)
+ con.check_status(timeout=60)
+ raise
+
+        # Verify that the node is back up and responsive
+ self.client_remote.run(args=['uptime'], timeout=10)
+
+ # Remove mount directory
+ self.client_remote.run(
+ args=[
+ 'rmdir',
+ '--',
+ self.mountpoint,
+ ],
+ timeout=(5*60),
+ check_status=False,
+ )
+
+ def _find_debug_dir(self):
+ """
+ Find the debugfs folder for this mount
+ """
+ pyscript = dedent("""
+ import glob
+ import os
+ import json
+
+ def get_id_to_dir():
+ result = {}
+ for dir in glob.glob("/sys/kernel/debug/ceph/*"):
+ mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
+ client_id = mds_sessions_lines[1].split()[1].strip('"')
+
+ result[client_id] = dir
+ return result
+
+ print(json.dumps(get_id_to_dir()))
+ """)
+
+ output = self.client_remote.sh([
+ 'sudo', 'python3', '-c', pyscript
+ ], timeout=(5*60))
+ client_id_to_dir = json.loads(output)
+
+ try:
+ return client_id_to_dir[self.client_id]
+ except KeyError:
+ log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
+ self.client_id, ",".join(client_id_to_dir.keys())
+ ))
+ raise
+
+ def _read_debug_file(self, filename):
+ debug_dir = self._find_debug_dir()
+
+ pyscript = dedent("""
+ import os
+
+ print(open(os.path.join("{debug_dir}", "{filename}")).read())
+ """).format(debug_dir=debug_dir, filename=filename)
+
+ output = self.client_remote.sh([
+ 'sudo', 'python3', '-c', pyscript
+ ], timeout=(5*60))
+ return output
+
+ def get_global_id(self):
+ """
+ Look up the CephFS client ID for this mount, using debugfs.
+ """
+
+ assert self.mounted
+
+ mds_sessions = self._read_debug_file("mds_sessions")
+ lines = mds_sessions.split("\n")
+ return int(lines[0].split()[1])
+
+ def get_osd_epoch(self):
+ """
+ Return 2-tuple of osd_epoch, osd_epoch_barrier
+ """
+ osd_map = self._read_debug_file("osdmap")
+ lines = osd_map.split("\n")
+ first_line_tokens = lines[0].split()
+ epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])
+
+ return epoch, barrier
diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py
new file mode 100644
index 00000000..d486f1b6
--- /dev/null
+++ b/qa/tasks/cephfs/mount.py
@@ -0,0 +1,728 @@
+from contextlib import contextmanager
+from io import BytesIO
+import json
+import logging
+import datetime
+import six
+import time
+from six import StringIO
+from textwrap import dedent
+import os
+from teuthology.misc import sudo_write_file
+from teuthology.orchestra import run
+from teuthology.orchestra.run import CommandFailedError, ConnectionLostError, Raw
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+
+class CephFSMount(object):
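+    """
+    Base class for CephFS client mounts used by the QA tasks.  Concrete
+    subclasses (FuseMount, KernelMount) implement mount/umount; the helpers
+    here create workloads and inspect state through the mount point.
+    """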
+ def __init__(self, ctx, test_dir, client_id, client_remote):
+ """
+ :param test_dir: Global teuthology test dir
+ :param client_id: Client ID, the 'foo' in client.foo
+ :param client_remote: Remote instance for the host where client will run
+ """
+
+ self.ctx = ctx
+ self.test_dir = test_dir
+ self.client_id = client_id
+ self.client_remote = client_remote
+ self.mountpoint_dir_name = 'mnt.{id}'.format(id=self.client_id)
+ self._mountpoint = None
+ self.fs = None
+
+ self.test_files = ['a', 'b', 'c']
+
+ self.background_procs = []
+
+ @property
+ def mountpoint(self):
+        if self._mountpoint is None:
+            self._mountpoint = os.path.join(
+ self.test_dir, '{dir_name}'.format(dir_name=self.mountpoint_dir_name))
+ return self._mountpoint
+
+ @mountpoint.setter
+ def mountpoint(self, path):
+ if not isinstance(path, str):
+ raise RuntimeError('path should be of str type.')
+ self._mountpoint = path
+
+ def is_mounted(self):
+ raise NotImplementedError()
+
+ def setupfs(self, name=None):
+ if name is None and self.fs is not None:
+ # Previous mount existed, reuse the old name
+ name = self.fs.name
+ self.fs = Filesystem(self.ctx, name=name)
+ log.info('Wait for MDS to reach steady state...')
+ self.fs.wait_for_daemons()
+ log.info('Ready to start {}...'.format(type(self).__name__))
+
+ def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+ raise NotImplementedError()
+
+ def umount(self):
+ raise NotImplementedError()
+
+ def umount_wait(self, force=False, require_clean=False):
+ """
+
+ :param force: Expect that the mount will not shutdown cleanly: kill
+ it hard.
+ :param require_clean: Wait for the Ceph client associated with the
+ mount (e.g. ceph-fuse) to terminate, and
+ raise if it doesn't do so cleanly.
+ :return:
+ """
+ raise NotImplementedError()
+
+ def kill_cleanup(self):
+ raise NotImplementedError()
+
+ def kill(self):
+ raise NotImplementedError()
+
+ def cleanup(self):
+ raise NotImplementedError()
+
+ def wait_until_mounted(self):
+ raise NotImplementedError()
+
+ def get_keyring_path(self):
+ return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id)
+
+ @property
+ def config_path(self):
+ """
+ Path to ceph.conf: override this if you're not a normal systemwide ceph install
+        :return: string
+ """
+ return "/etc/ceph/ceph.conf"
+
+ @contextmanager
+ def mounted(self):
+ """
+ A context manager, from an initially unmounted state, to mount
+ this, yield, and then unmount and clean up.
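+
+        A minimal usage sketch (``client_mount`` stands for any concrete
+        CephFSMount instance):
+
+            with client_mount.mounted():
+                client_mount.run_shell(["ls"])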
+ """
+ self.mount()
+ self.wait_until_mounted()
+ try:
+ yield
+ finally:
+ self.umount_wait()
+
+ def is_blacklisted(self):
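+        """
+        Return True if this client's address appears in the output of
+        ``ceph osd blacklist ls``.
+        """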
+ addr = self.get_global_addr()
+ blacklist = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "blacklist", "ls", "--format=json"))
+ for b in blacklist:
+ if addr == b["addr"]:
+ return True
+ return False
+
+ def create_files(self):
+ assert(self.is_mounted())
+
+ for suffix in self.test_files:
+ log.info("Creating file {0}".format(suffix))
+ self.client_remote.run(args=[
+ 'sudo', 'touch', os.path.join(self.mountpoint, suffix)
+ ])
+
+ def check_files(self):
+ assert(self.is_mounted())
+
+ for suffix in self.test_files:
+ log.info("Checking file {0}".format(suffix))
+ r = self.client_remote.run(args=[
+ 'sudo', 'ls', os.path.join(self.mountpoint, suffix)
+ ], check_status=False)
+ if r.exitstatus != 0:
+ raise RuntimeError("Expected file {0} not found".format(suffix))
+
+ def write_file(self, path, data, perms=None):
+ """
+ Write the given data at the given path and set the given perms to the
+ file on the path.
+ """
+ if path.find(self.mountpoint) == -1:
+ path = os.path.join(self.mountpoint, path)
+
+ sudo_write_file(self.client_remote, path, data)
+
+ if perms:
+ self.run_shell(args=f'chmod {perms} {path}')
+
+ def read_file(self, path):
+ """
+ Return the data from the file on given path.
+ """
+ if path.find(self.mountpoint) == -1:
+ path = os.path.join(self.mountpoint, path)
+
+ return self.run_shell(args=['sudo', 'cat', path], omit_sudo=False).\
+ stdout.getvalue().strip()
+
+ def create_destroy(self):
+ assert(self.is_mounted())
+
+ filename = "{0} {1}".format(datetime.datetime.now(), self.client_id)
+ log.debug("Creating test file {0}".format(filename))
+ self.client_remote.run(args=[
+ 'sudo', 'touch', os.path.join(self.mountpoint, filename)
+ ])
+ log.debug("Deleting test file {0}".format(filename))
+ self.client_remote.run(args=[
+ 'sudo', 'rm', '-f', os.path.join(self.mountpoint, filename)
+ ])
+
+ def _run_python(self, pyscript, py_version='python3'):
+ return self.client_remote.run(
+ args=['sudo', 'adjust-ulimits', 'daemon-helper', 'kill',
+ py_version, '-c', pyscript], wait=False, stdin=run.PIPE,
+ stdout=StringIO())
+
+ def run_python(self, pyscript, py_version='python3'):
+ p = self._run_python(pyscript, py_version)
+ p.wait()
+ return six.ensure_str(p.stdout.getvalue().strip())
+
+ def run_shell(self, args, wait=True, check_status=True, omit_sudo=True):
+ if isinstance(args, str):
+ args = args.split()
+
+ args = ["cd", self.mountpoint, run.Raw('&&'), "sudo"] + args
+ return self.client_remote.run(args=args, stdout=StringIO(),
+ stderr=StringIO(), wait=wait,
+ check_status=check_status,
+ omit_sudo=omit_sudo)
+
+ def run_shell_payload(self, payload, **kwargs):
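+        """
+        Convenience wrapper around run_shell: run a shell payload (which may
+        contain pipes etc.) via ``bash -c`` inside the mount point, e.g.
+        ``self.run_shell_payload("find | wc")``.
+        """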
+ return self.run_shell(["bash", "-c", Raw(f"'{payload}'")], **kwargs)
+
+ def open_no_data(self, basename):
+ """
+ A pure metadata operation
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.mountpoint, basename)
+
+ p = self._run_python(dedent(
+ """
+ f = open("{path}", 'w')
+ """.format(path=path)
+ ))
+ p.wait()
+
+ def open_background(self, basename="background_file", write=True):
+ """
+ Open a file for writing, then block such that the client
+ will hold a capability.
+
+ Don't return until the remote process has got as far as opening
+ the file, then return the RemoteProcess instance.
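+
+        A typical usage sketch (``mount`` stands for a CephFSMount instance;
+        the file name is illustrative):
+
+            p = mount.open_background("held_file")
+            # ... exercise MDS / cap-recall behaviour while the cap is held ...
+            mount.kill_background(p)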
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.mountpoint, basename)
+
+ if write:
+ pyscript = dedent("""
+ import time
+
+ with open("{path}", 'w') as f:
+ f.write('content')
+ f.flush()
+ f.write('content2')
+ while True:
+ time.sleep(1)
+ """).format(path=path)
+ else:
+ pyscript = dedent("""
+ import time
+
+ with open("{path}", 'r') as f:
+ while True:
+ time.sleep(1)
+ """).format(path=path)
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+
+ # This wait would not be sufficient if the file had already
+ # existed, but it's simple and in practice users of open_background
+ # are not using it on existing files.
+ self.wait_for_visible(basename)
+
+ return rproc
+
+ def wait_for_dir_empty(self, dirname, timeout=30):
+ i = 0
+ dirpath = os.path.join(self.mountpoint, dirname)
+ while i < timeout:
+ nr_entries = int(self.getfattr(dirpath, "ceph.dir.entries"))
+ if nr_entries == 0:
+ log.debug("Directory {0} seen empty from {1} after {2}s ".format(
+ dirname, self.client_id, i))
+ return
+ else:
+ time.sleep(1)
+ i += 1
+
+ raise RuntimeError("Timed out after {0}s waiting for {1} to become empty from {2}".format(
+ i, dirname, self.client_id))
+
+ def wait_for_visible(self, basename="background_file", timeout=30):
+ i = 0
+ while i < timeout:
+ r = self.client_remote.run(args=[
+ 'sudo', 'ls', os.path.join(self.mountpoint, basename)
+ ], check_status=False)
+ if r.exitstatus == 0:
+ log.debug("File {0} became visible from {1} after {2}s".format(
+ basename, self.client_id, i))
+ return
+ else:
+ time.sleep(1)
+ i += 1
+
+ raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format(
+ i, basename, self.client_id))
+
+ def lock_background(self, basename="background_file", do_flock=True):
+ """
+        Open and lock files for writing, and hold the locks in a background process
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.mountpoint, basename)
+
+ script_builder = """
+ import time
+ import fcntl
+ import struct"""
+ if do_flock:
+ script_builder += """
+ f1 = open("{path}-1", 'w')
+ fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)"""
+ script_builder += """
+ f2 = open("{path}-2", 'w')
+ lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+ fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
+ while True:
+ time.sleep(1)
+ """
+
+ pyscript = dedent(script_builder).format(path=path)
+
+ log.info("lock_background file {0}".format(basename))
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+ return rproc
+
+ def lock_and_release(self, basename="background_file"):
+ assert(self.is_mounted())
+
+ path = os.path.join(self.mountpoint, basename)
+
+ script = """
+ import time
+ import fcntl
+ import struct
+ f1 = open("{path}-1", 'w')
+ fcntl.flock(f1, fcntl.LOCK_EX)
+ f2 = open("{path}-2", 'w')
+ lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+ fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
+ """
+ pyscript = dedent(script).format(path=path)
+
+ log.info("lock_and_release file {0}".format(basename))
+ return self._run_python(pyscript)
+
+ def check_filelock(self, basename="background_file", do_flock=True):
+ assert(self.is_mounted())
+
+ path = os.path.join(self.mountpoint, basename)
+
+ script_builder = """
+ import fcntl
+ import errno
+ import struct"""
+ if do_flock:
+ script_builder += """
+ f1 = open("{path}-1", 'r')
+ try:
+ fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as e:
+ if e.errno == errno.EAGAIN:
+ pass
+ else:
+ raise RuntimeError("flock on file {path}-1 not found")"""
+ script_builder += """
+ f2 = open("{path}-2", 'r')
+ try:
+ lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+ fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
+ except IOError as e:
+ if e.errno == errno.EAGAIN:
+ pass
+ else:
+ raise RuntimeError("posix lock on file {path}-2 not found")
+ """
+ pyscript = dedent(script_builder).format(path=path)
+
+ log.info("check lock on file {0}".format(basename))
+ self.client_remote.run(args=[
+ 'sudo', 'python3', '-c', pyscript
+ ])
+
+ def write_background(self, basename="background_file", loop=False):
+ """
+        Open a file and write to it in a background process. The process
+        writes once and exits unless ``loop`` is True, in which case it
+        keeps writing until it is killed.
+ :param basename:
+ :return:
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.mountpoint, basename)
+
+ pyscript = dedent("""
+ import os
+ import time
+
+ fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0o644)
+ try:
+ while True:
+ os.write(fd, b'content')
+ time.sleep(1)
+ if not {loop}:
+ break
+ except IOError as e:
+ pass
+ os.close(fd)
+ """).format(path=path, loop=str(loop))
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+ return rproc
+
+ def write_n_mb(self, filename, n_mb, seek=0, wait=True):
+ """
+ Write the requested number of megabytes to a file
+ """
+ assert(self.is_mounted())
+
+ return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename),
+ "bs=1M", "conv=fdatasync",
+ "count={0}".format(int(n_mb)),
+ "seek={0}".format(int(seek))
+ ], wait=wait)
+
+ def write_test_pattern(self, filename, size):
+ log.info("Writing {0} bytes to {1}".format(size, filename))
+ return self.run_python(dedent("""
+ import zlib
+ path = "{path}"
+ with open(path, 'w') as f:
+ for i in range(0, {size}):
+ val = zlib.crc32(str(i).encode('utf-8')) & 7
+ f.write(chr(val))
+ """.format(
+ path=os.path.join(self.mountpoint, filename),
+ size=size
+ )))
+
+ def validate_test_pattern(self, filename, size):
+ log.info("Validating {0} bytes from {1}".format(size, filename))
+ return self.run_python(dedent("""
+ import zlib
+ path = "{path}"
+ with open(path, 'r') as f:
+ bytes = f.read()
+ if len(bytes) != {size}:
+ raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format(
+ len(bytes), {size}
+ ))
+ for i, b in enumerate(bytes):
+ val = zlib.crc32(str(i).encode('utf-8')) & 7
+ if b != chr(val):
+ raise RuntimeError("Bad data at offset {{0}}".format(i))
+ """.format(
+ path=os.path.join(self.mountpoint, filename),
+ size=size
+ )))
+
+ def open_n_background(self, fs_path, count):
+ """
+ Open N files for writing, hold them open in a background process
+
+ :param fs_path: Path relative to CephFS root, e.g. "foo/bar"
+ :return: a RemoteProcess
+ """
+ assert(self.is_mounted())
+
+ abs_path = os.path.join(self.mountpoint, fs_path)
+
+ pyscript = dedent("""
+ import sys
+ import time
+ import os
+
+ n = {count}
+ abs_path = "{abs_path}"
+
+ if not os.path.exists(abs_path):
+ os.makedirs(abs_path)
+
+ handles = []
+ for i in range(0, n):
+ fname = "file_"+str(i)
+ path = os.path.join(abs_path, fname)
+ handles.append(open(path, 'w'))
+
+ while True:
+ time.sleep(1)
+ """).format(abs_path=abs_path, count=count)
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+ return rproc
+
+ def create_n_files(self, fs_path, count, sync=False):
+ assert(self.is_mounted())
+
+ abs_path = os.path.join(self.mountpoint, fs_path)
+
+ pyscript = dedent("""
+ import sys
+ import time
+ import os
+
+ n = {count}
+ abs_path = "{abs_path}"
+
+ if not os.path.exists(os.path.dirname(abs_path)):
+ os.makedirs(os.path.dirname(abs_path))
+
+ for i in range(0, n):
+ fname = "{{0}}_{{1}}".format(abs_path, i)
+ with open(fname, 'w') as f:
+ f.write('content')
+ if {sync}:
+ f.flush()
+ os.fsync(f.fileno())
+ """).format(abs_path=abs_path, count=count, sync=str(sync))
+
+ self.run_python(pyscript)
+
+ def teardown(self):
+ for p in self.background_procs:
+ log.info("Terminating background process")
+ self._kill_background(p)
+
+ self.background_procs = []
+
+ def _kill_background(self, p):
+ if p.stdin:
+ p.stdin.close()
+ try:
+ p.wait()
+ except (CommandFailedError, ConnectionLostError):
+ pass
+
+ def kill_background(self, p):
+ """
+ For a process that was returned by one of the _background member functions,
+ kill it hard.
+ """
+ self._kill_background(p)
+ self.background_procs.remove(p)
+
+ def send_signal(self, signal):
+ signal = signal.lower()
+ if signal.lower() not in ['sigstop', 'sigcont', 'sigterm', 'sigkill']:
+ raise NotImplementedError
+
+ self.client_remote.run(args=['sudo', 'kill', '-{0}'.format(signal),
+ self.client_pid], omit_sudo=False)
+
+ def get_global_id(self):
+ raise NotImplementedError()
+
+ def get_global_inst(self):
+ raise NotImplementedError()
+
+ def get_global_addr(self):
+ raise NotImplementedError()
+
+ def get_osd_epoch(self):
+ raise NotImplementedError()
+
+ def stat(self, fs_path, wait=True):
+ """
+ stat a file, and return the result as a dictionary like this:
+ {
+ "st_ctime": 1414161137.0,
+ "st_mtime": 1414161137.0,
+ "st_nlink": 33,
+ "st_gid": 0,
+ "st_dev": 16777218,
+ "st_size": 1190,
+ "st_ino": 2,
+ "st_uid": 0,
+ "st_mode": 16877,
+ "st_atime": 1431520593.0
+ }
+
+ Raises exception on absent file.
+ """
+ abs_path = os.path.join(self.mountpoint, fs_path)
+
+ pyscript = dedent("""
+ import os
+ import stat
+ import json
+ import sys
+
+ try:
+ s = os.stat("{path}")
+ except OSError as e:
+ sys.exit(e.errno)
+
+ attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"]
+ print(json.dumps(
+ dict([(a, getattr(s, a)) for a in attrs]),
+ indent=2))
+ """).format(path=abs_path)
+ proc = self._run_python(pyscript)
+ if wait:
+ proc.wait()
+ return json.loads(proc.stdout.getvalue().strip())
+ else:
+ return proc
+
+ def touch(self, fs_path):
+ """
+ Create a dentry if it doesn't already exist. This python
+ implementation exists because the usual command line tool doesn't
+ pass through error codes like EIO.
+
+ :param fs_path:
+ :return:
+ """
+ abs_path = os.path.join(self.mountpoint, fs_path)
+ pyscript = dedent("""
+ import sys
+ import errno
+
+ try:
+ f = open("{path}", "w")
+ f.close()
+ except IOError as e:
+ sys.exit(errno.EIO)
+ """).format(path=abs_path)
+ proc = self._run_python(pyscript)
+ proc.wait()
+
+ def path_to_ino(self, fs_path, follow_symlinks=True):
+ abs_path = os.path.join(self.mountpoint, fs_path)
+
+ if follow_symlinks:
+ pyscript = dedent("""
+ import os
+ import stat
+
+ print(os.stat("{path}").st_ino)
+ """).format(path=abs_path)
+ else:
+ pyscript = dedent("""
+ import os
+ import stat
+
+ print(os.lstat("{path}").st_ino)
+ """).format(path=abs_path)
+
+ proc = self._run_python(pyscript)
+ proc.wait()
+ return int(proc.stdout.getvalue().strip())
+
+ def path_to_nlink(self, fs_path):
+ abs_path = os.path.join(self.mountpoint, fs_path)
+
+ pyscript = dedent("""
+ import os
+ import stat
+
+ print(os.stat("{path}").st_nlink)
+ """).format(path=abs_path)
+
+ proc = self._run_python(pyscript)
+ proc.wait()
+ return int(proc.stdout.getvalue().strip())
+
+ def ls(self, path=None):
+ """
+ Wrap ls: return a list of strings
+ """
+ cmd = ["ls"]
+ if path:
+ cmd.append(path)
+
+ ls_text = self.run_shell(cmd).stdout.getvalue().strip()
+
+ if ls_text:
+ return ls_text.split("\n")
+ else:
+ # Special case because otherwise split on empty string
+ # gives you [''] instead of []
+ return []
+
+ def setfattr(self, path, key, val):
+ """
+ Wrap setfattr.
+
+ :param path: relative to mount point
+ :param key: xattr name
+ :param val: xattr value
+ :return: None
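+
+        Example (mirroring how layout xattrs are set elsewhere in these
+        tests; the names are illustrative):
+
+            mount.setfattr("./somefile", "ceph.file.layout.pool", "mypool")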
+ """
+ self.run_shell(["setfattr", "-n", key, "-v", val, path])
+
+ def getfattr(self, path, attr):
+ """
+ Wrap getfattr: return the values of a named xattr on one file, or
+ None if the attribute is not found.
+
+ :return: a string
+ """
+ p = self.run_shell(["getfattr", "--only-values", "-n", attr, path], wait=False)
+ try:
+ p.wait()
+ except CommandFailedError as e:
+ if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue():
+ return None
+ else:
+ raise
+
+ return str(p.stdout.getvalue())
+
+ def df(self):
+ """
+ Wrap df: return a dict of usage fields in bytes
+ """
+
+ p = self.run_shell(["df", "-B1", "."])
+ lines = p.stdout.getvalue().strip().split("\n")
+ fs, total, used, avail = lines[1].split()[:4]
+ log.warning(lines)
+
+ return {
+ "total": int(total),
+ "used": int(used),
+ "available": int(avail)
+ }
diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py
new file mode 100644
index 00000000..e4ce5570
--- /dev/null
+++ b/qa/tasks/cephfs/test_admin.py
@@ -0,0 +1,229 @@
+import json
+
+from teuthology.orchestra.run import CommandFailedError
+
+from unittest import case
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.fuse_mount import FuseMount
+
+from tasks.cephfs.filesystem import FileLayout
+
+class TestAdminCommands(CephFSTestCase):
+ """
+    Tests for administration commands.
+ """
+
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def test_fs_status(self):
+ """
+ That `ceph fs status` command functions.
+ """
+
+ s = self.fs.mon_manager.raw_cluster_cmd("fs", "status")
+ self.assertTrue("active" in s)
+
+ def _setup_ec_pools(self, n, metadata=True, overwrites=True):
+ if metadata:
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8")
+ cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"]
+ self.fs.mon_manager.raw_cluster_cmd(*cmd)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile")
+ if overwrites:
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true')
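+        # For reference, the data-pool setup above corresponds roughly to:
+        #   ceph osd erasure-code-profile set <n>-profile m=2 k=2 crush-failure-domain=osd
+        #   ceph osd pool create <n>-data 8 erasure <n>-profile
+        #   ceph osd pool set <n>-data allow_ec_overwrites true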
+
+ def _check_pool_application_metadata_key_value(self, pool, app, key, value):
+ output = self.fs.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'application', 'get', pool, app, key)
+ self.assertEqual(str(output.strip()), value)
+
+ def test_add_data_pool_root(self):
+ """
+ That a new data pool can be added and used for the root directory.
+ """
+
+ p = self.fs.add_data_pool("foo")
+ self.fs.set_dir_layout(self.mount_a, ".", FileLayout(pool=p))
+
+ def test_add_data_pool_application_metadata(self):
+ """
+ That the application metadata set on a newly added data pool is as expected.
+ """
+ pool_name = "foo"
+ mon_cmd = self.fs.mon_manager.raw_cluster_cmd
+ mon_cmd('osd', 'pool', 'create', pool_name, str(self.fs.pgs_per_fs_pool))
+ # Check whether https://tracker.ceph.com/issues/43061 is fixed
+ mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs')
+ self.fs.add_data_pool(pool_name, create=False)
+ self._check_pool_application_metadata_key_value(
+ pool_name, 'cephfs', 'data', self.fs.name)
+
+ def test_add_data_pool_subdir(self):
+ """
+ That a new data pool can be added and used for a sub-directory.
+ """
+
+ p = self.fs.add_data_pool("foo")
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p))
+
+    def test_add_data_pool_non_alphanumeric_name_as_subdir(self):
+        """
+        That a new data pool with a non-alphanumeric name can be added and used for a sub-directory.
+ """
+ p = self.fs.add_data_pool("I-am-data_pool00.")
+ self.mount_a.run_shell("mkdir subdir")
+ self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p))
+
+ def test_add_data_pool_ec(self):
+ """
+ That a new EC data pool can be added.
+ """
+
+ n = "test_add_data_pool_ec"
+ self._setup_ec_pools(n, metadata=False)
+ p = self.fs.add_data_pool(n+"-data", create=False)
+
+ def test_new_default_ec(self):
+ """
+ That a new file system warns/fails with an EC default data pool.
+ """
+
+ self.fs.delete_all_filesystems()
+ n = "test_new_default_ec"
+ self._setup_ec_pools(n)
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data")
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+
+ def test_new_default_ec_force(self):
+ """
+ That a new file system succeeds with an EC default data pool with --force.
+ """
+
+ self.fs.delete_all_filesystems()
+ n = "test_new_default_ec_force"
+ self._setup_ec_pools(n)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force")
+
+ def test_new_default_ec_no_overwrite(self):
+ """
+ That a new file system fails with an EC default data pool without overwrite.
+ """
+
+ self.fs.delete_all_filesystems()
+ n = "test_new_default_ec_no_overwrite"
+ self._setup_ec_pools(n, overwrites=False)
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data")
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+ # and even with --force !
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force")
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+
+ def test_fs_new_pool_application_metadata(self):
+ """
+ That the application metadata set on the pools of a newly created filesystem are as expected.
+ """
+ self.fs.delete_all_filesystems()
+ fs_name = "test_fs_new_pool_application"
+ keys = ['metadata', 'data']
+ pool_names = [fs_name+'-'+key for key in keys]
+ mon_cmd = self.fs.mon_manager.raw_cluster_cmd
+ for p in pool_names:
+ mon_cmd('osd', 'pool', 'create', p, str(self.fs.pgs_per_fs_pool))
+ mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs')
+ mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1])
+ for i in range(2):
+ self._check_pool_application_metadata_key_value(
+ pool_names[i], 'cephfs', keys[i], fs_name)
+
+
+class TestConfigCommands(CephFSTestCase):
+ """
+ Test that daemons and clients respond to the otherwise rarely-used
+ runtime config modification operations.
+ """
+
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def test_ceph_config_show(self):
+ """
+ That I can successfully show MDS configuration.
+ """
+
+ names = self.fs.get_rank_names()
+ for n in names:
+ s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n)
+ self.assertTrue("NAME" in s)
+ self.assertTrue("mon_host" in s)
+
+ def test_client_config(self):
+ """
+ That I can successfully issue asok "config set" commands
+
+ :return:
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ raise case.SkipTest("Test only applies to FUSE clients")
+
+ test_key = "client_cache_size"
+ test_val = "123"
+ self.mount_a.admin_socket(['config', 'set', test_key, test_val])
+ out = self.mount_a.admin_socket(['config', 'get', test_key])
+ self.assertEqual(out[test_key], test_val)
+
+        self.mount_a.write_n_mb("file.bin", 1)
+
+ # Implicitly asserting that things don't have lockdep error in shutdown
+ self.mount_a.umount_wait(require_clean=True)
+ self.fs.mds_stop()
+
+ def test_mds_config_asok(self):
+ test_key = "mds_max_purge_ops"
+ test_val = "123"
+ self.fs.mds_asok(['config', 'set', test_key, test_val])
+ out = self.fs.mds_asok(['config', 'get', test_key])
+ self.assertEqual(out[test_key], test_val)
+
+ # Implicitly asserting that things don't have lockdep error in shutdown
+ self.mount_a.umount_wait(require_clean=True)
+ self.fs.mds_stop()
+
+ def test_mds_config_tell(self):
+ test_key = "mds_max_purge_ops"
+ test_val = "123"
+
+ mds_id = self.fs.get_lone_mds_id()
+ self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id), "injectargs",
+ "--{0}={1}".format(test_key, test_val))
+
+ # Read it back with asok because there is no `tell` equivalent
+ out = self.fs.mds_asok(['config', 'get', test_key])
+ self.assertEqual(out[test_key], test_val)
+
+ # Implicitly asserting that things don't have lockdep error in shutdown
+ self.mount_a.umount_wait(require_clean=True)
+ self.fs.mds_stop()
diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py
new file mode 100644
index 00000000..c0aa2e4c
--- /dev/null
+++ b/qa/tasks/cephfs/test_auto_repair.py
@@ -0,0 +1,90 @@
+
+"""
+Exercise the MDS's auto repair functions
+"""
+
+import logging
+import time
+
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+# Arbitrary timeouts for operations involving restarting
+# an MDS or waiting for it to come up
+MDS_RESTART_GRACE = 60
+
+
+class TestMDSAutoRepair(CephFSTestCase):
+ def test_backtrace_repair(self):
+ """
+ MDS should verify/fix backtrace on fetch dirfrag
+ """
+
+ self.mount_a.run_shell(["mkdir", "testdir1"])
+ self.mount_a.run_shell(["touch", "testdir1/testfile"])
+ dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1"))
+
+ # drop inodes caps
+ self.mount_a.umount_wait()
+
+ # flush journal entries to dirfrag objects, and expire journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # Restart the MDS to drop the metadata cache (because we expired the journal,
+ # nothing gets replayed into cache on restart)
+ self.fs.mds_stop()
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ # remove testdir1's backtrace
+ self.fs.rados(["rmxattr", dir_objname, "parent"])
+
+ # readdir (fetch dirfrag) should fix testdir1's backtrace
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ self.mount_a.run_shell(["ls", "testdir1"])
+
+ # flush journal entries to dirfrag objects
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # check if backtrace exists
+ self.fs.rados(["getxattr", dir_objname, "parent"])
+
+ def test_mds_readonly(self):
+ """
+        Test that the MDS behaves correctly when it is read-only.
+ """
+        # operations should succeed while the MDS is not read-only
+ self.mount_a.run_shell(["touch", "test_file1"])
+ writer = self.mount_a.write_background(loop=True)
+
+ time.sleep(10)
+ self.assertFalse(writer.finished)
+
+ # force MDS to read-only mode
+ self.fs.mds_asok(['force_readonly'])
+ time.sleep(10)
+
+ # touching test file should fail
+ try:
+ self.mount_a.run_shell(["touch", "test_file1"])
+ except CommandFailedError:
+ pass
+ else:
+ self.assertTrue(False)
+
+ # background writer also should fail
+ self.assertTrue(writer.finished)
+
+ # The MDS should report its readonly health state to the mon
+ self.wait_for_health("MDS_READ_ONLY", timeout=30)
+
+ # restart mds to make it writable
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ self.wait_for_health_clear(timeout=30)
diff --git a/qa/tasks/cephfs/test_backtrace.py b/qa/tasks/cephfs/test_backtrace.py
new file mode 100644
index 00000000..af246a1e
--- /dev/null
+++ b/qa/tasks/cephfs/test_backtrace.py
@@ -0,0 +1,78 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+
+class TestBacktrace(CephFSTestCase):
+ def test_backtrace(self):
+ """
+ That the 'parent' and 'layout' xattrs on the head objects of files
+ are updated correctly.
+ """
+
+ old_data_pool_name = self.fs.get_data_pool_name()
+ old_pool_id = self.fs.get_data_pool_id()
+
+ # Create a file for subsequent checks
+ self.mount_a.run_shell(["mkdir", "parent_a"])
+ self.mount_a.run_shell(["touch", "parent_a/alpha"])
+ file_ino = self.mount_a.path_to_ino("parent_a/alpha")
+
+ # That backtrace and layout are written after initial flush
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']])
+ layout = self.fs.read_layout(file_ino)
+ self.assertDictEqual(layout, {
+ "stripe_unit": 4194304,
+ "stripe_count": 1,
+ "object_size": 4194304,
+ "pool_id": old_pool_id,
+ "pool_ns": "",
+ })
+ self.assertEqual(backtrace['pool'], old_pool_id)
+
+ # That backtrace is written after parentage changes
+ self.mount_a.run_shell(["mkdir", "parent_b"])
+ self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"])
+
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']])
+
+ # Create a new data pool
+ new_pool_name = "data_new"
+ new_pool_id = self.fs.add_data_pool(new_pool_name)
+
+ # That an object which has switched pools gets its backtrace updated
+ self.mount_a.setfattr("./parent_b/alpha",
+ "ceph.file.layout.pool", new_pool_name)
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name)
+ self.assertEqual(backtrace_old_pool['pool'], new_pool_id)
+ backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name)
+ self.assertEqual(backtrace_new_pool['pool'], new_pool_id)
+ new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name)
+ self.assertEqual(new_pool_layout['pool_id'], new_pool_id)
+ self.assertEqual(new_pool_layout['pool_ns'], '')
+
+ # That subsequent linkage changes are only written to new pool backtrace
+ self.mount_a.run_shell(["mkdir", "parent_c"])
+ self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"])
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name)
+ self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']])
+ backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name)
+ self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']])
+
+ # That layout is written to new pool after change to other field in layout
+ self.mount_a.setfattr("./parent_c/alpha",
+ "ceph.file.layout.object_size", "8388608")
+
+ self.fs.mds_asok(["flush", "journal"])
+ new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name)
+ self.assertEqual(new_pool_layout['object_size'], 8388608)
+
+ # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough,
+ # we don't update the layout in all the old pools whenever it changes
+ old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name)
+ self.assertEqual(old_pool_layout['object_size'], 4194304)
diff --git a/qa/tasks/cephfs/test_cap_flush.py b/qa/tasks/cephfs/test_cap_flush.py
new file mode 100644
index 00000000..27b9af67
--- /dev/null
+++ b/qa/tasks/cephfs/test_cap_flush.py
@@ -0,0 +1,64 @@
+
+import os
+import time
+from textwrap import dedent
+from unittest import SkipTest
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+class TestCapFlush(CephFSTestCase):
+ @for_teuthology
+ def test_replay_create(self):
+ """
+        The MDS starts to handle client caps when it enters the clientreplay
+        stage. When handling a client cap in this stage, it is possible that
+        the corresponding inode does not exist because the client request
+        that creates the inode has not been replayed yet.
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client to inject client release failure")
+
+ dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
+ py_script = dedent("""
+ import os
+ os.mkdir("{0}")
+ fd = os.open("{0}", os.O_RDONLY)
+ os.fchmod(fd, 0o777)
+ os.fsync(fd)
+ """).format(dir_path)
+ self.mount_a.run_python(py_script)
+
+ self.fs.mds_asok(["flush", "journal"])
+
+ # client will only get unsafe replay
+ self.fs.mds_asok(["config", "set", "mds_log_pause", "1"])
+
+ file_name = "testfile"
+ file_path = dir_path + "/" + file_name
+
+ # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty
+ py_script = dedent("""
+ import os
+ os.chdir("{0}")
+ os.setgid(65534)
+ os.setuid(65534)
+ fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0o644)
+ os.fchmod(fd, 0o640)
+ """).format(dir_path, file_name)
+ self.mount_a.run_python(py_script)
+
+ # Modify file mode by different user. ceph-fuse will send a setattr request
+ self.mount_a.run_shell(["chmod", "600", file_path], wait=False)
+
+ time.sleep(10)
+
+ # Restart mds. Client will re-send the unsafe request and cap flush
+ self.fs.mds_stop()
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip()
+ # If the cap flush get dropped, mode should be 0644.
+ # (Ax cap stays in dirty state, which prevents setattr reply from updating file mode)
+ self.assertEqual(mode, "600")
diff --git a/qa/tasks/cephfs/test_cephfs_shell.py b/qa/tasks/cephfs/test_cephfs_shell.py
new file mode 100644
index 00000000..8ddbaedb
--- /dev/null
+++ b/qa/tasks/cephfs/test_cephfs_shell.py
@@ -0,0 +1,279 @@
+import os
+import crypt
+import logging
+from six import StringIO
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+class TestCephFSShell(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+
+ def _cephfs_shell(self, cmd, opts=None, stdin=None):
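+        """
+        Run a single cephfs-shell command on the client and return its
+        stripped stdout.  The invocation is roughly:
+
+            cephfs-shell -c <ceph.conf> [opts] -- "<cmd>"
+        """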
+ args = ["cephfs-shell", "-c", self.mount_a.config_path]
+ if opts is not None:
+ args.extend(opts)
+ args.extend(("--", cmd))
+ log.info("Running command: {}".format(" ".join(args)))
+ status = self.mount_a.client_remote.run(args=args, stdout=StringIO(),
+ stdin=stdin)
+ return status.stdout.getvalue().strip()
+
+ def test_help(self):
+ """
+ Test that help outputs commands.
+ """
+
+ o = self._cephfs_shell("help")
+
+ log.info("output:\n{}".format(o))
+
+ def test_mkdir(self):
+ """
+ Test that mkdir creates directory
+ """
+ o = self._cephfs_shell("mkdir d1")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ o = self.mount_a.stat('d1')
+ log.info("mount_a output:\n{}".format(o))
+
+ def test_mkdir_with_07000_octal_mode(self):
+ """
+ Test that mkdir fails with octal mode greater than 0777
+ """
+ o = self._cephfs_shell("mkdir -m 07000 d2")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d2 should fail
+ try:
+ o = self.mount_a.stat('d2')
+ log.info("mount_a output:\n{}".format(o))
+ except:
+ pass
+
+ def test_mkdir_with_negative_octal_mode(self):
+ """
+ Test that mkdir fails with negative octal mode
+ """
+ o = self._cephfs_shell("mkdir -m -0755 d3")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d3 should fail
+ try:
+ o = self.mount_a.stat('d3')
+ log.info("mount_a output:\n{}".format(o))
+ except:
+ pass
+
+ def test_mkdir_with_non_octal_mode(self):
+ """
+ Test that mkdir passes with non-octal mode
+ """
+ o = self._cephfs_shell("mkdir -m u=rwx d4")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d4 should pass
+ o = self.mount_a.stat('d4')
+ assert((o['st_mode'] & 0o700) == 0o700)
+
+ def test_mkdir_with_bad_non_octal_mode(self):
+ """
+        Test that mkdir fails with a bad non-octal mode
+ """
+ o = self._cephfs_shell("mkdir -m ugx=0755 d5")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d5 should fail
+ try:
+ o = self.mount_a.stat('d5')
+ log.info("mount_a output:\n{}".format(o))
+ except:
+ pass
+
+ def test_mkdir_path_without_path_option(self):
+ """
+ Test that mkdir fails without path option for creating path
+ """
+ o = self._cephfs_shell("mkdir d5/d6/d7")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d5/d6/d7 should fail
+ try:
+ o = self.mount_a.stat('d5/d6/d7')
+ log.info("mount_a output:\n{}".format(o))
+ except:
+ pass
+
+ def test_mkdir_path_with_path_option(self):
+ """
+ Test that mkdir passes with path option for creating path
+ """
+ o = self._cephfs_shell("mkdir -p d5/d6/d7")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d5/d6/d7 should pass
+ o = self.mount_a.stat('d5/d6/d7')
+ log.info("mount_a output:\n{}".format(o))
+
+ def validate_stat_output(self, s):
+ l = s.split('\n')
+ log.info("lines:\n{}".format(l))
+ rv = l[-1] # get last line; a failed stat will have "1" as the line
+ log.info("rv:{}".format(rv))
+ r = 0
+ try:
+ r = int(rv) # a non-numeric line will cause an exception
+ except:
+ pass
+ assert(r == 0)
+
+ def test_put_and_get_without_target_directory(self):
+ """
+        Test that put and get succeed without an explicit target path
+ """
+ # generate test data in a directory
+ self._cephfs_shell("!mkdir p1")
+ self._cephfs_shell('!dd if=/dev/urandom of=p1/dump1 bs=1M count=1')
+ self._cephfs_shell('!dd if=/dev/urandom of=p1/dump2 bs=2M count=1')
+ self._cephfs_shell('!dd if=/dev/urandom of=p1/dump3 bs=3M count=1')
+
+ # copy the whole directory over to the cephfs
+ o = self._cephfs_shell("put p1")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # put p1 should pass
+ o = self.mount_a.stat('p1')
+ log.info("mount_a output:\n{}".format(o))
+ o = self.mount_a.stat('p1/dump1')
+ log.info("mount_a output:\n{}".format(o))
+ o = self.mount_a.stat('p1/dump2')
+ log.info("mount_a output:\n{}".format(o))
+ o = self.mount_a.stat('p1/dump3')
+ log.info("mount_a output:\n{}".format(o))
+
+ self._cephfs_shell('!rm -rf p1')
+ o = self._cephfs_shell("get p1")
+ o = self._cephfs_shell('!stat p1 || echo $?')
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.validate_stat_output(o)
+
+ o = self._cephfs_shell('!stat p1/dump1 || echo $?')
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.validate_stat_output(o)
+
+ o = self._cephfs_shell('!stat p1/dump2 || echo $?')
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.validate_stat_output(o)
+
+ o = self._cephfs_shell('!stat p1/dump3 || echo $?')
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.validate_stat_output(o)
+
+    # the 'put' command gets tested as well with the 'get' command
+ def test_get_with_target_name(self):
+ """
+ Test that get passes with target name
+ """
+ s = 'C' * 1024
+ s_hash = crypt.crypt(s, '.A')
+ o = self._cephfs_shell("put - dump4", stdin=s)
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # put - dump4 should pass
+ o = self.mount_a.stat('dump4')
+ log.info("mount_a output:\n{}".format(o))
+
+ o = self._cephfs_shell("get dump4 .")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ o = self._cephfs_shell("!cat dump4")
+ o_hash = crypt.crypt(o, '.A')
+
+ # s_hash must be equal to o_hash
+ log.info("s_hash:{}".format(s_hash))
+ log.info("o_hash:{}".format(o_hash))
+ assert(s_hash == o_hash)
+
+ def test_get_without_target_name(self):
+ """
+        Test that get passes without a target name
+ """
+ s = 'D' * 1024
+ o = self._cephfs_shell("put - dump5", stdin=s)
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # put - dump5 should pass
+ o = self.mount_a.stat('dump5')
+ log.info("mount_a output:\n{}".format(o))
+
+        # get dump5 (without a target name) should pass
+ o = self._cephfs_shell("get dump5")
+ o = self._cephfs_shell("!stat dump5 || echo $?")
+ log.info("cephfs-shell output:\n{}".format(o))
+ l = o.split('\n')
+ try:
+ ret = int(l[1])
+ # verify that stat dump5 passes
+ # if ret == 1, then that implies the stat failed
+ # which implies that there was a problem with "get dump5"
+ assert(ret != 1)
+ except ValueError:
+ # we have a valid stat output; so this is good
+ # if the int() fails then that means there's a valid stat output
+ pass
+
+ def test_get_to_console(self):
+ """
+        Test that get can write the retrieved file to the console (stdout)
+ """
+ s = 'E' * 1024
+ s_hash = crypt.crypt(s, '.A')
+ o = self._cephfs_shell("put - dump6", stdin=s)
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # put - dump6 should pass
+ o = self.mount_a.stat('dump6')
+ log.info("mount_a output:\n{}".format(o))
+
+ # get dump6 - should pass
+ o = self._cephfs_shell("get dump6 -")
+ o_hash = crypt.crypt(o, '.A')
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # s_hash must be equal to o_hash
+ log.info("s_hash:{}".format(s_hash))
+ log.info("o_hash:{}".format(o_hash))
+ assert(s_hash == o_hash)
+
+# def test_ls(self):
+# """
+# Test that ls passes
+# """
+# o = self._cephfs_shell("ls")
+# log.info("cephfs-shell output:\n{}".format(o))
+#
+# o = self.mount_a.run_shell(['ls']).stdout.getvalue().strip().replace("\n", " ").split()
+# log.info("mount_a output:\n{}".format(o))
+#
+# # ls should not list hidden files without the -a switch
+# if '.' in o or '..' in o:
+# log.info('ls failed')
+# else:
+# log.info('ls succeeded')
+#
+# def test_ls_a(self):
+# """
+# Test that ls -a passes
+# """
+# o = self._cephfs_shell("ls -a")
+# log.info("cephfs-shell output:\n{}".format(o))
+#
+# o = self.mount_a.run_shell(['ls', '-a']).stdout.getvalue().strip().replace("\n", " ").split()
+# log.info("mount_a output:\n{}".format(o))
+#
+# if '.' in o and '..' in o:
+# log.info('ls -a succeeded')
+# else:
+# log.info('ls -a failed')
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
new file mode 100644
index 00000000..613a405a
--- /dev/null
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -0,0 +1,330 @@
+
+"""
+Exercise the MDS's behaviour when clients and the MDCache reach or
+exceed the limits of how many caps/inodes they should hold.
+"""
+
+import logging
+from textwrap import dedent
+from unittest import SkipTest
+from teuthology.orchestra.run import CommandFailedError
+from tasks.ceph_test_case import TestTimeoutError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
+from tasks.cephfs.fuse_mount import FuseMount
+import os
+
+
+log = logging.getLogger(__name__)
+
+
+# Arbitrary timeouts for operations involving restarting
+# an MDS or waiting for it to come up
+MDS_RESTART_GRACE = 60
+
+# Hardcoded values from Server::recall_client_state
+CAP_RECALL_RATIO = 0.8
+CAP_RECALL_MIN = 100
+
+
+class TestClientLimits(CephFSTestCase):
+ REQUIRE_KCLIENT_REMOTE = True
+ CLIENTS_REQUIRED = 2
+
+ def _test_client_pin(self, use_subdir, open_files):
+ """
+ When a client pins an inode in its cache, for example because the file is held open,
+ it should reject requests from the MDS to trim these caps. The MDS should complain
+ to the user that it is unable to enforce its cache size limits because of this
+ objectionable client.
+
+ :param use_subdir: whether to put test files in a subdir or use root
+ """
+
+ self.config_set('mds', 'mds_cache_memory_limit', "1K")
+ self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
+ self.config_set('mds', 'mds_recall_warning_threshold', open_files)
+
+ mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client"))
+ self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client)
+ mds_recall_warning_decay_rate = float(self.config_get('mds.a', "mds_recall_warning_decay_rate"))
+ self.assertGreaterEqual(open_files, mds_min_caps_per_client)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ path = "subdir" if use_subdir else "."
+ open_proc = self.mount_a.open_n_background(path, open_files)
+
+ # Client should now hold:
+ # `open_files` caps for the open files
+ # 1 cap for root
+ # 1 cap for subdir
+ self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
+ open_files + (2 if use_subdir else 1),
+ timeout=600,
+ reject_fn=lambda x: x > open_files + 2)
+
+ # MDS should not be happy about that, as the client is failing to comply
+ # with the SESSION_RECALL messages it is being sent
+ self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
+
+ # We can also test that the MDS health warning for oversized
+ # cache is functioning as intended.
+ self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
+
+ # When the client closes the files, it should retain only as many caps as allowed
+ # under the SESSION_RECALL policy
+ log.info("Terminating process holding files open")
+ open_proc.stdin.close()
+ try:
+ open_proc.wait()
+ except CommandFailedError:
+ # We killed it, so it raises an error
+ pass
+
+ # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message,
+ # which depend on the caps outstanding, cache size and overall ratio
+ def expected_caps():
+ num_caps = self.get_session(mount_a_client_id)['num_caps']
+ if num_caps <= mds_min_caps_per_client:
+ return True
+ else:
+ return False
+
+ self.wait_until_true(expected_caps, timeout=60)
+
+ @needs_trimming
+ def test_client_pin_root(self):
+ self._test_client_pin(False, 400)
+
+ @needs_trimming
+ def test_client_pin(self):
+ self._test_client_pin(True, 800)
+
+ @needs_trimming
+ def test_client_pin_mincaps(self):
+ self._test_client_pin(True, 200)
+
+ def test_client_min_caps_working_set(self):
+ """
+        That when a client has inodes pinned in its cache (open files), the MDS
+        will not warn about the client not responding to cache pressure as long
+        as the number of caps is below mds_min_caps_working_set.
+ """
+
+        # Set the MDS cache memory limit to a low value that will make the
+        # MDS ask the client to trim its caps.
+ cache_memory_limit = "1K"
+ open_files = 400
+
+ self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
+ self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
+ self.config_set('mds', 'mds_recall_warning_threshold', open_files)
+ self.config_set('mds', 'mds_min_caps_working_set', open_files*2)
+
+ mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client"))
+ mds_recall_warning_decay_rate = float(self.config_get('mds.a', "mds_recall_warning_decay_rate"))
+ self.assertGreaterEqual(open_files, mds_min_caps_per_client)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.mount_a.open_n_background("subdir", open_files)
+
+ # Client should now hold:
+ # `open_files` caps for the open files
+ # 1 cap for root
+ # 1 cap for subdir
+ self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
+ open_files + 2,
+ timeout=600,
+ reject_fn=lambda x: x > open_files + 2)
+
+ # We can also test that the MDS health warning for oversized
+ # cache is functioning as intended.
+ self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
+
+ try:
+ # MDS should not be happy about that but it's not sending
+ # MDS_CLIENT_RECALL warnings because the client's caps are below
+ # mds_min_caps_working_set.
+ self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
+ except TestTimeoutError:
+ pass
+ else:
+ raise RuntimeError("expected no client recall warning")
+
+ def test_cap_acquisition_throttle_readdir(self):
+ """
+        Readdir typically acquires caps faster than the MDS can recall them,
+        so cap acquisition via readdir is throttled by retrying the readdir
+        after a fraction of a second (0.5 by default) when the throttling
+        condition is met.
+ """
+
+ max_caps_per_client = 500
+ cap_acquisition_throttle = 250
+
+ self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
+ self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
+
+ # Create 1500 files split across 6 directories, 250 each.
+ for i in range(1, 7):
+ self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # recursive readdir
+ self.mount_a.run_shell_payload("find | wc")
+
+ # validate that the cap_acquisition decay counter exceeds the throttle count (i.e. 250) after the readdir
+ cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
+ self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
+
+ # validate that the throttle condition was hit at least once
+ cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
+ self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
+
+ def test_client_release_bug(self):
+ """
+ When a client has a bug (which we will simulate) preventing it from releasing caps,
+ the MDS should notice that releases are not being sent promptly, and generate a health
+ metric to that effect.
+ """
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client to inject client release failure")
+
+ self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
+ self.mount_a.teardown()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # Client A creates a file. It will hold the write caps on the file, and later
+ # (simulated bug) fail to comply with the MDS's request to release that cap
+ self.mount_a.run_shell(["touch", "file1"])
+
+ # Client B tries to write to the file that client A created
+ rproc = self.mount_b.write_background("file1")
+
+ # After session_timeout, we should see a health warning (extra lag from
+ # MDS beacon period)
+ session_timeout = self.fs.get_var("session_timeout")
+ self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)
+
+ # Client B should still be stuck
+ self.assertFalse(rproc.finished)
+
+ # Kill client A
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ # Client B should complete
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ rproc.wait()
+
+ def test_client_oldest_tid(self):
+ """
+ When a client does not advance its oldest tid, the MDS should notice that
+ and generate health warnings.
+ """
+
+ # num of requests client issues
+ max_requests = 1000
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client to inject client release failure")
+
+ self.set_conf('client', 'client inject fixed oldest tid', 'true')
+ self.mount_a.teardown()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])
+
+ # Create lots of files
+ self.mount_a.create_n_files("testdir/file1", max_requests + 100)
+
+ # Create a few files synchronously. This makes sure previous requests are completed
+ self.mount_a.create_n_files("testdir/file2", 5, True)
+
+ # Wait for the health warnings. Assume mds can handle 10 request per second at least
+ self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10)
+
+ def _test_client_cache_size(self, mount_subdir):
+ """
+ Check that the client invalidates its kernel dcache according to its cache size config
+ """
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client to inject client release failure")
+
+ if mount_subdir:
+ # fuse assigns a fixed inode number (1) to the root inode. But when mounting
+ # into a subdir, the actual inode number of the root is not 1. This mismatch
+ # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries
+ # in the root directory.
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.umount_wait()
+ self.set_conf('client', 'client mountpoint', '/subdir')
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ root_ino = self.mount_a.path_to_ino(".")
+ self.assertEqual(root_ino, 1)
+
+ dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
+
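+ # The doubled braces below are escapes for the outer str.format() call, so
+ # the script that actually runs does "{path}/dir{0}".format(n)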
+ mkdir_script = dedent("""
+ import os
+ os.mkdir("{path}")
+ for n in range(0, {num_dirs}):
+ os.mkdir("{path}/dir{{0}}".format(n))
+ """)
+
+ num_dirs = 1000
+ self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
+ self.mount_a.run_shell(["sync"])
+
+ dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
+ self.assertGreaterEqual(dentry_count, num_dirs)
+ self.assertGreaterEqual(dentry_pinned_count, num_dirs)
+
+ cache_size = num_dirs // 10
+ self.mount_a.set_cache_size(cache_size)
+
+ def trimmed():
+ dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
+ log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
+ dentry_count, dentry_pinned_count
+ ))
+ if dentry_count > cache_size or dentry_pinned_count > cache_size:
+ return False
+
+ return True
+
+ self.wait_until_true(trimmed, 30)
+
+ @needs_trimming
+ def test_client_cache_size(self):
+ self._test_client_cache_size(False)
+ self._test_client_cache_size(True)
+
+ def test_client_max_caps(self):
+ """
+ That the MDS will not let a client sit above mds_max_caps_per_client caps.
+ """
+
+ mds_min_caps_per_client = int(self.config_get('mds.a', "mds_min_caps_per_client"))
+ mds_max_caps_per_client = 2*mds_min_caps_per_client
+ self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)
+
+ self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ def expected_caps():
+ num_caps = self.get_session(mount_a_client_id)['num_caps']
+ return num_caps <= mds_max_caps_per_client
+
+ self.wait_until_true(expected_caps, timeout=60)
diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py
new file mode 100644
index 00000000..c7806b71
--- /dev/null
+++ b/qa/tasks/cephfs/test_client_recovery.py
@@ -0,0 +1,633 @@
+
+"""
+Teuthology task for exercising CephFS client recovery
+"""
+
+import logging
+from textwrap import dedent
+import time
+import distutils.version as version
+import re
+import os
+
+from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.packaging import get_package_version
+from unittest import SkipTest
+
+
+log = logging.getLogger(__name__)
+
+
+# Arbitrary timeouts for operations involving restarting
+# an MDS or waiting for it to come up
+MDS_RESTART_GRACE = 60
+
+
+class TestClientNetworkRecovery(CephFSTestCase):
+ REQUIRE_KCLIENT_REMOTE = True
+ REQUIRE_ONE_CLIENT_REMOTE = True
+ CLIENTS_REQUIRED = 2
+
+ LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
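+ # Settings named in LOAD_SETTINGS are read from the running cluster during
+ # test setup and stored as attributes of the same name (placeholders below).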
+
+ # Environment references
+ mds_reconnect_timeout = None
+ ms_max_backoff = None
+
+ def test_network_death(self):
+ """
+ Simulate software freeze or temporary network failure.
+
+ Check that the client blocks I/O during failure, and completes
+ I/O after failure.
+ """
+
+ session_timeout = self.fs.get_var("session_timeout")
+ self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+
+ # We only need one client
+ self.mount_b.umount_wait()
+
+ # Initially our one client session should be visible
+ client_id = self.mount_a.get_global_id()
+ ls_data = self._session_list()
+ self.assert_session_count(1, ls_data)
+ self.assertEqual(ls_data[0]['id'], client_id)
+ self.assert_session_state(client_id, "open")
+
+ # ...and capable of doing I/O without blocking
+ self.mount_a.create_files()
+
+ # ...but if we turn off the network
+ self.fs.set_clients_block(True)
+
+ # ...and try and start an I/O
+ write_blocked = self.mount_a.write_background()
+
+ # ...then it should block
+ self.assertFalse(write_blocked.finished)
+ self.assert_session_state(client_id, "open")
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
+ self.assertFalse(write_blocked.finished)
+ self.assert_session_state(client_id, "stale")
+
+ # ...until we re-enable I/O
+ self.fs.set_clients_block(False)
+
+ # ...when it should complete promptly
+ a = time.time()
+ self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
+ write_blocked.wait() # Already know we're finished, wait() to raise exception on errors
+ recovery_time = time.time() - a
+ log.info("recovery time: {0}".format(recovery_time))
+ self.assert_session_state(client_id, "open")
+
+
+class TestClientRecovery(CephFSTestCase):
+ REQUIRE_KCLIENT_REMOTE = True
+ CLIENTS_REQUIRED = 2
+
+ LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
+
+ # Environment references
+ mds_reconnect_timeout = None
+ ms_max_backoff = None
+
+ def test_basic(self):
+ # Check that two clients come up healthy and see each others' files
+ # =====================================================
+ self.mount_a.create_files()
+ self.mount_a.check_files()
+ self.mount_a.umount_wait()
+
+ self.mount_b.check_files()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # Check that the admin socket interface is correctly reporting
+ # two sessions
+ # =====================================================
+ ls_data = self._session_list()
+ self.assert_session_count(2, ls_data)
+
+ self.assertSetEqual(
+ set([l['id'] for l in ls_data]),
+ {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
+ )
+
+ def test_restart(self):
+ # Check that after an MDS restart both clients reconnect and continue
+ # to handle I/O
+ # =====================================================
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+
+ self.mount_a.create_destroy()
+ self.mount_b.create_destroy()
+
+ def _session_num_caps(self, client_id):
+ ls_data = self.fs.mds_asok(['session', 'ls'])
+ return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])
+
+ def test_reconnect_timeout(self):
+ # Reconnect timeout
+ # =================
+ # Check that if I stop an MDS and a client goes away, the MDS waits
+ # for the reconnect period
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.mount_a.umount_wait(force=True)
+
+ self.fs.mds_restart()
+
+ self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
+ # Check that the MDS locally reports its state correctly
+ status = self.fs.mds_asok(['status'])
+ self.assertIn("reconnect_status", status)
+
+ ls_data = self._session_list()
+ self.assert_session_count(2, ls_data)
+
+ # The session for the dead client should have the 'reconnect' flag set
+ self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])
+
+ # Wait for the reconnect state to clear, this should take the
+ # reconnect timeout period.
+ in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
+ # Check that the period we waited to enter active is within a factor
+ # of two of the reconnect timeout.
+ self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout // 2,
+ "Should have been in reconnect phase for {0} but only took {1}".format(
+ self.mds_reconnect_timeout, in_reconnect_for
+ ))
+
+ self.assert_session_count(1)
+
+ # Check that the client that timed out during reconnect can
+ # mount again and do I/O
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ self.mount_a.create_destroy()
+
+ self.assert_session_count(2)
+
+ def test_reconnect_eviction(self):
+ # Eviction during reconnect
+ # =========================
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # The mount goes away while the MDS is offline
+ self.mount_a.kill()
+
+ # wait for it to die
+ time.sleep(5)
+
+ self.fs.mds_restart()
+
+ # Enter reconnect phase
+ self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
+ self.assert_session_count(2)
+
+ # Evict the stuck client
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ self.assert_session_count(1)
+
+ # Observe that we proceed to the active phase without waiting for the full reconnect timeout
+ evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+ # Once we evict the troublemaker, the reconnect phase should complete
+ # in well under the reconnect timeout.
+ self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
+ "reconnect did not complete soon enough after eviction, took {0}".format(
+ evict_til_active
+ ))
+
+ # We killed earlier so must clean up before trying to use again
+ self.mount_a.kill_cleanup()
+
+ # Bring the client back
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ self.mount_a.create_destroy()
+
+ def _test_stale_caps(self, write):
+ session_timeout = self.fs.get_var("session_timeout")
+
+ # Capability release from stale session
+ # =====================================
+ if write:
+ cap_holder = self.mount_a.open_background()
+ else:
+ self.mount_a.run_shell(["touch", "background_file"])
+ self.mount_a.umount_wait()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ cap_holder = self.mount_a.open_background(write=False)
+
+ self.assert_session_count(2)
+ mount_a_gid = self.mount_a.get_global_id()
+
+ # Wait for the file to be visible from another client, indicating
+ # that mount_a has completed its network ops
+ self.mount_b.wait_for_visible()
+
+ # Simulate client death
+ self.mount_a.kill()
+
+ # wait for it to die so it doesn't voluntarily release buffer cap
+ time.sleep(5)
+
+ try:
+ # Now, after session_timeout seconds, the waiter should
+ # complete their operation when the MDS marks the holder's
+ # session stale.
+ cap_waiter = self.mount_b.write_background()
+ a = time.time()
+ cap_waiter.wait()
+ b = time.time()
+
+ # Should have succeeded
+ self.assertEqual(cap_waiter.exitstatus, 0)
+
+ if write:
+ self.assert_session_count(1)
+ else:
+ self.assert_session_state(mount_a_gid, "stale")
+
+ cap_waited = b - a
+ log.info("cap_waiter waited {0}s".format(cap_waited))
+ self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0,
+ "Capability handover took {0}, expected approx {1}".format(
+ cap_waited, session_timeout
+ ))
+
+ cap_holder.stdin.close()
+ try:
+ cap_holder.wait()
+ except (CommandFailedError, ConnectionLostError):
+ # We killed it (and possibly its node), so it raises an error
+ pass
+ finally:
+ # teardown() doesn't quite handle this case cleanly, so help it out
+ self.mount_a.kill_cleanup()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ def test_stale_read_caps(self):
+ self._test_stale_caps(False)
+
+ def test_stale_write_caps(self):
+ self._test_stale_caps(True)
+
+ def test_evicted_caps(self):
+ # Eviction while holding a capability
+ # ===================================
+
+ session_timeout = self.fs.get_var("session_timeout")
+
+ # Take out a write capability on a file on client A,
+ # and then immediately kill it.
+ cap_holder = self.mount_a.open_background()
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # Wait for the file to be visible from another client, indicating
+ # that mount_a has completed its network ops
+ self.mount_b.wait_for_visible()
+
+ # Simulate client death
+ self.mount_a.kill()
+
+ # wait for it to die so it doesn't voluntarily release buffer cap
+ time.sleep(5)
+
+ try:
+ # The waiter should get stuck waiting for the capability
+ # held on the MDS by the now-dead client A
+ cap_waiter = self.mount_b.write_background()
+ time.sleep(5)
+ self.assertFalse(cap_waiter.finished)
+
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ # Now, because I evicted the old holder of the capability, it should
+ # immediately get handed over to the waiter
+ a = time.time()
+ cap_waiter.wait()
+ b = time.time()
+ cap_waited = b - a
+ log.info("cap_waiter waited {0}s".format(cap_waited))
+ # This is the check that it happened 'now' rather than waiting
+ # for the session timeout
+ self.assertLess(cap_waited, session_timeout / 2.0,
+ "Capability handover took {0}, expected less than {1}".format(
+ cap_waited, session_timeout / 2.0
+ ))
+
+ cap_holder.stdin.close()
+ try:
+ cap_holder.wait()
+ except (CommandFailedError, ConnectionLostError):
+ # We killed it (and possibly its node), so it raises an error
+ pass
+ finally:
+ self.mount_a.kill_cleanup()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ def test_trim_caps(self):
+ # Trim capability when reconnecting MDS
+ # ===================================
+
+ count = 500
+ # Create lots of files
+ for i in range(count):
+ self.mount_a.run_shell(["touch", "f{0}".format(i)])
+
+ # Populate mount_b's cache
+ self.mount_b.run_shell(["ls", "-l"])
+
+ client_id = self.mount_b.get_global_id()
+ num_caps = self._session_num_caps(client_id)
+ self.assertGreaterEqual(num_caps, count)
+
+ # Restart MDS. client should trim its cache when reconnecting to the MDS
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+
+ num_caps = self._session_num_caps(client_id)
+ self.assertLess(num_caps, count,
+ "should have less than {0} capabilities, have {1}".format(
+ count, num_caps
+ ))
+
+ def _is_flockable(self):
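+ # flock() on a FUSE mount needs fuse >= 2.9, so only exercise flock locks
+ # if both client machines have a new enough fuse package.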
+ a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
+ b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
+ flock_version_str = "2.9"
+
+ version_regex = re.compile(r"[0-9\.]+")
+ a_result = version_regex.match(a_version_str)
+ self.assertTrue(a_result)
+ b_result = version_regex.match(b_version_str)
+ self.assertTrue(b_result)
+ a_version = version.StrictVersion(a_result.group())
+ b_version = version.StrictVersion(b_result.group())
+ flock_version=version.StrictVersion(flock_version_str)
+
+ if (a_version >= flock_version and b_version >= flock_version):
+ log.info("flock locks are available")
+ return True
+ else:
+ log.info("not testing flock locks, machines have versions {av} and {bv}".format(
+ av=a_version_str,bv=b_version_str))
+ return False
+
+ def test_filelock(self):
+ """
+ Check that file lock doesn't get lost after an MDS restart
+ """
+
+ flockable = self._is_flockable()
+ lock_holder = self.mount_a.lock_background(do_flock=flockable)
+
+ self.mount_b.wait_for_visible("background_file-2")
+ self.mount_b.check_filelock(do_flock=flockable)
+
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+
+ self.mount_b.check_filelock(do_flock=flockable)
+
+ # Tear down the background process
+ lock_holder.stdin.close()
+ try:
+ lock_holder.wait()
+ except (CommandFailedError, ConnectionLostError):
+ # We killed it, so it raises an error
+ pass
+
+ def test_filelock_eviction(self):
+ """
+ Check that file lock held by evicted client is given to
+ waiting client.
+ """
+ if not self._is_flockable():
+ self.skipTest("flock is not available")
+
+ lock_holder = self.mount_a.lock_background()
+ self.mount_b.wait_for_visible("background_file-2")
+ self.mount_b.check_filelock()
+
+ lock_taker = self.mount_b.lock_and_release()
+ # Check the taker is waiting (doesn't get it immediately)
+ time.sleep(2)
+ self.assertFalse(lock_holder.finished)
+ self.assertFalse(lock_taker.finished)
+
+ try:
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+ # Evicting mount_a should let mount_b's attempt to take the lock
+ # succeed
+ self.wait_until_true(lambda: lock_taker.finished, timeout=10)
+ finally:
+ # teardown() doesn't quite handle this case cleanly, so help it out
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ # Bring the client back
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ def test_dir_fsync(self):
+ self._test_fsync(True)
+
+ def test_create_fsync(self):
+ self._test_fsync(False)
+
+ def _test_fsync(self, dirfsync):
+ """
+ That calls to fsync guarantee visibility of metadata to another
+ client immediately after the fsyncing client dies.
+ """
+
+ # Leave mount_b out until it's needed
+ self.mount_b.umount_wait()
+
+ # Create dir + child dentry on client A, and fsync the dir
+ path = os.path.join(self.mount_a.mountpoint, "subdir")
+ self.mount_a.run_python(
+ dedent("""
+ import os
+ import time
+
+ path = "{path}"
+
+ print("Starting creation...")
+ start = time.time()
+
+ os.mkdir(path)
+ dfd = os.open(path, os.O_DIRECTORY)
+
+ fd = open(os.path.join(path, "childfile"), "w")
+ print("Finished creation in {{0}}s".format(time.time() - start))
+
+ print("Starting fsync...")
+ start = time.time()
+ if {dirfsync}:
+ os.fsync(dfd)
+ else:
+ os.fsync(fd)
+ print("Finished fsync in {{0}}s".format(time.time() - start))
+ """.format(path=path,dirfsync=str(dirfsync)))
+ )
+
+ # Immediately kill the MDS and then client A
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay
+ self.fs.mds_restart()
+ log.info("Waiting for reconnect...")
+ self.fs.wait_for_state("up:reconnect")
+ log.info("Waiting for active...")
+ self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
+ log.info("Reached active...")
+
+ # Is the child dentry visible from mount B?
+ self.mount_b.mount()
+ self.mount_b.wait_until_mounted()
+ self.mount_b.run_shell(["ls", "subdir/childfile"])
+
+ def test_unmount_for_evicted_client(self):
+ """Test if client hangs on unmount after evicting the client."""
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+ self.mount_a.umount_wait(require_clean=True, timeout=30)
+
+ def test_stale_renew(self):
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client to handle signal STOP/CONT")
+
+ session_timeout = self.fs.get_var("session_timeout")
+
+ self.mount_a.run_shell(["mkdir", "testdir"])
+ self.mount_a.run_shell(["touch", "testdir/file1"])
+ # populate readdir cache
+ self.mount_a.run_shell(["ls", "testdir"])
+ self.mount_b.run_shell(["ls", "testdir"])
+
+ # check that the readdir cache is effective: a cached readdir should not send another request to the MDS, so its perf counter must not change
+ initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
+ self.mount_b.run_shell(["ls", "testdir"])
+ current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
+ self.assertEqual(current_readdirs, initial_readdirs)
+
+ mount_b_gid = self.mount_b.get_global_id()
+ mount_b_pid = self.mount_b.get_client_pid()
+ # stop ceph-fuse process of mount_b
+ self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid])
+
+ self.assert_session_state(mount_b_gid, "open")
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
+
+ self.mount_a.run_shell(["touch", "testdir/file2"])
+ self.assert_session_state(mount_b_gid, "stale")
+
+ # resume ceph-fuse process of mount_b
+ self.mount_b.client_remote.run(args=["sudo", "kill", "-CONT", mount_b_pid])
+ # Is the new file visible from mount_b? (caps become invalid after session stale)
+ self.mount_b.run_shell(["ls", "testdir/file2"])
+
+ def test_abort_conn(self):
+ """
+ Check that abort_conn() skips closing mds sessions.
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Testing libcephfs function")
+
+ self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+ session_timeout = self.fs.get_var("session_timeout")
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
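+ # abort_conn() drops the connection without sending a session close, so the
+ # MDS should keep the session and eventually mark it stale (checked below).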
+ gid_str = self.mount_a.run_python(dedent("""
+ import cephfs as libcephfs
+ cephfs = libcephfs.LibCephFS(conffile='')
+ cephfs.mount()
+ client_id = cephfs.get_instance_id()
+ cephfs.abort_conn()
+ print(client_id)
+ """)
+ )
+ gid = int(gid_str)
+
+ self.assert_session_state(gid, "open")
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
+ self.assert_session_state(gid, "stale")
+
+ def test_dont_mark_unresponsive_client_stale(self):
+ """
+ Test that an unresponsive client holding caps is not marked stale or
+ evicted unless another client wants its caps.
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to handle signal STOP/CONT")
+
+ # XXX: To conduct this test we need at least two clients since a
+ # single client is never evicted by the MDS.
+ SESSION_TIMEOUT = 30
+ SESSION_AUTOCLOSE = 50
+ time_at_beg = time.time()
+ mount_a_gid = self.mount_a.get_global_id()
+ _ = self.mount_a.client_pid
+ self.fs.set_var('session_timeout', SESSION_TIMEOUT)
+ self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE)
+ self.assert_session_count(2, self.fs.mds_asok(['session', 'ls']))
+
+ # test that client holding cap not required by any other client is not
+ # marked stale when it becomes unresponsive.
+ self.mount_a.run_shell(['mkdir', 'dir'])
+ self.mount_a.send_signal('sigstop')
+ time.sleep(SESSION_TIMEOUT + 2)
+ self.assert_session_state(mount_a_gid, "open")
+
+ # test that other clients have to wait to get the caps from
+ # unresponsive client until session_autoclose.
+ self.mount_b.run_shell(['stat', 'dir'])
+ self.assert_session_count(1, self.fs.mds_asok(['session', 'ls']))
+ self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE)
+
+ self.mount_a.send_signal('sigcont')
+
+ def test_config_session_timeout(self):
+ self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+ session_timeout = self.fs.get_var("session_timeout")
+ mount_a_gid = self.mount_a.get_global_id()
+
+ self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)])
+
+ self.mount_a.kill()
+
+ self.assert_session_count(2)
+
+ time.sleep(session_timeout * 1.5)
+ self.assert_session_state(mount_a_gid, "open")
+
+ time.sleep(session_timeout)
+ self.assert_session_count(1)
+
+ self.mount_a.kill_cleanup()
diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
new file mode 100644
index 00000000..d03e027e
--- /dev/null
+++ b/qa/tasks/cephfs/test_damage.py
@@ -0,0 +1,569 @@
+import json
+import logging
+import errno
+import re
+from teuthology.contextutil import MaxWhileTries
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra.run import wait
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+DAMAGED_ON_START = "damaged_on_start"
+DAMAGED_ON_LS = "damaged_on_ls"
+CRASHED = "server crashed"
+NO_DAMAGE = "no damage"
+READONLY = "readonly"
+FAILED_CLIENT = "client failed"
+FAILED_SERVER = "server failed"
+
+# An EIO in response to a stat from the client
+EIO_ON_LS = "eio"
+
+ # An EIO, but nothing in the damage table (never what we expect)
+EIO_NO_DAMAGE = "eio without damage entry"
+
+
+log = logging.getLogger(__name__)
+
+
+class TestDamage(CephFSTestCase):
+ def _simple_workload_write(self):
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.write_n_mb("subdir/sixmegs", 6)
+ return self.mount_a.stat("subdir/sixmegs")
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ @for_teuthology #459s
+ def test_object_deletion(self):
+ """
+ That the MDS has a clean 'damaged' response to loss of any single metadata object
+ """
+
+ self._simple_workload_write()
+
+ # Hmm, actually it would be nice to permute whether the metadata pool
+ # state contains sessions or not, but for the moment close this session
+ # to avoid waiting through reconnect on every MDS start.
+ self.mount_a.umount_wait()
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ self.fs.rados(['export', '/tmp/metadata.bin'])
+
+ def is_ignored(obj_id, dentry=None):
+ """
+ A filter to avoid redundantly mutating many similar objects (e.g.
+ stray dirfrags) or similar dentries (e.g. stray dir dentries)
+ """
+ if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
+ return True
+
+ if dentry and obj_id == "100.00000000":
+ if re.match("stray.+_head", dentry) and dentry != "stray0_head":
+ return True
+
+ return False
+
+ def get_path(obj_id, dentry=None):
+ """
+ What filesystem path does this object or dentry correspond to? i.e.
+ what should I poke to see EIO after damaging it?
+ """
+
+ if obj_id == "1.00000000" and dentry == "subdir_head":
+ return "./subdir"
+ elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
+ return "./subdir/sixmegs"
+
+ # None means ls will do an "ls -R" in hope of seeing some errors
+ return None
+
+ objects = self.fs.rados(["ls"]).split("\n")
+ objects = [o for o in objects if not is_ignored(o)]
+
+ # Find all objects with an OMAP header
+ omap_header_objs = []
+ for o in objects:
+ header = self.fs.rados(["getomapheader", o])
+ # The rados CLI wraps the header output in a hex-printed style
+ header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
+ if header_bytes > 0:
+ omap_header_objs.append(o)
+
+ # Find all OMAP key/vals
+ omap_keys = []
+ for o in objects:
+ keys_str = self.fs.rados(["listomapkeys", o])
+ if keys_str:
+ for key in keys_str.split("\n"):
+ if not is_ignored(o, key):
+ omap_keys.append((o, key))
+
+ # Find objects that have data in their bodies
+ data_objects = []
+ for obj_id in objects:
+ stat_out = self.fs.rados(["stat", obj_id])
+ size = int(re.match(".+, size (.+)$", stat_out).group(1))
+ if size > 0:
+ data_objects.append(obj_id)
+
+ # Define the various forms of damage we will inflict
+ class MetadataMutation(object):
+ def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
+ self.obj_id = obj_id_
+ self.desc = desc_
+ self.mutate_fn = mutate_fn_
+ self.expectation = expectation_
+ if ls_path is None:
+ self.ls_path = "."
+ else:
+ self.ls_path = ls_path
+
+ def __eq__(self, other):
+ return self.desc == other.desc
+
+ def __hash__(self):
+ return hash(self.desc)
+
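+ # 80 bytes of garbage used to overwrite object bodies, omap values and omap headers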
+ junk = "deadbeef" * 10
+ mutations = []
+
+ # Removals
+ for o in objects:
+ if o in [
+ # JournalPointers are auto-replaced if missing (same path as upgrade)
+ "400.00000000",
+ # Missing dirfrags for non-system dirs result in empty directory
+ "10000000000.00000000",
+ # PurgeQueue is auto-created if not found on startup
+ "500.00000000",
+ # open file table is auto-created if not found on startup
+ "mds0_openfiles.0"
+ ]:
+ expectation = NO_DAMAGE
+ else:
+ expectation = DAMAGED_ON_START
+
+ log.info("Expectation on rm '{0}' will be '{1}'".format(
+ o, expectation
+ ))
+
+ mutations.append(MetadataMutation(
+ o,
+ "Delete {0}".format(o),
+ lambda o=o: self.fs.rados(["rm", o]),
+ expectation
+ ))
+
+ # Blatant corruptions
+ for obj_id in data_objects:
+ if obj_id == "500.00000000":
+ # purge queue corruption results in read-only FS
+ mutations.append(MetadataMutation(
+ obj_id,
+ "Corrupt {0}".format(obj_id),
+ lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
+ READONLY
+ ))
+ else:
+ mutations.append(MetadataMutation(
+ obj_id,
+ "Corrupt {0}".format(obj_id),
+ lambda o=obj_id: self.fs.rados(["put", o, "-"], stdin_data=junk),
+ DAMAGED_ON_START
+ ))
+
+ # Truncations
+ for o in data_objects:
+ if o == "500.00000000":
+ # The PurgeQueue is allowed to be empty: Journaler interprets
+ # an empty header object as an empty journal.
+ expectation = NO_DAMAGE
+ else:
+ expectation = DAMAGED_ON_START
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Truncate {0}".format(o),
+ lambda o=o: self.fs.rados(["truncate", o, "0"]),
+ expectation
+ ))
+
+ # OMAP value corruptions
+ for o, k in omap_keys:
+ if o.startswith("100."):
+ # Anything in rank 0's 'mydir'
+ expectation = DAMAGED_ON_START
+ else:
+ expectation = EIO_ON_LS
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Corrupt omap key {0}:{1}".format(o, k),
+ lambda o=o,k=k: self.fs.rados(["setomapval", o, k, junk]),
+ expectation,
+ get_path(o, k)
+ )
+ )
+
+ # OMAP header corruptions
+ for o in omap_header_objs:
+ if re.match("60.\.00000000", o) \
+ or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
+ expectation = DAMAGED_ON_START
+ else:
+ expectation = NO_DAMAGE
+
+ log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
+ o, expectation
+ ))
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Corrupt omap header on {0}".format(o),
+ lambda o=o: self.fs.rados(["setomapheader", o, junk]),
+ expectation
+ )
+ )
+
+ results = {}
+
+ for mutation in mutations:
+ log.info("Applying mutation '{0}'".format(mutation.desc))
+
+ # Reset MDS state
+ self.mount_a.umount_wait(force=True)
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Reset RADOS pool state
+ self.fs.rados(['import', '/tmp/metadata.bin'])
+
+ # Inject the mutation
+ mutation.mutate_fn()
+
+ # Try starting the MDS
+ self.fs.mds_restart()
+
+ # How long we'll wait between starting a daemon and expecting
+ # it to make it through startup, and potentially declare itself
+ # damaged to the mon cluster.
+ startup_timeout = 60
+
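+ # Expectations about what happens at MDS startup are handled in this branch;
+ # ls-time expectations are checked after mounting the client further below.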
+ if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
+ if mutation.expectation == DAMAGED_ON_START:
+ # The MDS may pass through active before making it to damaged
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
+ except RuntimeError:
+ pass
+
+ # Wait for MDS to either come up or go into damaged state
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
+ except RuntimeError:
+ crashed = False
+ # Didn't make it to healthy or damaged, did it crash?
+ for daemon_id, daemon in self.fs.mds_daemons.items():
+ if daemon.proc and daemon.proc.finished:
+ crashed = True
+ log.error("Daemon {0} crashed!".format(daemon_id))
+ daemon.proc = None # So that subsequent stop() doesn't raise error
+ if not crashed:
+ # Didn't go healthy, didn't go damaged, didn't crash, so what?
+ raise
+ else:
+ log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
+ results[mutation] = CRASHED
+ continue
+ if self.is_marked_damaged(0):
+ log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
+ results[mutation] = DAMAGED_ON_START
+ continue
+ else:
+ log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
+ else:
+ try:
+ self.wait_until_true(self.fs.are_daemons_healthy, 60)
+ except RuntimeError:
+ log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
+ if self.is_marked_damaged(0):
+ results[mutation] = DAMAGED_ON_START
+ else:
+ results[mutation] = FAILED_SERVER
+ continue
+ log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))
+
+ # MDS is up, should go damaged on ls or client mount
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ if mutation.ls_path == ".":
+ proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
+ else:
+ proc = self.mount_a.stat(mutation.ls_path, wait=False)
+
+ if mutation.expectation == DAMAGED_ON_LS:
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
+ log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
+ results[mutation] = DAMAGED_ON_LS
+ except RuntimeError:
+ if self.fs.are_daemons_healthy():
+ log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
+ mutation.desc))
+ results[mutation] = NO_DAMAGE
+ else:
+ log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
+ results[mutation] = FAILED_SERVER
+ elif mutation.expectation == READONLY:
+ proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
+ try:
+ proc.wait()
+ except CommandFailedError:
+ stderr = proc.stderr.getvalue()
+ log.info(stderr)
+ if "Read-only file system".lower() in stderr.lower():
+ pass
+ else:
+ raise
+ else:
+ try:
+ wait([proc], 20)
+ log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc))
+ results[mutation] = NO_DAMAGE
+ except MaxWhileTries:
+ log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
+ results[mutation] = FAILED_CLIENT
+ except CommandFailedError as e:
+ if e.exitstatus == errno.EIO:
+ log.info("Result: EIO on client")
+ results[mutation] = EIO_ON_LS
+ else:
+ log.info("Result: unexpected error {0} on client".format(e))
+ results[mutation] = FAILED_CLIENT
+
+ if mutation.expectation == EIO_ON_LS:
+ # EIOs mean something handled by DamageTable: assert that it has
+ # been populated
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
+ if len(damage) == 0:
+ results[mutation] = EIO_NO_DAMAGE
+
+ failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
+ if failures:
+ log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
+ for mutation, result in failures:
+ log.error(" Expected '{0}' actually '{1}' from '{2}'".format(
+ mutation.expectation, result, mutation.desc
+ ))
+ raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
+ else:
+ log.info("All {0} mutations had expected outcomes".format(len(mutations)))
+
+ def test_damaged_dentry(self):
+ # Damage to dentries is interesting because it leaves the
+ # directory's `complete` flag in a subtle state where
+ # we have marked the dir complete in order that folks
+ # can access it, but in actual fact there is a dentry
+ # missing
+ self.mount_a.run_shell(["mkdir", "subdir/"])
+
+ self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
+ self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])
+
+ subdir_ino = self.mount_a.path_to_ino("subdir")
+
+ self.mount_a.umount_wait()
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # Corrupt a dentry
+ junk = "deadbeef" * 10
+ dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
+ self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Start up and try to list it
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ dentries = self.mount_a.ls("subdir/")
+
+ # The damaged guy should have disappeared
+ self.assertEqual(dentries, ["file_undamaged"])
+
+ # I should get ENOENT if I try and read it normally, because
+ # the dir is considered complete
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ raise AssertionError("Expected ENOENT")
+
+ # The fact that there is damage should have been recorded
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ damage_id = damage[0]['id']
+
+ # If I try to create a dentry with the same name as the damaged guy
+ # then that should be forbidden
+ try:
+ self.mount_a.touch("subdir/file_to_be_damaged")
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EIO)
+ else:
+ raise AssertionError("Expected EIO")
+
+ # Attempting that touch will clear the client's complete flag, now
+ # when I stat it I'll get EIO instead of ENOENT
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ if isinstance(self.mount_a, FuseMount):
+ self.assertEqual(e.exitstatus, errno.EIO)
+ else:
+ # Kernel client handles this case differently
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ raise AssertionError("Expected EIO")
+
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "2")
+
+ self.mount_a.umount_wait()
+
+ # Now repair the stats
+ scrub_json = self.fs.rank_tell(["scrub", "start", "/subdir", "repair"])
+ log.info(json.dumps(scrub_json, indent=2))
+
+ self.assertEqual(scrub_json["passed_validation"], False)
+ self.assertEqual(scrub_json["raw_stats"]["checked"], True)
+ self.assertEqual(scrub_json["raw_stats"]["passed"], False)
+
+ # Check that the file count is now correct
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "1")
+
+ # Clean up the omap object
+ self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Clean up the damagetable entry
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", "{did}".format(did=damage_id))
+
+ # Now I should be able to create a file with the same name as the
+ # damaged guy if I want.
+ self.mount_a.touch("subdir/file_to_be_damaged")
+
+ def test_open_ino_errors(self):
+ """
+ That errors encountered during opening inos are properly propagated
+ """
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ self.mount_a.run_shell(["touch", "dir1/file1"])
+ self.mount_a.run_shell(["mkdir", "dir2"])
+ self.mount_a.run_shell(["touch", "dir2/file2"])
+ self.mount_a.run_shell(["mkdir", "testdir"])
+ self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
+ self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])
+
+ file1_ino = self.mount_a.path_to_ino("dir1/file1")
+ file2_ino = self.mount_a.path_to_ino("dir2/file2")
+ dir2_ino = self.mount_a.path_to_ino("dir2")
+
+ # Ensure everything is written to backing store
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Drop everything from the MDS cache
+ self.mds_cluster.mds_stop()
+ self.fs.journal_tool(['journal', 'reset'], 0)
+ self.mds_cluster.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+
+ # Case 1: un-decodeable backtrace
+
+ # Validate that the backtrace is present and decodable
+ self.fs.read_backtrace(file1_ino)
+ # Go corrupt the backtrace of dir1/file1 (used for resolving
+ # testdir/hardlink1).
+ self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
+
+ # Check that touching the hardlink gives EIO
+ ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
+ try:
+ ran.wait()
+ except CommandFailedError:
+ self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+ # Check that an entry is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ self.assertEqual(damage[0]['damage_type'], "backtrace")
+ self.assertEqual(damage[0]['ino'], file1_ino)
+
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", str(damage[0]['id']))
+
+
+ # Case 2: missing dirfrag for the target inode
+
+ self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])
+
+ # Check that touching the hardlink gives EIO
+ ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
+ try:
+ ran.wait()
+ except CommandFailedError:
+ self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+ # Check that an entry is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 2)
+ if damage[0]['damage_type'] == "backtrace":
+ self.assertEqual(damage[0]['ino'], file2_ino)
+ self.assertEqual(damage[1]['damage_type'], "dir_frag")
+ self.assertEqual(damage[1]['ino'], dir2_ino)
+ else:
+ self.assertEqual(damage[0]['damage_type'], "dir_frag")
+ self.assertEqual(damage[0]['ino'], dir2_ino)
+ self.assertEqual(damage[1]['damage_type'], "backtrace")
+ self.assertEqual(damage[1]['ino'], file2_ino)
+
+ for entry in damage:
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", str(entry['id']))
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
new file mode 100644
index 00000000..cbd5109a
--- /dev/null
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -0,0 +1,695 @@
+
+"""
+Test our tools for recovering metadata from the data pool
+"""
+import json
+
+import logging
+import os
+import time
+import traceback
+
+from io import BytesIO
+from collections import namedtuple, defaultdict
+from textwrap import dedent
+
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class Workload(object):
+ def __init__(self, filesystem, mount):
+ self._mount = mount
+ self._filesystem = filesystem
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def assert_equal(self, a, b):
+ try:
+ if a != b:
+ raise AssertionError("{0} != {1}".format(a, b))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+ # Delete every object in the metadata pool
+ objects = self._filesystem.rados(["ls"]).split("\n")
+ for o in objects:
+ self._filesystem.rados(["rm", o])
+
+ def flush(self):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ self._filesystem.mds_asok(["flush", "journal"])
+
+
+class SimpleWorkload(Workload):
+ """
+ Single file, single directory, check that it gets recovered and so does its size
+ """
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._mount.stat("subdir/sixmegs")
+
+ def validate(self):
+ self._mount.run_shell(["ls", "subdir"])
+ st = self._mount.stat("subdir/sixmegs")
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+
+class MovedFile(Workload):
+ def write(self):
+ # Create a file whose backtrace disagrees with its eventual position
+ # in the metadata. We will see that it gets reconstructed in its
+ # original position according to its backtrace.
+ self._mount.run_shell(["mkdir", "subdir_alpha"])
+ self._mount.run_shell(["mkdir", "subdir_bravo"])
+ self._mount.write_n_mb("subdir_alpha/sixmegs", 6)
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"])
+ self._initial_state = self._mount.stat("subdir_bravo/sixmegs")
+
+ def flush(self):
+ pass
+
+ def validate(self):
+ self.assert_equal(self._mount.ls(), ["subdir_alpha"])
+ st = self._mount.stat("subdir_alpha/sixmegs")
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+
+class BacktracelessFile(Workload):
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._mount.stat("subdir/sixmegs")
+
+ def flush(self):
+ # Never flush metadata, so backtrace won't be written
+ pass
+
+ def validate(self):
+ ino_name = "%x" % self._initial_state["st_ino"]
+
+ # The inode should be linked into lost+found because we had no path for it
+ self.assert_equal(self._mount.ls(), ["lost+found"])
+ self.assert_equal(self._mount.ls("lost+found"), [ino_name])
+ st = self._mount.stat("lost+found/{ino_name}".format(ino_name=ino_name))
+
+ # We might not have got the name or path, but we should still get the size
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+
+ return self._errors
+
+
+class StripedStashedLayout(Workload):
+ def __init__(self, fs, m):
+ super(StripedStashedLayout, self).__init__(fs, m)
+
+ # Nice small stripes so we can quickly do our writes+validates
+ self.sc = 4
+ self.ss = 65536
+ self.os = 262144
+
+ self.interesting_sizes = [
+ # Exactly stripe_count objects will exist
+ self.os * self.sc,
+ # Fewer than stripe_count objects will exist
+ self.os * self.sc // 2,
+ self.os * (self.sc - 1) + self.os // 2,
+ self.os * (self.sc - 1) + self.os // 2 - 1,
+ self.os * (self.sc + 1) + self.os // 2,
+ self.os * (self.sc + 1) + self.os // 2 + 1,
+ # More than stripe_count objects will exist
+ self.os * self.sc + self.os * self.sc // 2
+ ]
+
+ def write(self):
+ # Create a dir with a striped layout set on it
+ self._mount.run_shell(["mkdir", "stripey"])
+
+ self._mount.setfattr("./stripey", "ceph.dir.layout",
+ "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format(
+ ss=self.ss, os=self.os, sc=self.sc,
+ pool=self._filesystem.get_data_pool_name()
+ ))
+
+ # Write files, then flush metadata so that its layout gets written into an xattr
+ for i, n_bytes in enumerate(self.interesting_sizes):
+ self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
+ # This is really just validating the validator
+ self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
+ self._filesystem.mds_asok(["flush", "journal"])
+
+ # Write another file in the same way, but this time don't flush the metadata,
+ # so that it won't have the layout xattr
+ self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512)
+ self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512)
+
+ self._initial_state = {
+ "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file")
+ }
+
+ def flush(self):
+ # Pass because we already selectively flushed during write
+ pass
+
+ def validate(self):
+ # The flushed files should have been recovered into their original locations
+ # with the correct layout: read back correct data
+ for i, n_bytes in enumerate(self.interesting_sizes):
+ try:
+ self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
+ except CommandFailedError as e:
+ self._errors.append(
+ ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3))
+ )
+
+ # The unflushed file should have been recovered into lost+found without
+ # the correct layout: read back junk
+ ino_name = "%x" % self._initial_state["unflushed_ino"]
+ self.assert_equal(self._mount.ls("lost+found"), [ino_name])
+ try:
+ self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512)
+ except CommandFailedError:
+ pass
+ else:
+ self._errors.append(
+ ValidationError("Unexpectedly valid data in unflushed striped file", "")
+ )
+
+ return self._errors
+
+
+class ManyFilesWorkload(Workload):
+ def __init__(self, filesystem, mount, file_count):
+ super(ManyFilesWorkload, self).__init__(filesystem, mount)
+ self.file_count = file_count
+
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ for n in range(0, self.file_count):
+ self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
+
+ def validate(self):
+ for n in range(0, self.file_count):
+ try:
+ self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
+ except CommandFailedError as e:
+ self._errors.append(
+ ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3))
+ )
+
+ return self._errors
+
+
+class MovedDir(Workload):
+ def write(self):
+ # Create a nested dir that we will then move. Two files with two different
+ # backtraces referring to the moved dir, claiming two different locations for
+ # it. We will see that only one backtrace wins and the dir ends up with
+ # a single linkage.
+ self._mount.run_shell(["mkdir", "-p", "grandmother/parent"])
+ self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1)
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._mount.run_shell(["mkdir", "grandfather"])
+ self._mount.run_shell(["mv", "grandmother/parent", "grandfather"])
+ self._mount.write_n_mb("grandfather/parent/new_pos_file", 2)
+ self._filesystem.mds_asok(["flush", "journal"])
+
+ self._initial_state = (
+ self._mount.stat("grandfather/parent/orig_pos_file"),
+ self._mount.stat("grandfather/parent/new_pos_file")
+ )
+
+ def validate(self):
+ root_files = self._mount.ls()
+ self.assert_equal(len(root_files), 1)
+ self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True)
+ winner = root_files[0]
+ st_opf = self._mount.stat("{0}/parent/orig_pos_file".format(winner))
+ st_npf = self._mount.stat("{0}/parent/new_pos_file".format(winner))
+
+ self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size'])
+ self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size'])
+
+
+class MissingZerothObject(Workload):
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._mount.stat("subdir/sixmegs")
+
+ def damage(self):
+ super(MissingZerothObject, self).damage()
+ zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino'])
+ self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name())
+
+ def validate(self):
+ st = self._mount.stat("lost+found/{0:x}".format(self._initial_state['st_ino']))
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+
+
+class NonDefaultLayout(Workload):
+ """
+ Check that the reconstruction copes with files that have a different
+ object size in their layout
+ """
+ def write(self):
+ self._mount.run_shell(["touch", "datafile"])
+ self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608")
+ self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"])
+ self._initial_state = self._mount.stat("datafile")
+
+ def validate(self):
+ # Check we got the layout reconstructed properly
+ object_size = int(self._mount.getfattr(
+ "./datafile", "ceph.file.layout.object_size"))
+ self.assert_equal(object_size, 8388608)
+
+ # Check we got the file size reconstructed properly
+ st = self._mount.stat("datafile")
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+
+
+class TestDataScan(CephFSTestCase):
+ MDSS_REQUIRED = 2
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ def _rebuild_metadata(self, workload, workers=1):
+ """
+ That when all objects in metadata pool are removed, we can rebuild a metadata pool
+ based on the contents of a data pool, and a client can see and read our files.
+ """
+
+ # First, inject some files
+
+ workload.write()
+
+ # Unmount the client and flush the journal: the tool should also cope with
+ # situations where there is dirty metadata, but we'll test that separately
+ self.mount_a.umount_wait()
+ workload.flush()
+
+ # Stop the MDS
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # After recovery, we need the MDS to not be strict about stats (in production these options
+ # are off by default, but in QA we need to explicitly disable them)
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+
+ # Apply any data damage the workload wants
+ workload.damage()
+
+ # Reset the MDS map in case multiple ranks were in play: recovery procedure
+ # only understands how to rebuild metadata under rank 0
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ self.fs.mds_restart()
+
+ def get_state(mds_id):
+ info = self.mds_cluster.get_mds_info(mds_id)
+ return info['state'] if info is not None else None
+
+ self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
+ for mds_id in self.fs.mds_ids:
+ self.wait_until_equal(
+ lambda: get_state(mds_id),
+ "up:standby",
+ timeout=60)
+
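+ # Blow away the session, snap and inode tables so the rebuilt metadata
+ # starts from a clean slate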
+ self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
+
+ # Run the recovery procedure
+ if False:
+ with self.assertRaises(CommandFailedError):
+ # Normal reset should fail when no objects are present, we'll use --force instead
+ self.fs.journal_tool(["journal", "reset"], 0)
+
+ self.fs.journal_tool(["journal", "reset", "--force"], 0)
+ self.fs.data_scan(["init"])
+ self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()], worker_count=workers)
+ self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()], worker_count=workers)
+
+ # Mark the MDS repaired
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Start the MDS
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+ log.info(str(self.mds_cluster.status()))
+
+ # Mount a client
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # See that the files are present and correct
+ errors = workload.validate()
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def test_rebuild_simple(self):
+ self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a))
+
+ def test_rebuild_moved_file(self):
+ self._rebuild_metadata(MovedFile(self.fs, self.mount_a))
+
+ def test_rebuild_backtraceless(self):
+ self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a))
+
+ def test_rebuild_moved_dir(self):
+ self._rebuild_metadata(MovedDir(self.fs, self.mount_a))
+
+ def test_rebuild_missing_zeroth(self):
+ self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a))
+
+ def test_rebuild_nondefault_layout(self):
+ self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a))
+
+ def test_stashed_layout(self):
+ self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a))
+
+ def _dirfrag_keys(self, object_id):
+ keys_str = self.fs.rados(["listomapkeys", object_id])
+ if keys_str:
+ return keys_str.split("\n")
+ else:
+ return []
+
+ def test_fragmented_injection(self):
+ """
+ That when injecting a dentry into a fragmented directory, we put it in the right fragment.
+ """
+
+ file_count = 100
+ file_names = ["%s" % n for n in range(0, file_count)]
+
+ # Make sure to disable dirfrag auto merging and splitting
+ self.fs.set_ceph_conf('mds', 'mds bal merge size', 0)
+ self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count)
+
+ # Create a directory of `file_count` files, each named after its
+ # decimal number and containing the string of its decimal number
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir")
+ os.mkdir(path)
+ for n in range(0, {file_count}):
+ open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=file_count
+ )))
+
+ dir_ino = self.mount_a.path_to_ino("subdir")
+
+ # Only one MDS should be active!
+ self.assertEqual(len(self.fs.get_active_names()), 1)
+
+ # Ensure that one directory is fragmented
+ mds_id = self.fs.get_active_names()[0]
+ self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id)
+
+ # Flush journal and stop MDS
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # Pick a dentry and wipe out its key
+ # Because I did a 1 bit split, I know one frag will be named <inode>.01000000
+ frag_obj_id = "{0:x}.01000000".format(dir_ino)
+ keys = self._dirfrag_keys(frag_obj_id)
+ victim_key = keys[7] # arbitrary choice
+ log.info("victim_key={0}".format(victim_key))
+ victim_dentry = victim_key.split("_head")[0]
+ self.fs.rados(["rmomapkey", frag_obj_id, victim_key])
+
+ # Start filesystem back up, observe that the file appears to be gone in an `ls`
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n")
+ self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry]))))
+
+ # Stop the filesystem
+ self.mount_a.umount_wait()
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # Run data-scan, observe that it inserts our dentry back into the correct fragment
+ # by checking the omap now has the dentry's key again
+ self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
+ self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
+ self.fs.data_scan(["scan_links"])
+ self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))
+
+ # Start the filesystem and check that the dentry we deleted is now once again visible
+ # and points to the correct file data.
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ out = self.mount_a.run_shell(["cat", "subdir/{0}".format(victim_dentry)]).stdout.getvalue().strip()
+ self.assertEqual(out, victim_dentry)
+
+ # Finally, close the loop by checking our injected dentry survives a merge
+ mds_id = self.fs.get_active_names()[0]
+ self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work
+ self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id)
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ frag_obj_id = "{0:x}.00000000".format(dir_ino)
+ keys = self._dirfrag_keys(frag_obj_id)
+ self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names]))
+
+ # Run scrub to update rstats and make sure the rstat.rbytes info in the subdir
+ # inode and its dirfrag match
+ out_json = self.fs.rank_tell(["scrub", "start", "/subdir", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # Remove the whole 'subdir' directory
+ self.mount_a.run_shell(["rm", "-rf", "subdir/"])
+
+ @for_teuthology
+ def test_parallel_execution(self):
+ self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7)
+
+ def test_pg_files(self):
+ """
+ That the pg files command tells us which files are associated with
+ a particular PG
+ """
+ file_count = 20
+ self.mount_a.run_shell(["mkdir", "mydir"])
+ self.mount_a.create_n_files("mydir/myfile", file_count)
+
+ # Some files elsewhere in the system that we will ignore
+ # to check that the tool is filtering properly
+ self.mount_a.run_shell(["mkdir", "otherdir"])
+ self.mount_a.create_n_files("otherdir/otherfile", file_count)
+
+ pgs_to_files = defaultdict(list)
+ # Rough (slow) reimplementation of the logic
+ for i in range(0, file_count):
+ file_path = "mydir/myfile_{0}".format(i)
+ ino = self.mount_a.path_to_ino(file_path)
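+ # data objects are named <ino hex>.<object index hex>, so object 0 of the file is <ino>.00000000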
+ obj = "{0:x}.{1:08x}".format(ino, 0)
+ pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd(
+ "osd", "map", self.fs.get_data_pool_name(), obj,
+ "--format=json-pretty"
+ ))['pgid']
+ pgs_to_files[pgid].append(file_path)
+ log.info("{0}: {1}".format(file_path, pgid))
+
+ pg_count = self.fs.pgs_per_fs_pool
+ for pg_n in range(0, pg_count):
+ pg_str = "{0}.{1}".format(self.fs.get_data_pool_id(), pg_n)
+ out = self.fs.data_scan(["pg_files", "mydir", pg_str])
+ lines = [l for l in out.split("\n") if l]
+ log.info("{0}: {1}".format(pg_str, lines))
+ self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))
+
+ def test_rebuild_linkage(self):
+ """
+ The scan_links command fixes linkage errors
+ """
+ self.mount_a.run_shell(["mkdir", "testdir1"])
+ self.mount_a.run_shell(["mkdir", "testdir2"])
+ dir1_ino = self.mount_a.path_to_ino("testdir1")
+ dir2_ino = self.mount_a.path_to_ino("testdir2")
+ dirfrag1_oid = "{0:x}.00000000".format(dir1_ino)
+ dirfrag2_oid = "{0:x}.00000000".format(dir2_ino)
+
+ self.mount_a.run_shell(["touch", "testdir1/file1"])
+ self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"])
+ self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"])
+
+ mds_id = self.fs.get_active_names()[0]
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+
+ dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid)
+
+ # introduce duplicated primary link
+ file1_key = "file1_head"
+ self.assertIn(file1_key, dirfrag1_keys)
+ file1_omap_data = self.fs.rados(["getomapval", dirfrag1_oid, file1_key, '-'],
+ stdout_data=BytesIO())
+ self.fs.rados(["setomapval", dirfrag2_oid, file1_key], stdin_data=file1_omap_data)
+ self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid))
+
+ # remove a remote link, make inode link count incorrect
+ link1_key = 'link1_head'
+ self.assertIn(link1_key, dirfrag1_keys)
+ self.fs.rados(["rmomapkey", dirfrag1_oid, link1_key])
+
+ # increase good primary link's version
+ self.mount_a.run_shell(["touch", "testdir1/file1"])
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # repair linkage errors
+ self.fs.data_scan(["scan_links"])
+
+ # primary link in testdir2 was deleted?
+ self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid))
+
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # link count was adjusted?
+ file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
+ self.assertEqual(file1_nlink, 2)
+
+ def test_rebuild_inotable(self):
+ """
+ The scan_links command repairs the per-rank inode tables
+ """
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+
+ active_mds_names = self.fs.get_active_names()
+ mds0_id = active_mds_names[0]
+ mds1_id = active_mds_names[1]
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ dir_ino = self.mount_a.path_to_ino("dir1")
+ self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")
+ # wait for subtree migration
+
+ file_ino = 0
+ while True:
+ time.sleep(1)
+ # allocate an inode from mds.1
+ self.mount_a.run_shell(["touch", "dir1/file1"])
+ file_ino = self.mount_a.path_to_ino("dir1/file1")
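+ # inodes allocated by rank 1 come from a separate, higher preallocation range;
+ # 2 << 40 is assumed here as a lower bound for that range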
+ if file_ino >= (2 << 40):
+ break
+ self.mount_a.run_shell(["rm", "-f", "dir1/file1"])
+
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(["flush", "journal"], mds0_id)
+ self.fs.mds_asok(["flush", "journal"], mds1_id)
+ self.mds_cluster.mds_stop()
+
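+ # delete both ranks' inode table objects from the metadata pool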
+ self.fs.rados(["rm", "mds0_inotable"])
+ self.fs.rados(["rm", "mds1_inotable"])
+
+ self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])
+
+ mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
+ self.assertGreaterEqual(
+ mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)
+
+ mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
+ self.assertGreaterEqual(
+ mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)
+
+ def test_rebuild_snaptable(self):
+ """
+ The scan_links command repairs the snap table
+ """
+ self.fs.set_allow_new_snaps(True)
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"])
+ self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"])
+ self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"])
+
+ self.mount_a.umount_wait()
+
+ mds0_id = self.fs.get_active_names()[0]
+ self.fs.mds_asok(["flush", "journal"], mds0_id)
+
+ # wait for mds to update removed snaps
+ time.sleep(10)
+
+ old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
+ # timestamps may differ slightly, so drop them before comparing
+ for item in old_snaptable['snapserver']['snaps']:
+ del item['stamp']
+
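+ # delete the snap table object from the metadata pool, then rebuild it with scan_links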
+ self.fs.rados(["rm", "mds_snaptable"])
+ self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])
+
+ new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
+ for item in new_snaptable['snapserver']['snaps']:
+ del item['stamp']
+ self.assertGreaterEqual(
+ new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap'])
+ self.assertEqual(
+ new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps'])
diff --git a/qa/tasks/cephfs/test_dump_tree.py b/qa/tasks/cephfs/test_dump_tree.py
new file mode 100644
index 00000000..48a2c6f0
--- /dev/null
+++ b/qa/tasks/cephfs/test_dump_tree.py
@@ -0,0 +1,66 @@
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import random
+import os
+
+class TestDumpTree(CephFSTestCase):
+ def get_paths_to_ino(self):
+ inos = {}
+ p = self.mount_a.run_shell(["find", "./"])
+ paths = p.stdout.getvalue().strip().split()
+ for path in paths:
+ inos[path] = self.mount_a.path_to_ino(path, False)
+
+ return inos
+
+ def populate(self):
+ self.mount_a.run_shell(["git", "clone",
+ "https://github.com/ceph/ceph-qa-suite"])
+
+ def test_basic(self):
+ self.mount_a.run_shell(["mkdir", "parent"])
+ self.mount_a.run_shell(["mkdir", "parent/child"])
+ self.mount_a.run_shell(["touch", "parent/child/file"])
+ self.mount_a.run_shell(["mkdir", "parent/child/grandchild"])
+ self.mount_a.run_shell(["touch", "parent/child/grandchild/file"])
+
+ inos = self.get_paths_to_ino()
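+ # "dump tree <path> <depth>" dumps the in-cache inodes under the path, limited to the given depth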
+ tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"])
+
+ target_inos = [inos["./parent/child"], inos["./parent/child/file"],
+ inos["./parent/child/grandchild"]]
+
+ for ino in tree:
+ del target_inos[target_inos.index(ino['ino'])] # don't catch!
+
+ assert(len(target_inos) == 0)
+
+ def test_random(self):
+ random.seed(0)
+
+ self.populate()
+ inos = self.get_paths_to_ino()
+ target = random.sample(inos.keys(), 1)[0]
+
+ if target != "./":
+ target = os.path.dirname(target)
+
+ subtree = [path for path in inos.keys() if path.startswith(target)]
+ target_inos = [inos[path] for path in subtree]
+ tree = self.fs.mds_asok(["dump", "tree", target[1:]])
+
+ for ino in tree:
+ del target_inos[target_inos.index(ino['ino'])] # don't catch!
+
+ assert(len(target_inos) == 0)
+
+ target_depth = target.count('/')
+ maxdepth = max([path.count('/') for path in subtree]) - target_depth
+ depth = random.randint(0, maxdepth)
+ target_inos = [inos[path] for path in subtree \
+ if path.count('/') <= depth + target_depth]
+ tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)])
+
+ for ino in tree:
+ del target_inos[target_inos.index(ino['ino'])] # don't catch!
+
+ assert(len(target_inos) == 0)
diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py
new file mode 100644
index 00000000..abaf92e6
--- /dev/null
+++ b/qa/tasks/cephfs/test_exports.py
@@ -0,0 +1,176 @@
+import logging
+import time
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class TestExports(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 2
+
+ def test_export_pin(self):
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+
+ status = self.fs.status()
+
+ self.mount_a.run_shell(["mkdir", "-p", "1/2/3"])
+ self._wait_subtrees(status, 0, [])
+
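+ # ceph.dir.pin = <rank> pins a subtree to that MDS rank; -1 (the default) means inherit the parent's pin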
+ # NOP
+ self.mount_a.setfattr("1", "ceph.dir.pin", "-1")
+ self._wait_subtrees(status, 0, [])
+
+ # NOP (rank < -1)
+ self.mount_a.setfattr("1", "ceph.dir.pin", "-2341")
+ self._wait_subtrees(status, 0, [])
+
+ # pin /1 to rank 1
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/1', 1)])
+
+ # Check export_targets is set properly
+ status = self.fs.status()
+ log.info(status)
+ r0 = status.get_rank(self.fs.id, 0)
+ self.assertTrue(sorted(r0['export_targets']) == [1])
+
+ # redundant pin /1/2 to rank 1
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)])
+
+ # change pin /1/2 to rank 0
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)])
+ self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)])
+
+ # change pin /1/2/3 to (presently) non-existent rank 2
+ self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
+ self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)])
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)])
+
+ # change pin /1/2 back to rank 1
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)])
+
+ # add another directory pinned to 1
+ self.mount_a.run_shell(["mkdir", "-p", "1/4/5"])
+ self.mount_a.setfattr("1/4/5", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1), ('/1/4/5', 1)])
+
+ # change pin /1 to 0
+ self.mount_a.setfattr("1", "ceph.dir.pin", "0")
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/2', 1), ('/1/4/5', 1)])
+
+ # change pin /1/2 to default (-1); does the subtree root properly respect its parent pin?
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1")
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1)])
+
+ if len(list(status.get_standbys())):
+ self.fs.set_max_mds(3)
+ self.fs.wait_for_state('up:active', rank=2)
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2)])
+
+ # Check export_targets is set properly
+ status = self.fs.status()
+ log.info(status)
+ r0 = status.get_rank(self.fs.id, 0)
+ self.assertTrue(sorted(r0['export_targets']) == [1,2])
+ r1 = status.get_rank(self.fs.id, 1)
+ self.assertTrue(sorted(r1['export_targets']) == [0])
+ r2 = status.get_rank(self.fs.id, 2)
+ self.assertTrue(sorted(r2['export_targets']) == [])
+
+ # Test rename
+ self.mount_a.run_shell(["mkdir", "-p", "a/b", "aa/bb"])
+ self.mount_a.setfattr("a", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("aa/bb", "ceph.dir.pin", "0")
+ if (len(self.fs.get_active_names()) > 2):
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/aa/bb', 0)])
+ else:
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/a', 1), ('/aa/bb', 0)])
+ self.mount_a.run_shell(["mv", "aa", "a/b/"])
+ if (len(self.fs.get_active_names()) > 2):
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/a/b/aa/bb', 0)])
+ else:
+ self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/a', 1), ('/a/b/aa/bb', 0)])
+
+ def test_export_pin_getfattr(self):
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+
+ status = self.fs.status()
+
+ self.mount_a.run_shell(["mkdir", "-p", "1/2/3"])
+ self._wait_subtrees(status, 0, [])
+
+ # pin /1 to rank 1
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/1', 1)])
+
+ # pin /1/2 to rank 1
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 1)])
+
+ # change pin /1/2 to rank 0
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
+ self._wait_subtrees(status, 1, [('/1', 1), ('/1/2', 0)])
+ self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)])
+
+ # change pin /1/2/3 to (presently) non-existent rank 2
+ self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
+ self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0)])
+
+ if len(list(status.get_standbys())):
+ self.fs.set_max_mds(3)
+ self.fs.wait_for_state('up:active', rank=2)
+ self._wait_subtrees(status, 0, [('/1', 1), ('/1/2', 0), ('/1/2/3', 2)])
+
+ if not isinstance(self.mount_a, FuseMount):
+ p = self.mount_a.client_remote.sh('uname -r', wait=True)
+ dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin")
+ log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin)
+ if str(p) < "5" and not(dir_pin):
+ self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin")
+ self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1')
+ self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0')
+ if (len(self.fs.get_active_names()) > 2):
+ self.assertEqual(self.mount_a.getfattr("1/2/3", "ceph.dir.pin"), '2')
+
+ def test_session_race(self):
+ """
+ Test session creation race.
+
+ See: https://tracker.ceph.com/issues/24072#change-113056
+ """
+
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ rank1 = self.fs.get_rank(rank=1, status=status)
+
+ # Create a directory that is pre-exported to rank 1
+ self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
+ self.mount_a.setfattr("a", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 1, [('/a', 1)])
+
+ # Now set the mds config to allow the race
+ self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1)
+
+ # Now create another directory and try to export it
+ self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
+ self.mount_b.setfattr("b", "ceph.dir.pin", "1")
+
+ time.sleep(5)
+
+ # Now turn off the race so that it doesn't wait again
+ self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1)
+
+ # Now try to create a session with rank 1 by accessing a dir known to
+ # be there, if buggy, this should cause the rank 1 to crash:
+ self.mount_b.run_shell(["ls", "a"])
+
+ # Check that rank 1 did not change (i.e. no standby took over)
+ new_rank1 = self.fs.get_rank(rank=1)
+ self.assertEqual(rank1['gid'], new_rank1['gid'])
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
new file mode 100644
index 00000000..c87afbf6
--- /dev/null
+++ b/qa/tasks/cephfs/test_failover.py
@@ -0,0 +1,638 @@
+import time
+import signal
+import logging
+from unittest import case, SkipTest
+from random import randint
+from six.moves import range
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.fuse_mount import FuseMount
+
+log = logging.getLogger(__name__)
+
+
+class TestClusterResize(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 3
+
+ def grow(self, n):
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
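+ # scale waits by the mon's beacon grace so slow failure detection doesn't cause spurious timeouts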
+
+ fscid = self.fs.id
+ status = self.fs.status()
+ log.info("status = {0}".format(status))
+
+ original_ranks = set([info['gid'] for info in status.get_ranks(fscid)])
+ _ = set([info['gid'] for info in status.get_standbys()])
+
+ oldmax = self.fs.get_var('max_mds')
+ self.assertTrue(n > oldmax)
+ self.fs.set_max_mds(n)
+
+ log.info("Waiting for cluster to grow.")
+ status = self.fs.wait_for_daemons(timeout=60+grace*2)
+ ranks = set([info['gid'] for info in status.get_ranks(fscid)])
+ self.assertTrue(original_ranks.issubset(ranks) and len(ranks) == n)
+ return status
+
+ def shrink(self, n):
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ fscid = self.fs.id
+ status = self.fs.status()
+ log.info("status = {0}".format(status))
+
+ original_ranks = set([info['gid'] for info in status.get_ranks(fscid)])
+ _ = set([info['gid'] for info in status.get_standbys()])
+
+ oldmax = self.fs.get_var('max_mds')
+ self.assertTrue(n < oldmax)
+ self.fs.set_max_mds(n)
+
+ # Wait until the monitor finishes stopping ranks >= n
+ log.info("Waiting for cluster to shink.")
+ status = self.fs.wait_for_daemons(timeout=60+grace*2)
+ ranks = set([info['gid'] for info in status.get_ranks(fscid)])
+ self.assertTrue(ranks.issubset(original_ranks) and len(ranks) == n)
+ return status
+
+
+ def test_grow(self):
+ """
+ That the MDS cluster grows after increasing max_mds.
+ """
+
+ # Need all my standbys up as well as the active daemons
+ # self.wait_for_daemon_start() necessary?
+
+ self.grow(2)
+ self.grow(3)
+
+
+ def test_shrink(self):
+ """
+ That the MDS cluster shrinks automatically after decreasing max_mds.
+ """
+
+ self.grow(3)
+ self.shrink(1)
+
+ def test_up_less_than_max(self):
+ """
+ That a health warning is generated when max_mds is greater than active count.
+ """
+
+ status = self.fs.status()
+ mdss = [info['gid'] for info in status.get_all()]
+ self.fs.set_max_mds(len(mdss)+1)
+ self.wait_for_health("MDS_UP_LESS_THAN_MAX", 30)
+ self.shrink(2)
+ self.wait_for_health_clear(30)
+
+ def test_down_health(self):
+ """
+ That marking a FS down does not generate a health warning
+ """
+
+ self.mount_a.umount_wait()
+
+ self.fs.set_down()
+ try:
+ self.wait_for_health("", 30)
+ raise RuntimeError("got health warning?")
+ except RuntimeError as e:
+ if "Timed out after" in str(e):
+ pass
+ else:
+ raise
+
+ def test_down_twice(self):
+ """
+ That marking a FS down twice does not wipe old_max_mds.
+ """
+
+ self.mount_a.umount_wait()
+
+ self.grow(2)
+ self.fs.set_down()
+ self.fs.wait_for_daemons()
+ self.fs.set_down(False)
+ self.assertEqual(self.fs.get_var("max_mds"), 2)
+ self.fs.wait_for_daemons(timeout=60)
+
+ def test_down_grow(self):
+ """
+ That setting max_mds undoes down.
+ """
+
+ self.mount_a.umount_wait()
+
+ self.fs.set_down()
+ self.fs.wait_for_daemons()
+ self.grow(2)
+ self.fs.wait_for_daemons()
+
+ def test_down(self):
+ """
+ That down setting toggles and sets max_mds appropriately.
+ """
+
+ self.mount_a.umount_wait()
+
+ self.fs.set_down()
+ self.fs.wait_for_daemons()
+ self.assertEqual(self.fs.get_var("max_mds"), 0)
+ self.fs.set_down(False)
+ self.assertEqual(self.fs.get_var("max_mds"), 1)
+ self.fs.wait_for_daemons()
+ self.assertEqual(self.fs.get_var("max_mds"), 1)
+
+ def test_hole(self):
+ """
+ Test that a hole cannot be created in the FS ranks.
+ """
+
+ fscid = self.fs.id
+
+ self.grow(2)
+
+ self.fs.set_max_mds(1)
+ log.info("status = {0}".format(self.fs.status()))
+
+ self.fs.set_max_mds(3)
+ # Don't wait for rank 1 to stop
+
+ self.fs.set_max_mds(2)
+ # Prevent another MDS from taking rank 1
+ # XXX This is a little racy because rank 1 may have stopped and a
+ # standby assigned to rank 1 before joinable=0 is set.
+ self.fs.set_joinable(False) # XXX keep in mind changing max_mds clears this flag
+
+ try:
+ status = self.fs.wait_for_daemons(timeout=90)
+ raise RuntimeError("should not be able to successfully shrink cluster!")
+ except:
+ # could not shrink to max_mds=2 and reach 2 actives (because joinable=False)
+ status = self.fs.status()
+ ranks = set([info['rank'] for info in status.get_ranks(fscid)])
+ self.assertTrue(ranks == set([0]))
+ finally:
+ log.info("status = {0}".format(status))
+
+ def test_thrash(self):
+ """
+ Test that thrashing max_mds does not fail.
+ """
+
+ max_mds = 2
+ for i in range(0, 100):
+ self.fs.set_max_mds(max_mds)
+ max_mds = (max_mds+1)%3+1
+
+ self.fs.wait_for_daemons(timeout=90)
+
+class TestFailover(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 2
+
+ def test_simple(self):
+ """
+ That when the active MDS is killed, a standby MDS is promoted into
+ its rank after the grace period.
+
+ This is just a simple unit test, the harder cases are covered
+ in thrashing tests.
+ """
+
+ # Need all my standbys up as well as the active daemons
+ self.wait_for_daemon_start()
+
+ (original_active, ) = self.fs.get_active_names()
+ original_standbys = self.mds_cluster.get_standby_daemons()
+
+ # Kill the rank 0 daemon's physical process
+ self.fs.mds_stop(original_active)
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ # Wait until the monitor promotes his replacement
+ def promoted():
+ active = self.fs.get_active_names()
+ return active and active[0] in original_standbys
+
+ log.info("Waiting for promotion of one of the original standbys {0}".format(
+ original_standbys))
+ self.wait_until_true(
+ promoted,
+ timeout=grace*2)
+
+ # Start the original rank 0 daemon up again, see that he becomes a standby
+ self.fs.mds_restart(original_active)
+ self.wait_until_true(
+ lambda: original_active in self.mds_cluster.get_standby_daemons(),
+ timeout=60 # Approximately long enough for MDS to start and mon to notice
+ )
+
+ def test_client_abort(self):
+ """
+ That a client will respect fuse_require_active_mds and error out
+ when the cluster appears to be unavailable.
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Requires FUSE client to inject client metadata")
+
+ require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true"
+ if not require_active:
+ raise case.SkipTest("fuse_require_active_mds is not set")
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ # Check it's not laggy to begin with
+ (original_active, ) = self.fs.get_active_names()
+ self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active))
+
+ self.mounts[0].umount_wait()
+
+ # Control: that we can mount and unmount usually, while the cluster is healthy
+ self.mounts[0].mount()
+ self.mounts[0].wait_until_mounted()
+ self.mounts[0].umount_wait()
+
+ # Stop the daemon processes
+ self.fs.mds_stop()
+
+ # Wait for everyone to go laggy
+ def laggy():
+ mdsmap = self.fs.get_mds_map()
+ for info in mdsmap['info'].values():
+ if "laggy_since" not in info:
+ return False
+
+ return True
+
+ self.wait_until_true(laggy, grace * 2)
+ with self.assertRaises(CommandFailedError):
+ self.mounts[0].mount()
+
+ def test_standby_count_wanted(self):
+ """
+ That cluster health warnings are generated by insufficient standbys available.
+ """
+
+ # Need all my standbys up as well as the active daemons
+ self.wait_for_daemon_start()
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ standbys = self.mds_cluster.get_standby_daemons()
+ self.assertGreaterEqual(len(standbys), 1)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)))
+
+ # Kill a standby and check for warning
+ victim = standbys.pop()
+ self.fs.mds_stop(victim)
+ log.info("waiting for insufficient standby daemon warning")
+ self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
+
+ # restart the standby, see that he becomes a standby, check health clears
+ self.fs.mds_restart(victim)
+ self.wait_until_true(
+ lambda: victim in self.mds_cluster.get_standby_daemons(),
+ timeout=60 # Approximately long enough for MDS to start and mon to notice
+ )
+ self.wait_for_health_clear(timeout=30)
+
+ # Set it one greater than standbys ever seen
+ standbys = self.mds_cluster.get_standby_daemons()
+ self.assertGreaterEqual(len(standbys), 1)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
+ log.info("waiting for insufficient standby daemon warning")
+ self.wait_for_health("MDS_INSUFFICIENT_STANDBY", grace*2)
+
+ # Set it to 0
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
+ self.wait_for_health_clear(timeout=30)
+
+ def test_discontinuous_mdsmap(self):
+ """
+ That discontinuous mdsmap does not affect failover.
+ See http://tracker.ceph.com/issues/24856.
+ """
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.umount_wait()
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+ monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds"))
+
+ mds_0 = self.fs.get_rank(rank=0, status=status)
+ self.fs.rank_freeze(True, rank=0) # prevent failover
+ self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status)
+ self.wait_until_true(
+ lambda: "laggy_since" in self.fs.get_rank(),
+ timeout=grace * 2
+ )
+
+ self.fs.rank_fail(rank=1)
+ self.fs.wait_for_state('up:resolve', rank=1, timeout=30)
+
+ # Make sure mds_0's monitor connection gets reset
+ time.sleep(monc_timeout * 2)
+
+ # Continue rank 0, it will get discontinuous mdsmap
+ self.fs.rank_signal(signal.SIGCONT, rank=0)
+ self.wait_until_true(
+ lambda: "laggy_since" not in self.fs.get_rank(rank=0),
+ timeout=grace * 2
+ )
+
+ # mds.b will be stuck at 'reconnect' state if snapserver gets confused
+ # by discontinuous mdsmap
+ self.fs.wait_for_state('up:active', rank=1, timeout=30)
+ self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid'])
+ self.fs.rank_freeze(False, rank=0)
+
+class TestStandbyReplay(CephFSTestCase):
+ MDSS_REQUIRED = 4
+
+ def _confirm_no_replay(self):
+ status = self.fs.status()
+ _ = len(list(status.get_standbys()))
+ self.assertEqual(0, len(list(self.fs.get_replays(status=status))))
+ return status
+
+ def _confirm_single_replay(self, full=True, status=None, retries=3):
+ status = self.fs.wait_for_daemons(status=status)
+ ranks = sorted(self.fs.get_mds_map(status=status)['in'])
+ replays = list(self.fs.get_replays(status=status))
+ checked_replays = set()
+ for rank in ranks:
+ has_replay = False
+ for replay in replays:
+ if replay['rank'] == rank:
+ self.assertFalse(has_replay)
+ has_replay = True
+ checked_replays.add(replay['gid'])
+ if full and not has_replay:
+ if retries <= 0:
+ raise RuntimeError("rank "+str(rank)+" has no standby-replay follower")
+ else:
+ retries = retries-1
+ time.sleep(2)
+ self.assertEqual(checked_replays, set(info['gid'] for info in replays))
+ return status
+
+ def _check_replay_takeover(self, status, rank=0):
+ replay = self.fs.get_replay(rank=rank, status=status)
+ new_status = self.fs.wait_for_daemons()
+ new_active = self.fs.get_rank(rank=rank, status=new_status)
+ if replay:
+ self.assertEqual(replay['gid'], new_active['gid'])
+ else:
+ # double check takeover came from a standby (or some new daemon via restart)
+ found = False
+ for info in status.get_standbys():
+ if info['gid'] == new_active['gid']:
+ found = True
+ break
+ if not found:
+ for info in status.get_all():
+ self.assertNotEqual(info['gid'], new_active['gid'])
+ return new_status
+
+ def test_standby_replay_singleton(self):
+ """
+ That only one MDS becomes standby-replay.
+ """
+
+ self._confirm_no_replay()
+ self.fs.set_allow_standby_replay(True)
+ time.sleep(30)
+ self._confirm_single_replay()
+
+ def test_standby_replay_singleton_fail(self):
+ """
+ That failures don't violate singleton constraint.
+ """
+
+ self._confirm_no_replay()
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ self.fs.rank_restart(status=status)
+ status = self._check_replay_takeover(status)
+ status = self._confirm_single_replay(status=status)
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ self.fs.rank_fail()
+ status = self._check_replay_takeover(status)
+ status = self._confirm_single_replay(status=status)
+
+ def test_standby_replay_singleton_fail_multimds(self):
+ """
+ That failures don't violate singleton constraint with multiple actives.
+ """
+
+ status = self._confirm_no_replay()
+ new_max_mds = randint(2, len(list(status.get_standbys())))
+ self.fs.set_max_mds(new_max_mds)
+ self.fs.wait_for_daemons() # wait for actives to come online!
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay(full=False)
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ victim = randint(0, new_max_mds-1)
+ self.fs.rank_restart(rank=victim, status=status)
+ status = self._check_replay_takeover(status, rank=victim)
+ status = self._confirm_single_replay(status=status, full=False)
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ victim = randint(0, new_max_mds-1)
+ self.fs.rank_fail(rank=victim)
+ status = self._check_replay_takeover(status, rank=victim)
+ status = self._confirm_single_replay(status=status, full=False)
+
+ def test_standby_replay_failure(self):
+ """
+ That the failure of a standby-replay daemon happens cleanly
+ and doesn't interrupt anything else.
+ """
+
+ status = self._confirm_no_replay()
+ self.fs.set_max_mds(1)
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ victim = self.fs.get_replay(status=status)
+ self.fs.mds_restart(mds_id=victim['name'])
+ status = self._confirm_single_replay(status=status)
+
+ def test_rank_stopped(self):
+ """
+ That when a rank is STOPPED, standby replays for
+ that rank get torn down
+ """
+
+ status = self._confirm_no_replay()
+ standby_count = len(list(status.get_standbys()))
+ self.fs.set_max_mds(2)
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+
+ self.fs.set_max_mds(1) # stop rank 1
+
+ status = self._confirm_single_replay()
+ self.assertEqual(standby_count, len(list(status.get_standbys())))
+
+
+class TestMultiFilesystems(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 4
+
+ # We'll create our own filesystems and start our own daemons
+ REQUIRE_FILESYSTEM = False
+
+ def setUp(self):
+ super(TestMultiFilesystems, self).setUp()
+ self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
+ "enable_multiple", "true",
+ "--yes-i-really-mean-it")
+
+ def _setup_two(self):
+ fs_a = self.mds_cluster.newfs("alpha")
+ fs_b = self.mds_cluster.newfs("bravo")
+
+ self.mds_cluster.mds_restart()
+
+ # Wait for both filesystems to go healthy
+ fs_a.wait_for_daemons()
+ fs_b.wait_for_daemons()
+
+ # Reconfigure client auth caps
+ for mount in self.mounts:
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(mount.client_id),
+ 'mds', 'allow',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ fs_a.get_data_pool_name(), fs_b.get_data_pool_name()))
+
+ return fs_a, fs_b
+
+ def test_clients(self):
+ fs_a, fs_b = self._setup_two()
+
+ # Mount a client on fs_a
+ self.mount_a.mount(mount_fs_name=fs_a.name)
+ self.mount_a.write_n_mb("pad.bin", 1)
+ self.mount_a.write_n_mb("test.bin", 2)
+ a_created_ino = self.mount_a.path_to_ino("test.bin")
+ self.mount_a.create_files()
+
+ # Mount a client on fs_b
+ self.mount_b.mount(mount_fs_name=fs_b.name)
+ self.mount_b.write_n_mb("test.bin", 1)
+ b_created_ino = self.mount_b.path_to_ino("test.bin")
+ self.mount_b.create_files()
+
+ # Check that a non-default filesystem mount survives an MDS
+ # failover (i.e. that map subscription is continuous, not
+ # just the first time), reproduces #16022
+ old_fs_b_mds = fs_b.get_active_names()[0]
+ self.mds_cluster.mds_stop(old_fs_b_mds)
+ self.mds_cluster.mds_fail(old_fs_b_mds)
+ fs_b.wait_for_daemons()
+ background = self.mount_b.write_background()
+ # Raise exception if the write doesn't finish (i.e. if client
+ # has not kept up with MDS failure)
+ try:
+ self.wait_until_true(lambda: background.finished, timeout=30)
+ except RuntimeError:
+ # The mount is stuck, we'll have to force it to fail cleanly
+ background.stdin.close()
+ self.mount_b.umount_wait(force=True)
+ raise
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ # See that the client's files went into the correct pool
+ self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024))
+ self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024))
+
+ def test_standby(self):
+ fs_a, fs_b = self._setup_two()
+
+ # With one active daemon per filesystem, the remaining two MDS daemons should be standbys
+ a_daemons = fs_a.get_active_names()
+ b_daemons = fs_b.get_active_names()
+ self.assertEqual(len(a_daemons), 1)
+ self.assertEqual(len(b_daemons), 1)
+ original_a = a_daemons[0]
+ original_b = b_daemons[0]
+ expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons))
+
+ # Need all my standbys up as well as the active daemons
+ self.wait_for_daemon_start()
+ self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons())
+
+ # Kill fs_a's active MDS, see a standby take over
+ self.mds_cluster.mds_stop(original_a)
+ self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a)
+ self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30,
+ reject_fn=lambda v: v > 1)
+ # Assert that it's a *different* daemon that has now appeared in the map for fs_a
+ self.assertNotEqual(fs_a.get_active_names()[0], original_a)
+
+ # Kill fs_b's active MDS, see a standby take over
+ self.mds_cluster.mds_stop(original_b)
+ self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b)
+ self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
+ reject_fn=lambda v: v > 1)
+ # Assert that it's a *different* daemon that has now appeared in the map for fs_b
+ self.assertNotEqual(fs_b.get_active_names()[0], original_b)
+
+ # Both of the original active daemons should be gone, and all standbys used up
+ self.assertEqual(self.mds_cluster.get_standby_daemons(), set())
+
+ # Restart the ones I killed, see them reappear as standbys
+ self.mds_cluster.mds_restart(original_a)
+ self.mds_cluster.mds_restart(original_b)
+ self.wait_until_true(
+ lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(),
+ timeout=30
+ )
+
+ def test_grow_shrink(self):
+ # Usual setup...
+ fs_a, fs_b = self._setup_two()
+
+ # Increase max_mds on fs_b, see a standby take up the role
+ fs_b.set_max_mds(2)
+ self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ # Increase max_mds on fs_a, see a standby take up the role
+ fs_a.set_max_mds(2)
+ self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ # Shrink fs_b back to 1, see a daemon go back to standby
+ fs_b.set_max_mds(1)
+ self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ # Grow fs_a up to 3, see the former fs_b daemon join it.
+ fs_a.set_max_mds(3)
+ self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60,
+ reject_fn=lambda v: v > 3 or v < 2)
diff --git a/qa/tasks/cephfs/test_flush.py b/qa/tasks/cephfs/test_flush.py
new file mode 100644
index 00000000..ee0b1c92
--- /dev/null
+++ b/qa/tasks/cephfs/test_flush.py
@@ -0,0 +1,113 @@
+
+from textwrap import dedent
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
+
+
+class TestFlush(CephFSTestCase):
+ def test_flush(self):
+ self.mount_a.run_shell(["mkdir", "mydir"])
+ self.mount_a.run_shell(["touch", "mydir/alpha"])
+ dir_ino = self.mount_a.path_to_ino("mydir")
+ file_ino = self.mount_a.path_to_ino("mydir/alpha")
+
+ # Unmount the client so that it isn't still holding caps
+ self.mount_a.umount_wait()
+
+ # Before flush, the dirfrag object does not exist
+ with self.assertRaises(ObjectNotFound):
+ self.fs.list_dirfrag(dir_ino)
+
+ # Before flush, the file's backtrace has not been written
+ with self.assertRaises(ObjectNotFound):
+ self.fs.read_backtrace(file_ino)
+
+ # Before flush, there are no dentries in the root
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
+
+ # Execute flush
+ flush_data = self.fs.mds_asok(["flush", "journal"])
+ self.assertEqual(flush_data['return_code'], 0)
+
+ # After flush, the dirfrag object has been created
+ dir_list = self.fs.list_dirfrag(dir_ino)
+ self.assertEqual(dir_list, ["alpha_head"])
+
+ # And the 'mydir' dentry is in the root
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head'])
+
+ # ...and the data object has its backtrace
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']])
+ self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']])
+ self.assertEqual(file_ino, backtrace['ino'])
+
+ # ...and the journal is truncated to just a single subtreemap from the
+ # newly created segment
+ summary_output = self.fs.journal_tool(["event", "get", "summary"], 0)
+ try:
+ self.assertEqual(summary_output,
+ dedent(
+ """
+ Events by type:
+ SUBTREEMAP: 1
+ Errors: 0
+ """
+ ).strip())
+ except AssertionError:
+ # In some states, flushing the journal will leave you
+ # an extra event from locks a client held. This is
+ # correct behaviour: the MDS is flushing the journal,
+ # it's just that new events are getting added too.
+ # In this case, we should nevertheless see a fully
+ # empty journal after a second flush.
+ self.assertEqual(summary_output,
+ dedent(
+ """
+ Events by type:
+ SUBTREEMAP: 1
+ UPDATE: 1
+ Errors: 0
+ """
+ ).strip())
+ flush_data = self.fs.mds_asok(["flush", "journal"])
+ self.assertEqual(flush_data['return_code'], 0)
+ self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0),
+ dedent(
+ """
+ Events by type:
+ SUBTREEMAP: 1
+ Errors: 0
+ """
+ ).strip())
+
+ # Now for deletion!
+ # We will count the RADOS deletions and MDS file purges, to verify that
+ # the expected behaviour is happening as a result of the purge
+ initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete']
+ initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued']
+
+ # Use a client to delete a file
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ self.mount_a.run_shell(["rm", "-rf", "mydir"])
+
+ # Flush the journal so that the directory inode can be purged
+ flush_data = self.fs.mds_asok(["flush", "journal"])
+ self.assertEqual(flush_data['return_code'], 0)
+
+ # We expect the file and its parent directory to be purged (two strays enqueued)
+ self.wait_until_true(
+ lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2,
+ 60)
+
+ # We expect two deletions, one of the dirfrag and one of the backtrace
+ self.wait_until_true(
+ lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2,
+ 60) # timeout is fairly long to allow for tick+rados latencies
+
+ with self.assertRaises(ObjectNotFound):
+ self.fs.list_dirfrag(dir_ino)
+ with self.assertRaises(ObjectNotFound):
+ self.fs.read_backtrace(file_ino)
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py
new file mode 100644
index 00000000..cc861b38
--- /dev/null
+++ b/qa/tasks/cephfs/test_forward_scrub.py
@@ -0,0 +1,298 @@
+
+"""
+Test that the forward scrub functionality can traverse metadata and apply
+requested tags, on well formed metadata.
+
+This is *not* the real testing for forward scrub, which will need to test
+how the functionality responds to damaged metadata.
+
+"""
+import json
+
+import logging
+import six
+
+from collections import namedtuple
+from io import BytesIO
+from textwrap import dedent
+
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+import struct
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class TestForwardScrub(CephFSTestCase):
+ MDSS_REQUIRED = 1
+
+ def _read_str_xattr(self, pool, obj, attr):
+ """
+ Read a ceph-encoded string from a rados xattr
+ """
+ output = self.fs.rados(["getxattr", obj, attr], pool=pool,
+ stdout_data=BytesIO())
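+ # ceph encodes a string as a 32-bit length prefix followed by the raw bytes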
+ strlen = struct.unpack('i', output[0:4])[0]
+ return six.ensure_str(output[4:(4 + strlen)], encoding='ascii')
+
+ def _get_paths_to_ino(self):
+ inos = {}
+ p = self.mount_a.run_shell(["find", "./"])
+ paths = p.stdout.getvalue().strip().split()
+ for path in paths:
+ inos[path] = self.mount_a.path_to_ino(path)
+
+ return inos
+
+ def test_apply_tag(self):
+ self.mount_a.run_shell(["mkdir", "parentdir"])
+ self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
+ self.mount_a.run_shell(["touch", "rfile"])
+ self.mount_a.run_shell(["touch", "parentdir/pfile"])
+ self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])
+
+ # Build a structure mapping path to inode, as we will later want
+ # to check object by object and objects are named after ino number
+ inos = self._get_paths_to_ino()
+
+ # Flush metadata: this is a friendly test of forward scrub so we're skipping
+ # the part where it's meant to cope with dirty metadata
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ tag = "mytag"
+
+ # Execute tagging forward scrub
+ self.fs.mds_asok(["tag", "path", "/parentdir", tag])
+ # Wait for completion
+ import time
+ time.sleep(10)
+ # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
+ # watch that instead
+
+ # Check that dirs were tagged
+ for dirpath in ["./parentdir", "./parentdir/childdir"]:
+ self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())
+
+ # Check that files were tagged
+ for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
+ self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())
+
+ # This guy wasn't in the tag path, shouldn't have been tagged
+ self.assertUntagged(inos["./rfile"])
+
+ def assertUntagged(self, ino):
+ file_obj_name = "{0:x}.00000000".format(ino)
+ with self.assertRaises(CommandFailedError):
+ self._read_str_xattr(
+ self.fs.get_data_pool_name(),
+ file_obj_name,
+ "scrub_tag"
+ )
+
+ def assertTagged(self, ino, tag, pool):
+ file_obj_name = "{0:x}.00000000".format(ino)
+ wrote = self._read_str_xattr(
+ pool,
+ file_obj_name,
+ "scrub_tag"
+ )
+ self.assertEqual(wrote, tag)
+
+ def _validate_linkage(self, expected):
+ inos = self._get_paths_to_ino()
+ try:
+ self.assertDictEqual(inos, expected)
+ except AssertionError:
+ log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
+ log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
+ raise
+
+ def test_orphan_scan(self):
+ # Create some files whose metadata we will flush
+ self.mount_a.run_python(dedent("""
+ import os
+ mount_point = "{mount_point}"
+ parent = os.path.join(mount_point, "parent")
+ os.mkdir(parent)
+ flushed = os.path.join(parent, "flushed")
+ os.mkdir(flushed)
+ for f in ["alpha", "bravo", "charlie"]:
+ open(os.path.join(flushed, f), 'w').write(f)
+ """.format(mount_point=self.mount_a.mountpoint)))
+
+ inos = self._get_paths_to_ino()
+
+ # Flush journal
+ # Umount before flush to avoid cap releases putting
+ # things we don't want in the journal later.
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Create a new inode that's just in the log, i.e. would
+ # look orphaned to backward scan if backward scan were not
+ # respecting the scrub_tag xattr.
+ self.mount_a.mount()
+ self.mount_a.run_shell(["mkdir", "parent/unflushed"])
+ self.mount_a.run_shell(["dd", "if=/dev/urandom",
+ "of=./parent/unflushed/jfile",
+ "bs=1M", "count=8"])
+ inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
+ inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
+ self.mount_a.umount_wait()
+
+ # Orphan an inode by deleting its dentry
+ # Our victim will be.... bravo.
+ self.mount_a.umount_wait()
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+ frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
+ self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])
+
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ # See that the orphaned file is indeed missing from a client's POV
+ self.mount_a.mount()
+ damaged_state = self._get_paths_to_ino()
+ self.assertNotIn("./parent/flushed/bravo", damaged_state)
+ self.mount_a.umount_wait()
+
+ # Run a tagging forward scrub
+ tag = "mytag123"
+ self.fs.mds_asok(["tag", "path", "/parent", tag])
+
+ # See that the orphan was not tagged
+ self.assertUntagged(inos['./parent/flushed/bravo'])
+
+ # See that the flushed-metadata-and-still-present files are tagged
+ self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
+ self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())
+
+ # See that journalled-but-not-flushed file *was* tagged
+ self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())
+
+ # Run cephfs-data-scan targeting only orphans
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+ self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
+ self.fs.data_scan([
+ "scan_inodes",
+ "--filter-tag", tag,
+ self.fs.get_data_pool_name()
+ ])
+
+ # After in-place injection stats should be kosher again
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)
+
+ # And we should have all the same linkage we started with,
+ # and no lost+found, and no extra inodes!
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount()
+ self._validate_linkage(inos)
+
+ def _stash_inotable(self):
+ # Get all active ranks
+ ranks = self.fs.get_all_mds_rank()
+
+ inotable_dict = {}
+ for rank in ranks:
+ inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
+ print("Trying to fetch inotable object: " + inotable_oid)
+
+ #self.fs.get_metadata_object("InoTable", "mds0_inotable")
+ inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
+ inotable_dict[inotable_oid] = inotable_raw
+ return inotable_dict
+
+ def test_inotable_sync(self):
+ self.mount_a.write_n_mb("file1_sixmegs", 6)
+
+ # Flush journal
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ inotable_copy = self._stash_inotable()
+
+ self.mount_a.mount()
+
+ self.mount_a.write_n_mb("file2_sixmegs", 6)
+ self.mount_a.write_n_mb("file3_sixmegs", 6)
+
+ inos = self._get_paths_to_ino()
+
+ # Flush journal
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ self.mount_a.umount_wait()
+
+ with self.assert_cluster_log("inode table repaired", invert_match=True):
+ out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ self.mds_cluster.mds_stop()
+ self.mds_cluster.mds_fail()
+
+ # Truncate the journal (to ensure the inotable on disk
+ # is all that will be in the InoTable in memory)
+
+ self.fs.journal_tool(["event", "splice",
+ "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)
+
+ self.fs.journal_tool(["event", "splice",
+ "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)
+
+ # Revert to old inotable.
+ for key, value in inotable_copy.items():
+ self.fs.put_metadata_object_raw(key, value)
+
+ self.mds_cluster.mds_restart()
+ self.fs.wait_for_daemons()
+
+ with self.assert_cluster_log("inode table repaired"):
+ out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ self.mds_cluster.mds_stop()
+ table_text = self.fs.table_tool(["0", "show", "inode"])
+ table = json.loads(table_text)
+ self.assertGreater(
+ table['0']['data']['inotable']['free'][0]['start'],
+ inos['./file3_sixmegs'])
+
+ def test_backtrace_repair(self):
+ """
+ That the MDS can repair an inode's backtrace in the data pool
+ if it is found to be damaged.
+ """
+ # Create a file for subsequent checks
+ self.mount_a.run_shell(["mkdir", "parent_a"])
+ self.mount_a.run_shell(["touch", "parent_a/alpha"])
+ file_ino = self.mount_a.path_to_ino("parent_a/alpha")
+
+ # That backtrace and layout are written after initial flush
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_a'],
+ [a['dname'] for a in backtrace['ancestors']])
+
+ # Go corrupt the backtrace
+ self.fs._write_data_xattr(file_ino, "parent",
+ "oh i'm sorry did i overwrite your xattr?")
+
+ with self.assert_cluster_log("bad backtrace on inode"):
+ out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_a'],
+ [a['dname'] for a in backtrace['ancestors']])
diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py
new file mode 100644
index 00000000..0ed5da28
--- /dev/null
+++ b/qa/tasks/cephfs/test_fragment.py
@@ -0,0 +1,229 @@
+
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.orchestra import run
+
+import logging
+log = logging.getLogger(__name__)
+
+
+class TestFragmentation(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def get_splits(self):
+ return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split']
+
+ def get_merges(self):
+ return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge']
+
+ def get_dir_ino(self, path):
+ dir_cache = self.fs.read_cache(path, 0)
+ dir_ino = None
+ dir_inono = self.mount_a.path_to_ino(path.strip("/"))
+ for ino in dir_cache:
+ if ino['ino'] == dir_inono:
+ dir_ino = ino
+ break
+ self.assertIsNotNone(dir_ino)
+ return dir_ino
+
+ def _configure(self, **kwargs):
+ """
+ Apply kwargs as MDS configuration settings, enable dirfrags
+ and restart the MDSs.
+ """
+
+ for k, v in kwargs.items():
+ self.ceph_cluster.set_ceph_conf("mds", k, v.__str__())
+
+ self.mds_cluster.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ def test_oversize(self):
+ """
+ That a directory is split when it becomes too large.
+ """
+
+ split_size = 20
+ merge_size = 5
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=1
+ )
+
+ self.assertEqual(self.get_splits(), 0)
+
+ self.mount_a.create_n_files("splitdir/file", split_size + 1)
+
+ self.wait_until_true(
+ lambda: self.get_splits() == 1,
+ timeout=30
+ )
+
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ self.assertEqual(len(frags), 2)
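+ # 0x10000000000 is the first non-system inode number; a 1-bit split yields exactly frags 0* and 1*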
+ self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*")
+ self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*")
+ self.assertEqual(
+ sum([len(f['dentries']) for f in frags]),
+ split_size + 1
+ )
+
+ self.assertEqual(self.get_merges(), 0)
+
+ self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")])
+
+ self.wait_until_true(
+ lambda: self.get_merges() == 1,
+ timeout=30
+ )
+
+ self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1)
+
+ def test_rapid_creation(self):
+ """
+ That the fast-splitting threshold of 1.5x the normal split size is
+ applied when dentries are created quickly.
+ """
+
+ split_size = 100
+ merge_size = 1
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=3,
+ mds_bal_fragment_size_max=int(split_size * 1.5 + 2)
+ )
+
+ # We test this only at a single split level. If a client was sending
+ # IO so fast that it hit a second split before the first split
+ # was complete, it could violate mds_bal_fragment_size_max -- there
+ # is a window where the child dirfrags of a split are unfrozen
+ # (so they can grow), but still have STATE_FRAGMENTING (so they
+ # can't be split).
+
+ # By writing 4x the split size when the split bits are set
+ # to 3 (i.e. 4-ways), I am reasonably sure to see precisely
+ # one split. The test is to check whether that split
+ # happens soon enough that the client doesn't exceed
+ # 2x the split_size (the "immediate" split mode should
+ # kick in at 1.5x the split size).
+
+ self.assertEqual(self.get_splits(), 0)
+ self.mount_a.create_n_files("splitdir/file", split_size * 4)
+ self.wait_until_equal(
+ self.get_splits,
+ 1,
+ reject_fn=lambda s: s > 1,
+ timeout=30
+ )
+
+ def test_deep_split(self):
+ """
+ That when the directory grows many times larger than split size,
+ the fragments get split again.
+ """
+
+ split_size = 100
+ merge_size = 1 # i.e. don't merge a frag unless it's empty
+ split_bits = 1
+
+ branch_factor = 2**split_bits
+
+ # Arbitrary: how many levels shall we try fragmenting before
+ # ending the test?
+ max_depth = 5
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=split_bits
+ )
+
+ # Each iteration we will create another level of fragments. The
+ # placement of dentries into fragments is by hashes (i.e. pseudo
+ # random), so we rely on statistics to get the behaviour that
+ # by writing about 1.5x as many dentries as the split_size times
+ # the number of frags, we will get them all to exceed their
+ # split size and trigger a split.
+ depth = 0
+ files_written = 0
+ splits_expected = 0
+ while depth < max_depth:
+ log.info("Writing files for depth {0}".format(depth))
+ target_files = branch_factor**depth * int(split_size * 1.5)
+ create_files = target_files - files_written
+
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
+ "{0} Writing {1} files (depth={2})".format(
+ self.__class__.__name__, create_files, depth
+ ))
+ self.mount_a.create_n_files("splitdir/file_{0}".format(depth),
+ create_files)
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
+ "{0} Done".format(self.__class__.__name__))
+
+ files_written += create_files
+ log.info("Now have {0} files".format(files_written))
+
+ splits_expected += branch_factor**depth
+ log.info("Waiting to see {0} splits".format(splits_expected))
+ try:
+ self.wait_until_equal(
+ self.get_splits,
+ splits_expected,
+ timeout=30,
+ reject_fn=lambda x: x > splits_expected
+ )
+
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ self.assertEqual(len(frags), branch_factor**(depth+1))
+ self.assertEqual(
+ sum([len(f['dentries']) for f in frags]),
+ target_files
+ )
+ except:
+ # On failures, log what fragmentation we actually ended
+ # up with. This block is just for logging, at the end
+ # we raise the exception again.
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ log.info("depth={0} splits_expected={1} files_written={2}".format(
+ depth, splits_expected, files_written
+ ))
+ log.info("Dirfrags:")
+ for f in frags:
+ log.info("{0}: {1}".format(
+ f['dirfrag'], len(f['dentries'])
+ ))
+ raise
+
+ depth += 1
+
+ # Remember the inode number because we will be checking for
+ # objects later.
+ dir_inode_no = self.mount_a.path_to_ino("splitdir")
+
+ self.mount_a.run_shell(["rm", "-rf", "splitdir/"])
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # Wait for all strays to purge
+ self.wait_until_equal(
+ lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache']
+ )['mds_cache']['num_strays'],
+ 0,
+ timeout=1200
+ )
+ # Check that the metadata pool objects for all the myriad
+ # child fragments are gone
+ metadata_objs = self.fs.rados(["ls"])
+ frag_objs = []
+ for o in metadata_objs:
+ if o.startswith("{0:x}.".format(dir_inode_no)):
+ frag_objs.append(o)
+ self.assertListEqual(frag_objs, [])
diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py
new file mode 100644
index 00000000..21470c87
--- /dev/null
+++ b/qa/tasks/cephfs/test_full.py
@@ -0,0 +1,398 @@
+
+
+import json
+import logging
+import os
+from textwrap import dedent
+import time
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class FullnessTestCase(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ # Subclasses define whether they're filling the whole cluster or just the data pool
+ data_only = False
+
+ # Subclasses define how many bytes should be written to achieve fullness
+ pool_capacity = None
+ fill_mb = None
+
+ # Subclasses define what fullness means to them
+ def is_full(self):
+ raise NotImplementedError()
+
+ def setUp(self):
+ CephFSTestCase.setUp(self)
+
+ mds_status = self.fs.rank_asok(["status"])
+
+ # Capture the initial OSD map epoch for later use
+ self.initial_osd_epoch = mds_status['osdmap_epoch_barrier']
+
+ def test_barrier(self):
+ """
+ That when an OSD epoch barrier is set on an MDS, subsequently
+ issued capabilities cause clients to update their OSD map to that
+ epoch.
+ """
+
+ # Script that syncs up the client with the MDS OSD map barrier. The barrier
+ # should be updated by the cap flush ack message.
+ pyscript = dedent("""
+ import os
+ fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600)
+ os.fchmod(fd, 0O666)
+ os.fsync(fd)
+ os.close(fd)
+ """)
+
+ # Sync up client with initial MDS OSD map barrier.
+ path = os.path.join(self.mount_a.mountpoint, "foo")
+ self.mount_a.run_python(pyscript.format(path=path))
+
+ # Grab mounts' initial OSD epochs: later we will check that
+ # it hasn't advanced beyond this point.
+ mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()
+
+ # Freshly mounted at start of test, should be up to date with OSD map
+ self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)
+
+ # Set and unset a flag to cause OSD epoch to increment
+ self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
+ self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")
+
+ out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
+ new_epoch = json.loads(out)['epoch']
+ self.assertNotEqual(self.initial_osd_epoch, new_epoch)
+
+ # Do a metadata operation on clients, witness that they end up with
+ # the old OSD map from startup time (nothing has prompted client
+ # to update its map)
+ path = os.path.join(self.mount_a.mountpoint, "foo")
+ self.mount_a.run_python(pyscript.format(path=path))
+ mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
+ self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
+ self.assertEqual(mount_a_barrier, mount_a_initial_barrier)
+
+ # Set a barrier on the MDS
+ self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()])
+
+ # Sync up client with new MDS OSD map barrier
+ path = os.path.join(self.mount_a.mountpoint, "baz")
+ self.mount_a.run_python(pyscript.format(path=path))
+ mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
+ self.assertEqual(mount_a_barrier, new_epoch)
+
+ # Some time passes here because the metadata part of the operation
+ # completes immediately, while the resulting OSD map update happens
+ # asynchronously (it's an Objecter::_maybe_request_map) as a result
+ # of seeing the new epoch barrier.
+ self.wait_until_true(
+ lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
+ timeout=30)
+
+ def _data_pool_name(self):
+ data_pool_names = self.fs.get_data_pool_names()
+ if len(data_pool_names) > 1:
+ raise RuntimeError("This test can't handle multiple data pools")
+ else:
+ return data_pool_names[0]
+
+ def _test_full(self, easy_case):
+ """
+ - That a client trying to write data to a file is prevented
+ from doing so with an -EFULL result
+ - That they are also prevented from creating new files by the MDS.
+ - That they may delete another file to get the system healthy again
+
+ :param easy_case: if true, delete a successfully written file to
+ free up space. else, delete the file that experienced
+ the failed write.
+ """
+
+ osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
+
+ log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))
+
+ # Fill up the cluster. This dd may or may not fail, as it depends on
+ # how soon the cluster recognises its own fullness
+ self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2)
+ try:
+ self.mount_a.write_n_mb("large_file_b", self.fill_mb // 2)
+ except CommandFailedError:
+ log.info("Writing file B failed (full status happened already)")
+ assert self.is_full()
+ else:
+ log.info("Writing file B succeeded (full status will happen soon)")
+ self.wait_until_true(lambda: self.is_full(),
+ timeout=osd_mon_report_interval * 5)
+
+ # Attempting to write more data should give me ENOSPC
+ with self.assertRaises(CommandFailedError) as ar:
+ self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2)
+ self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space"
+
+ # Wait for the MDS to see the latest OSD map so that it will reliably
+ # be applying the policy of rejecting non-deletion metadata operations
+ # while in the full state.
+ osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
+ self.wait_until_true(
+ lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
+ timeout=10)
+
+ if not self.data_only:
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.write_n_mb("small_file_1", 0)
+
+ # Clear out some space
+ if easy_case:
+ self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
+ self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
+ else:
+ # In the hard case it is the file that filled the system.
+ # Before the new #7317 (ENOSPC, epoch barrier) changes, this
+ # would fail because the last objects written would be
+ # stuck in the client cache as objecter operations.
+ self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
+ self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
+
+ # Here we are waiting for two things to happen:
+ # * The MDS to purge the stray folder and execute object deletions
+ # * The OSDs to inform the mon that they are no longer full
+ self.wait_until_true(lambda: not self.is_full(),
+ timeout=osd_mon_report_interval * 5)
+
+ # Wait for the MDS to see the latest OSD map so that it will reliably
+ # be applying the free space policy
+ osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
+ self.wait_until_true(
+ lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
+ timeout=10)
+
+ # Now I should be able to write again
+ self.mount_a.write_n_mb("large_file", 50, seek=0)
+
+ # Ensure that the MDS keeps its OSD epoch barrier across a restart
+
+ def test_full_different_file(self):
+ self._test_full(True)
+
+ def test_full_same_file(self):
+ self._test_full(False)
+
+ def _remote_write_test(self, template):
+ """
+ Run some remote python in a way that's useful for
+ testing free space behaviour (see test_* methods using this)
+ """
+ file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")
+
+ # Enough to trip the full flag
+ osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
+ mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))
+
+ # Sufficient data to cause RADOS cluster to go 'full'
+ log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))
+
+ # Long enough for RADOS cluster to notice it is full and set flag on mons
+ # (report_interval for mon to learn PG stats, tick interval for it to update OSD map,
+ # factor of 1.5 for I/O + network latency in committing OSD map and distributing it
+ # to the OSDs)
+ full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5
+
+ # Configs for this test should bring this setting down in order to
+ # run reasonably quickly
+ if osd_mon_report_interval > 10:
+ log.warning("This test may run rather slowly unless you decrease"
+ "osd_mon_report_interval (5 is a good setting)!")
+
+ self.mount_a.run_python(template.format(
+ fill_mb=self.fill_mb,
+ file_path=file_path,
+ full_wait=full_wait,
+ is_fuse=isinstance(self.mount_a, FuseMount)
+ ))
+
+ def test_full_fclose(self):
+ # A remote script which opens a file handle, fills up the filesystem, and then
+ # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
+ remote_script = dedent("""
+ import time
+ import datetime
+ import subprocess
+ import os
+
+ # Write some buffered data through before going full, all should be well
+ print("writing some data through which we expect to succeed")
+ bytes = 0
+ f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
+ bytes += os.write(f, b'a' * 512 * 1024)
+ os.fsync(f)
+ print("fsync'ed data successfully, will now attempt to fill fs")
+
+ # Okay, now we're going to fill up the filesystem, and then keep
+ # writing until we see an error from fsync. As long as we're doing
+ # buffered IO, the error should always only appear from fsync and not
+ # from write
+ full = False
+
+ for n in range(0, int({fill_mb} * 0.9)):
+ bytes += os.write(f, b'x' * 1024 * 1024)
+ print("wrote {{0}} bytes via buffered write, may repeat".format(bytes))
+ print("done writing {{0}} bytes".format(bytes))
+
+ # OK, now we should sneak in under the full condition
+ # due to the time it takes the OSDs to report to the
+ # mons, and get a successful fsync on our full-making data
+ os.fsync(f)
+ print("successfully fsync'ed prior to getting full state reported")
+
+ # buffered write, add more dirty data to the buffer
+ print("starting buffered write")
+ try:
+ for n in range(0, int({fill_mb} * 0.2)):
+ bytes += os.write(f, b'x' * 1024 * 1024)
+ print("sleeping a bit as we've exceeded 90% of our expected full ratio")
+ time.sleep({full_wait})
+ except OSError:
+ pass
+
+ print("wrote, now waiting 30s and then doing a close we expect to fail")
+
+ # Wait long enough for a background flush that should fail
+ time.sleep(30)
+
+ if {is_fuse}:
+ # ...and check that the failed background flush is reflected in fclose
+ try:
+ os.close(f)
+ except OSError:
+ print("close() returned an error as expected")
+ else:
+ raise RuntimeError("close() failed to raise error")
+ else:
+ # The kernel cephfs client does not raise errors on fclose
+ os.close(f)
+
+ os.unlink("{file_path}")
+ """)
+ self._remote_write_test(remote_script)
+
+ def test_full_fsync(self):
+ """
+ That when the full flag is encountered during asynchronous
+ flushes, an fwrite() succeeds but a subsequent fsync()/fclose()
+ returns the ENOSPC error.
+ """
+
+ # A remote script which opens a file handle, fills up the filesystem, and then
+ # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
+ remote_script = dedent("""
+ import time
+ import datetime
+ import subprocess
+ import os
+
+ # Write some buffered data through before going full, all should be well
+ print("writing some data through which we expect to succeed")
+ bytes = 0
+ f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
+ bytes += os.write(f, b'a' * 4096)
+ os.fsync(f)
+ print("fsync'ed data successfully, will now attempt to fill fs")
+
+ # Okay, now we're going to fill up the filesystem, and then keep
+ # writing until we see an error from fsync. As long as we're doing
+ # buffered IO, the error should always only appear from fsync and not
+ # from write
+ full = False
+
+ for n in range(0, int({fill_mb} * 1.1)):
+ try:
+ bytes += os.write(f, b'x' * 1024 * 1024)
+ print("wrote bytes via buffered write, moving on to fsync")
+ except OSError as e:
+ print("Unexpected error %s from write() instead of fsync()" % e)
+ raise
+
+ try:
+ os.fsync(f)
+ print("fsync'ed successfully")
+ except OSError as e:
+ print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
+ full = True
+ break
+ else:
+ print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)))
+
+ if n > {fill_mb} * 0.9:
+ # Be cautious in the last region where we expect to hit
+ # the full condition, so that we don't overshoot too dramatically
+ print("sleeping a bit as we've exceeded 90% of our expected full ratio")
+ time.sleep({full_wait})
+
+ if not full:
+ raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)
+
+ # close() should not raise an error because we already caught it in
+ # fsync. There shouldn't have been any more writeback errors
+ # since then because all IOs got cancelled on the full flag.
+ print("calling close")
+ os.close(f)
+ print("close() did not raise error")
+
+ os.unlink("{file_path}")
+ """)
+
+ self._remote_write_test(remote_script)
+
+
+class TestQuotaFull(FullnessTestCase):
+ """
+ Test per-pool fullness, which indicates quota limits exceeded
+ """
+ pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit
+ fill_mb = pool_capacity // (1024 * 1024)
+
+ # We are only testing quota handling on the data pool, not the metadata
+ # pool.
+ data_only = True
+
+ def setUp(self):
+ super(TestQuotaFull, self).setUp()
+
+ pool_name = self.fs.get_data_pool_name()
+ self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
+ "max_bytes", "{0}".format(self.pool_capacity))
+
+ def is_full(self):
+ return self.fs.is_full()
+
+
+class TestClusterFull(FullnessTestCase):
+ """
+ Test data pool fullness, which indicates that an OSD has become too full
+ """
+ pool_capacity = None
+ REQUIRE_MEMSTORE = True
+
+ def setUp(self):
+ super(TestClusterFull, self).setUp()
+
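+ # Derive the capacity from the data pool's reported max_avail on first run
+ # and store it on the class so every test in this class uses the same figure.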
+ if self.pool_capacity is None:
+ max_avail = self.fs.get_pool_df(self._data_pool_name())['max_avail']
+ full_ratio = float(self.fs.get_config("mon_osd_full_ratio", service_type="mon"))
+ TestClusterFull.pool_capacity = int(max_avail * full_ratio)
+ TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024))
+
+ def is_full(self):
+ return self.fs.is_full()
+
+# Hide the parent class so that unittest.loader doesn't try to run it.
+del globals()['FullnessTestCase']
diff --git a/qa/tasks/cephfs/test_journal_migration.py b/qa/tasks/cephfs/test_journal_migration.py
new file mode 100644
index 00000000..8863b371
--- /dev/null
+++ b/qa/tasks/cephfs/test_journal_migration.py
@@ -0,0 +1,100 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.workunit import task as workunit
+
+JOURNAL_FORMAT_LEGACY = 0
+JOURNAL_FORMAT_RESILIENT = 1
+
+
+class TestJournalMigration(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 2
+
+ def test_journal_migration(self):
+ old_journal_version = JOURNAL_FORMAT_LEGACY
+ new_journal_version = JOURNAL_FORMAT_RESILIENT
+
+ self.mount_a.umount_wait()
+ self.fs.mds_stop()
+
+ # Create a filesystem using the older journal format.
+ self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
+ self.fs.mds_restart()
+ self.fs.recreate()
+
+ # Enable standby replay, to cover the bug case #8811 where
+ # a standby replay might mistakenly end up trying to rewrite
+ # the journal at the same time as an active daemon.
+ self.fs.set_allow_standby_replay(True)
+
+ status = self.fs.wait_for_daemons()
+
+ self.assertTrue(self.fs.get_replay(status=status) is not None)
+
+ # Do some client work so that the log is populated with something.
+ with self.mount_a.mounted():
+ self.mount_a.create_files()
+ self.mount_a.check_files() # sanity, this should always pass
+
+ # Run a more substantial workunit so that the length of the log to be
+ # converted spans at least a few segments
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
+ },
+ "timeout": "3h"
+ })
+
+ # Modify the ceph.conf to ask the MDS to use the new journal format.
+ self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)
+
+ # Restart the MDS.
+ self.fs.mds_fail_restart()
+
+ # This ensures that all daemons come up into a valid state
+ status = self.fs.wait_for_daemons()
+
+ # Check that files created in the initial client workload are still visible
+ # in a client mount.
+ with self.mount_a.mounted():
+ self.mount_a.check_files()
+
+ # Verify that the journal really has been rewritten.
+ journal_version = self.fs.get_journal_version()
+ if journal_version != new_journal_version:
+ raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
+ new_journal_version, journal_version
+ ))
+
+ # Verify that cephfs-journal-tool can now read the rewritten journal
+ inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
+ if not inspect_out.endswith(": OK"):
+ raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
+ inspect_out
+ ))
+
+ self.fs.journal_tool(["event", "get", "json",
+ "--path", "/tmp/journal.json"], 0)
+ p = self.fs.tool_remote.sh([
+ "python3",
+ "-c",
+ "import json; print(len(json.load(open('/tmp/journal.json'))))"
+ ])
+ event_count = int(p.strip())
+ if event_count < 1000:
+ # Approximate value of "lots", expected from having run fsstress
+ raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
+
+ # Do some client work to check that writing the log is still working
+ with self.mount_a.mounted():
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
+ },
+ "timeout": "3h"
+ })
+
+ # Check that both an active and a standby replay are still up
+ status = self.fs.status()
+ self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
+ self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
new file mode 100644
index 00000000..a52455d7
--- /dev/null
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -0,0 +1,447 @@
+
+"""
+Test our tools for recovering the content of damaged journals
+"""
+
+import json
+import logging
+from textwrap import dedent
+import time
+
+from teuthology.exceptions import CommandFailedError, ConnectionLostError
+from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+from tasks.workunit import task as workunit
+
+log = logging.getLogger(__name__)
+
+
+class TestJournalRepair(CephFSTestCase):
+ MDSS_REQUIRED = 2
+
+ def test_inject_to_empty(self):
+ """
+ That when some dentries are in the journal but nothing is in
+ the backing store, we correctly populate the backing store
+ from the journalled dentries.
+ """
+
+ # Inject metadata operations
+ self.mount_a.run_shell(["touch", "rootfile"])
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.run_shell(["touch", "subdir/subdirfile"])
+ # There are several different paths for handling hardlinks, depending
+ # on whether an existing dentry (being overwritten) is also a hardlink
+ self.mount_a.run_shell(["mkdir", "linkdir"])
+
+ # Test inode -> remote transition for a dentry
+ self.mount_a.run_shell(["touch", "linkdir/link0"])
+ self.mount_a.run_shell(["rm", "-f", "linkdir/link0"])
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"])
+
+ # Test nothing -> remote transition
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"])
+
+ # Test remote -> inode transition
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"])
+ self.mount_a.run_shell(["rm", "-f", "linkdir/link2"])
+ self.mount_a.run_shell(["touch", "linkdir/link2"])
+
+ # Test remote -> diff remote transition
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"])
+ self.mount_a.run_shell(["rm", "-f", "linkdir/link3"])
+ self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"])
+
+ # Test an empty directory
+ self.mount_a.run_shell(["mkdir", "subdir/subsubdir"])
+ self.mount_a.run_shell(["sync"])
+
+ # Before we unmount, make a note of the inode numbers, later we will
+ # check that they match what we recover from the journal
+ rootfile_ino = self.mount_a.path_to_ino("rootfile")
+ subdir_ino = self.mount_a.path_to_ino("subdir")
+ linkdir_ino = self.mount_a.path_to_ino("linkdir")
+ subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile")
+ subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir")
+
+ self.mount_a.umount_wait()
+
+ # Stop the MDS
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # Now, the journal should contain the operations, but the backing
+ # store shouldn't
+ with self.assertRaises(ObjectNotFound):
+ self.fs.list_dirfrag(subdir_ino)
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
+
+ # Execute the dentry recovery, this should populate the backing store
+ self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
+
+ # Dentries in ROOT_INO are present
+ self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head']))
+ self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head'])
+ self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)),
+ sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head']))
+
+ # Now check the MDS can read what we wrote: truncate the journal
+ # and start the mds.
+ self.fs.journal_tool(['journal', 'reset'], 0)
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ # List files
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # First ls -R to populate MDCache, such that hardlinks will
+ # resolve properly (recover_dentries does not create backtraces,
+ # so ordinarily hardlinks to inodes that happen not to have backtraces
+ # will be invisible in readdir).
+ # FIXME: hook in forward scrub here to regenerate backtraces
+ proc = self.mount_a.run_shell(['ls', '-R'])
+ self.mount_a.umount_wait() # remount to clear client cache before our second ls
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ proc = self.mount_a.run_shell(['ls', '-R'])
+ self.assertEqual(proc.stdout.getvalue().strip(),
+ dedent("""
+ .:
+ linkdir
+ rootfile
+ subdir
+
+ ./linkdir:
+ link0
+ link1
+ link2
+ link3
+
+ ./subdir:
+ subdirfile
+ subsubdir
+
+ ./subdir/subsubdir:
+ """).strip())
+
+ # Check the correct inos were preserved by path
+ self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile"))
+ self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir"))
+ self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile"))
+ self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir"))
+
+ # Check that the hard link handling came out correctly
+ self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino)
+ self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino)
+ self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino)
+ self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino)
+
+ # Create a new file, ensure it is not issued the same ino as one of the
+ # recovered ones
+ self.mount_a.run_shell(["touch", "afterwards"])
+ new_ino = self.mount_a.path_to_ino("afterwards")
+ self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino])
+
+ # Check that we can do metadata ops in the recovered directory
+ self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"])
+
+ @for_teuthology # 308s
+ def test_reset(self):
+ """
+ That after forcibly modifying the backing store, we can get back into
+ a good state by resetting the MDSMap.
+
+ The scenario is that we have two active MDSs, and we lose the journals. Once
+ we have completely lost confidence in the integrity of the metadata, we want to
+ return the system to a single-MDS state to go into a scrub to recover what we
+ can.
+ """
+
+ # Set max_mds to 2
+ self.fs.set_max_mds(2)
+
+ # See that we have two active MDSs
+ self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+ active_mds_names = self.fs.get_active_names()
+
+ # Switch off any unneeded MDS daemons
+ for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names):
+ self.mds_cluster.mds_stop(unneeded_mds)
+ self.mds_cluster.mds_fail(unneeded_mds)
+
+ # Create a dir on each rank
+ self.mount_a.run_shell(["mkdir", "alpha"])
+ self.mount_a.run_shell(["mkdir", "bravo"])
+ self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1")
+
+ def subtrees_assigned():
+ got_subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=active_mds_names[0])
+
+ for s in got_subtrees:
+ if s['dir']['path'] == '/bravo':
+ if s['auth_first'] == 1:
+ return True
+ else:
+ # Should not happen
+ raise RuntimeError("/bravo is subtree but not rank 1!")
+
+ return False
+
+ # Ensure the pinning has taken effect and the /bravo dir is now
+ # migrated to rank 1.
+ self.wait_until_true(subtrees_assigned, 30)
+
+ # Do some IO (this should be split across ranks according to
+ # the rank-pinned dirs)
+ self.mount_a.create_n_files("alpha/file", 1000)
+ self.mount_a.create_n_files("bravo/file", 1000)
+
+ # Flush the journals so that we have some backing store data
+ # belonging to one MDS, and some to the other MDS.
+ for mds_name in active_mds_names:
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ # Stop (hard) the second MDS daemon
+ self.fs.mds_stop(active_mds_names[1])
+
+ # Wipe out the tables for MDS rank 1 so that it is broken and can't start
+ # (this is the simulated failure that we will demonstrate that the disaster
+ # recovery tools can get us back from)
+ self.fs.erase_metadata_objects(prefix="mds1_")
+
+ # Try to access files from the client
+ blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False)
+
+ # Check that this "ls -R" blocked rather than completing: indicates
+ # it got stuck trying to access subtrees which were on the now-dead MDS.
+ log.info("Sleeping to check ls is blocked...")
+ time.sleep(60)
+ self.assertFalse(blocked_ls.finished)
+
+ # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1
+ # is not coming back. Kill it.
+ log.info("Killing mount, it's blocked on the MDS we killed")
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+ try:
+ # Now that the mount is dead, the ls -R should error out.
+ blocked_ls.wait()
+ except (CommandFailedError, ConnectionLostError):
+ # The ConnectionLostError case is for kernel client, where
+ # killing the mount also means killing the node.
+ pass
+
+ # See that the second MDS will crash when it starts and tries to
+ # acquire rank 1
+ damaged_id = active_mds_names[1]
+ self.fs.mds_restart(damaged_id)
+
+ # The daemon taking the damaged rank should begin starting up, then
+ # restart back into standby after asking the mon to mark the rank
+ # damaged.
+ def is_marked_damaged():
+ mds_map = self.fs.get_mds_map()
+ return 1 in mds_map['damaged']
+
+ self.wait_until_true(is_marked_damaged, 60)
+
+ def get_state():
+ info = self.mds_cluster.get_mds_info(damaged_id)
+ return info['state'] if info is not None else None
+
+ self.wait_until_equal(
+ get_state,
+ "up:standby",
+ timeout=60)
+
+ self.fs.mds_stop(damaged_id)
+ self.fs.mds_fail(damaged_id)
+
+ # Now give up and go through a disaster recovery procedure
+ self.fs.mds_stop(active_mds_names[0])
+ self.fs.mds_fail(active_mds_names[0])
+ # Invoke recover_dentries quietly, because otherwise log spews millions of lines
+ self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True)
+ self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True)
+ self.fs.table_tool(["0", "reset", "session"])
+ self.fs.journal_tool(["journal", "reset"], 0)
+ self.fs.erase_mds_objects(1)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ # Bring an MDS back online, mount a client, and see that we can walk the full
+ # filesystem tree again
+ self.fs.mds_fail_restart(active_mds_names[0])
+ self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30,
+ reject_fn=lambda v: len(v) > 1)
+ self.mount_a.mount()
+ self.mount_a.run_shell(["ls", "-R"], wait=True)
+
+ def test_table_tool(self):
+ active_mdss = self.fs.get_active_names()
+ self.assertEqual(len(active_mdss), 1)
+ mds_name = active_mdss[0]
+
+ self.mount_a.run_shell(["touch", "foo"])
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ log.info(self.fs.table_tool(["all", "show", "inode"]))
+ log.info(self.fs.table_tool(["all", "show", "snap"]))
+ log.info(self.fs.table_tool(["all", "show", "session"]))
+
+ # Inode table should always be the same because initial state
+ # and choice of inode are deterministic.
+ # Should see one inode consumed
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "inode"])),
+ {"0": {
+ "data": {
+ "version": 2,
+ "inotable": {
+ "projected_free": [
+ {"start": 1099511628777,
+ "len": 1099511626775}],
+ "free": [
+ {"start": 1099511628777,
+ "len": 1099511626775}]}},
+ "result": 0}}
+
+ )
+
+ # Should see one session
+ session_data = json.loads(self.fs.table_tool(
+ ["all", "show", "session"]))
+ self.assertEqual(len(session_data["0"]["data"]["sessions"]), 1)
+ self.assertEqual(session_data["0"]["result"], 0)
+
+ # Should see no snaps
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "snap"])),
+ {"version": 1,
+ "snapserver": {"last_snap": 1,
+ "last_created": 1,
+ "last_destroyed": 1,
+ "pending_noop": [],
+ "snaps": [],
+ "need_to_purge": {},
+ "pending_update": [],
+ "pending_destroy": []},
+ "result": 0}
+ )
+
+ # Reset everything
+ for table in ["session", "inode", "snap"]:
+ self.fs.table_tool(["all", "reset", table])
+
+ log.info(self.fs.table_tool(["all", "show", "inode"]))
+ log.info(self.fs.table_tool(["all", "show", "snap"]))
+ log.info(self.fs.table_tool(["all", "show", "session"]))
+
+ # Should see 0 sessions
+ session_data = json.loads(self.fs.table_tool(
+ ["all", "show", "session"]))
+ self.assertEqual(len(session_data["0"]["data"]["sessions"]), 0)
+ self.assertEqual(session_data["0"]["result"], 0)
+
+ # Should see entire inode range now marked free
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "inode"])),
+ {"0": {"data": {"version": 1,
+ "inotable": {"projected_free": [
+ {"start": 1099511627776,
+ "len": 1099511627776}],
+ "free": [
+ {"start": 1099511627776,
+ "len": 1099511627776}]}},
+ "result": 0}}
+ )
+
+ # Should see no snaps
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "snap"])),
+ {"version": 1,
+ "snapserver": {"last_snap": 1,
+ "last_created": 1,
+ "last_destroyed": 1,
+ "pending_noop": [],
+ "snaps": [],
+ "need_to_purge": {},
+ "pending_update": [],
+ "pending_destroy": []},
+ "result": 0}
+ )
+
+ def test_table_tool_take_inos(self):
+ initial_range_start = 1099511627776
+ initial_range_len = 1099511627776
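+ # A fresh inotable exposes a single free range of 2^40 inodes starting at 0x10000000000.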
+ # Initially a completely clear range
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "inode"])),
+ {"0": {"data": {"version": 0,
+ "inotable": {"projected_free": [
+ {"start": initial_range_start,
+ "len": initial_range_len}],
+ "free": [
+ {"start": initial_range_start,
+ "len": initial_range_len}]}},
+ "result": 0}}
+ )
+
+ # Remove some
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])),
+ {"0": {"data": {"version": 1,
+ "inotable": {"projected_free": [
+ {"start": initial_range_start + 101,
+ "len": initial_range_len - 101}],
+ "free": [
+ {"start": initial_range_start + 101,
+ "len": initial_range_len - 101}]}},
+ "result": 0}}
+ )
+
+ @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth
+ def test_journal_smoke(self):
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): [
+ "fs/misc/trivial_sync.sh"],
+ },
+ "timeout": "1h"
+ })
+
+ for mount in self.mounts:
+ mount.umount_wait()
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # journal tool smoke
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): [
+ "suites/cephfs_journal_tool_smoke.sh"],
+ },
+ "timeout": "1h"
+ })
+
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+
+ # trivial sync on mount a
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): [
+ "fs/misc/trivial_sync.sh"],
+ },
+ "timeout": "1h"
+ })
+
diff --git a/qa/tasks/cephfs/test_mantle.py b/qa/tasks/cephfs/test_mantle.py
new file mode 100644
index 00000000..6cd86ad1
--- /dev/null
+++ b/qa/tasks/cephfs/test_mantle.py
@@ -0,0 +1,109 @@
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import json
+import logging
+
+log = logging.getLogger(__name__)
+failure = "using old balancer; mantle failed for balancer="
+success = "mantle balancer version changed: "
+
+class TestMantle(CephFSTestCase):
+ def start_mantle(self):
+ self.wait_for_health_clear(timeout=30)
+ self.fs.set_max_mds(2)
+ self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ for m in self.fs.get_active_names():
+ self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m)
+ self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m)
+ self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m)
+ self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m)
+
+ def push_balancer(self, obj, lua_code, expect):
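+ # Point the filesystem at the named balancer object, upload the Lua code to
+ # RADOS, and expect the MDS to log the corresponding failure.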
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj)
+ self.fs.rados(["put", obj, "-"], stdin_data=lua_code)
+ with self.assert_cluster_log(failure + obj + " " + expect):
+ log.info("run a " + obj + " balancer that expects=" + expect)
+
+ def test_version_empty(self):
+ self.start_mantle()
+ expect = " : (2) No such file or directory"
+
+ ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer')
+ assert(ret == 22) # EINVAL
+
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ")
+ with self.assert_cluster_log(failure + " " + expect): pass
+
+ def test_version_not_in_rados(self):
+ self.start_mantle()
+ expect = failure + "ghost.lua : (2) No such file or directory"
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua")
+ with self.assert_cluster_log(expect): pass
+
+ def test_balancer_invalid(self):
+ self.start_mantle()
+ expect = ": (22) Invalid argument"
+
+ lua_code = "this is invalid lua code!"
+ self.push_balancer("invalid.lua", lua_code, expect)
+
+ lua_code = "BAL_LOG()"
+ self.push_balancer("invalid_log.lua", lua_code, expect)
+
+ lua_code = "BAL_LOG(0)"
+ self.push_balancer("invalid_log_again.lua", lua_code, expect)
+
+ def test_balancer_valid(self):
+ self.start_mantle()
+ lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}"
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
+ self.fs.rados(["put", "valid.lua", "-"], stdin_data=lua_code)
+ with self.assert_cluster_log(success + "valid.lua"):
+ log.info("run a valid.lua balancer")
+
+ def test_return_invalid(self):
+ self.start_mantle()
+ expect = ": (22) Invalid argument"
+
+ lua_code = "return \"hello\""
+ self.push_balancer("string.lua", lua_code, expect)
+
+ lua_code = "return 3"
+ self.push_balancer("number.lua", lua_code, expect)
+
+ lua_code = "return {}"
+ self.push_balancer("dict_empty.lua", lua_code, expect)
+
+ lua_code = "return {\"this\", \"is\", \"a\", \"test\"}"
+ self.push_balancer("dict_of_strings.lua", lua_code, expect)
+
+ lua_code = "return {3, \"test\"}"
+ self.push_balancer("dict_of_mixed.lua", lua_code, expect)
+
+ lua_code = "return {3}"
+ self.push_balancer("not_enough_numbers.lua", lua_code, expect)
+
+ lua_code = "return {3, 4, 5, 6, 7, 8, 9}"
+ self.push_balancer("too_many_numbers.lua", lua_code, expect)
+
+ def test_dead_osd(self):
+ self.start_mantle()
+ expect = " : (110) Connection timed out"
+
+ # kill the OSDs so that the balancer pull from RADOS times out
+ osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty'))
+ for i in range(0, len(osd_map['osds'])):
+ self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i))
+ self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i))
+
+ # trigger a pull from RADOS
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
+
+ # make the timeout a little longer since dead OSDs spam ceph -w
+ with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30):
+ log.info("run a balancer that should timeout")
+
+ # cleanup
+ for i in range(0, len(osd_map['osds'])):
+ self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i))
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
new file mode 100644
index 00000000..cd72ac38
--- /dev/null
+++ b/qa/tasks/cephfs/test_misc.py
@@ -0,0 +1,291 @@
+
+from unittest import SkipTest
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
+import errno
+import time
+import json
+import logging
+
+log = logging.getLogger(__name__)
+
+class TestMisc(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ def test_getattr_caps(self):
+ """
+ Check if the MDS recognizes the 'mask' parameter of the open request.
+ The parameter allows the client to request caps when opening a file.
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client")
+
+ # Enable debug. Client will request CEPH_CAP_XATTR_SHARED
+ # on lookup/open
+ self.mount_b.umount_wait()
+ self.set_conf('client', 'client debug getattr caps', 'true')
+ self.mount_b.mount()
+ self.mount_b.wait_until_mounted()
+
+ # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_*
+ # to mount_a
+ p = self.mount_a.open_background("testfile")
+ self.mount_b.wait_for_visible("testfile")
+
+ # this triggers a lookup request and an open request. The debug
+ # code will check if lookup/open reply contains xattrs
+ self.mount_b.run_shell(["cat", "testfile"])
+
+ self.mount_a.kill_background(p)
+
+ def test_root_rctime(self):
+ """
+ Check that the root inode has a non-default rctime on startup.
+ """
+
+ t = time.time()
+ rctime = self.mount_a.getfattr(".", "ceph.dir.rctime")
+ log.info("rctime = {}".format(rctime))
+ self.assertGreaterEqual(float(rctime), t - 10)
+
+ def test_fs_new(self):
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ data_pool_name = self.fs.get_data_pool_name()
+
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
+ self.fs.metadata_pool_name,
+ self.fs.metadata_pool_name,
+ '--yes-i-really-really-mean-it')
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.fs.metadata_pool_name,
+ self.fs.pgs_per_fs_pool.__str__())
+
+ dummyfile = '/etc/fstab'
+
+ self.fs.put_metadata_object_raw("key", dummyfile)
+
+ def get_pool_df(fs, name):
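+ # True once the pool reports at least one object, i.e. the dummy metadata write has landed.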
+ try:
+ return fs.get_pool_df(name)['objects'] > 0
+ except RuntimeError:
+ return False
+
+ self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30)
+
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
+ self.fs.metadata_pool_name,
+ data_pool_name)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ raise AssertionError("Expected EINVAL")
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
+ self.fs.metadata_pool_name,
+ data_pool_name, "--force")
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
+ '--yes-i-really-mean-it')
+
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
+ self.fs.metadata_pool_name,
+ self.fs.metadata_pool_name,
+ '--yes-i-really-really-mean-it')
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.fs.metadata_pool_name,
+ self.fs.pgs_per_fs_pool.__str__())
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
+ self.fs.metadata_pool_name,
+ data_pool_name)
+
+ def test_cap_revoke_nonresponder(self):
+ """
+ Check that a client is evicted if it has not responded to cap revoke
+ request for configured number of seconds.
+ """
+ session_timeout = self.fs.get_var("session_timeout")
+ eviction_timeout = session_timeout / 2.0
+
+ self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout',
+ str(eviction_timeout)])
+
+ cap_holder = self.mount_a.open_background()
+
+ # Wait for the file to be visible from another client, indicating
+ # that mount_a has completed its network ops
+ self.mount_b.wait_for_visible()
+
+ # Simulate client death
+ self.mount_a.kill()
+
+ try:
+ # The waiter should get stuck waiting for the capability
+ # held on the MDS by the now-dead client A
+ cap_waiter = self.mount_b.write_background()
+
+ a = time.time()
+ time.sleep(eviction_timeout)
+ cap_waiter.wait()
+ b = time.time()
+ cap_waited = b - a
+ log.info("cap_waiter waited {0}s".format(cap_waited))
+
+ # check if the cap was transferred before the session timeout kicked in.
+ # this is a good enough check to ensure that the client got evicted
+ # by the cap auto evicter rather than transitioning to stale state
+ # and then getting evicted.
+ self.assertLess(cap_waited, session_timeout,
+ "Capability handover took {0}, expected less than {1}".format(
+ cap_waited, session_timeout
+ ))
+
+ self.assertTrue(self.mount_a.is_blacklisted())
+ cap_holder.stdin.close()
+ try:
+ cap_holder.wait()
+ except (CommandFailedError, ConnectionLostError):
+ # We killed it (and possibly its node), so it raises an error
+ pass
+ finally:
+ self.mount_a.kill_cleanup()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ def test_filtered_df(self):
+ pool_name = self.fs.get_data_pool_name()
+ raw_df = self.fs.get_pool_df(pool_name)
+ raw_avail = float(raw_df["max_avail"])
+ out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
+ pool_name, 'size',
+ '-f', 'json-pretty')
+ _ = json.loads(out)
+
+ proc = self.mount_a.run_shell(['df', '.'])
+ output = proc.stdout.getvalue()
+ fs_avail = output.split('\n')[1].split()[3]
+ fs_avail = float(fs_avail) * 1024
+
+ ratio = raw_avail / fs_avail
+ assert 0.9 < ratio < 1.1
+
+ def test_dump_inode(self):
+ info = self.fs.mds_asok(['dump', 'inode', '1'])
+ assert(info['path'] == "/")
+
+ def test_dump_inode_hexadecimal(self):
+ self.mount_a.run_shell(["mkdir", "-p", "foo"])
+ ino = self.mount_a.path_to_ino("foo")
+ assert type(ino) is int
+ info = self.fs.mds_asok(['dump', 'inode', hex(ino)])
+ assert info['path'] == "/foo"
+
+
+class TestCacheDrop(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+
+ def _run_drop_cache_cmd(self, timeout=None):
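+ # Issue "cache drop [timeout]" to the lone MDS via the tell interface and
+ # return the parsed JSON result.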
+ result = None
+ mds_id = self.fs.get_lone_mds_id()
+ if timeout is not None:
+ result = self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
+ "cache", "drop", str(timeout))
+ else:
+ result = self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
+ "cache", "drop")
+ return json.loads(result)
+
+ def _setup(self, max_caps=20, threshold=400):
+ # create some files
+ self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True)
+
+ # Reduce this so the MDS doesn't recall the maximum for simple tests
+ self.fs.rank_asok(['config', 'set', 'mds_recall_max_caps', str(max_caps)])
+ self.fs.rank_asok(['config', 'set', 'mds_recall_max_decay_threshold', str(threshold)])
+
+ def test_drop_cache_command(self):
+ """
+ Basic test for checking drop cache command.
+ Confirm it halts without a timeout.
+ Note that the cache size post trimming is not checked here.
+ """
+ mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
+ self._setup()
+ result = self._run_drop_cache_cmd()
+ self.assertEqual(result['client_recall']['return_code'], 0)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ # It should take at least 1 second
+ self.assertGreater(result['duration'], 1)
+ self.assertGreaterEqual(result['trim_cache']['trimmed'], 1000-2*mds_min_caps_per_client)
+
+ def test_drop_cache_command_timeout(self):
+ """
+ Basic test for checking drop cache command.
+ Confirm recall halts early via a timeout.
+ Note that the cache size post trimming is not checked here.
+ """
+ self._setup()
+ result = self._run_drop_cache_cmd(timeout=10)
+ self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ self.assertGreater(result['duration'], 10)
+ self.assertGreaterEqual(result['trim_cache']['trimmed'], 100) # we did something, right?
+
+ def test_drop_cache_command_dead_timeout(self):
+ """
+ Check drop cache command with non-responding client using tell
+ interface. Note that the cache size post trimming is not checked
+ here.
+ """
+ self._setup()
+ self.mount_a.kill()
+ # Note: recall is subject to the timeout. The journal flush will
+ # be delayed due to the client being dead.
+ result = self._run_drop_cache_cmd(timeout=5)
+ self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ self.assertGreater(result['duration'], 5)
+ self.assertLess(result['duration'], 120)
+ # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
+ # cache now causes the Locker to drive eviction of stale clients (a
+ # stale session will be autoclosed at mdsmap['session_timeout']). The
+ # particular operation causing this is journal flush which causes the
+ # MDS to wait for cap revoke.
+ #self.assertEqual(0, result['trim_cache']['trimmed'])
+ self.mount_a.kill_cleanup()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ def test_drop_cache_command_dead(self):
+ """
+ Check drop cache command with non-responding client using tell
+ interface. Note that the cache size post trimming is not checked
+ here.
+ """
+ self._setup()
+ self.mount_a.kill()
+ result = self._run_drop_cache_cmd()
+ self.assertEqual(result['client_recall']['return_code'], 0)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ self.assertGreater(result['duration'], 5)
+ self.assertLess(result['duration'], 120)
+ # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
+ # cache now causes the Locker to drive eviction of stale clients (a
+ # stale session will be autoclosed at mdsmap['session_timeout']). The
+ # particular operation causing this is journal flush which causes the
+ # MDS to wait for cap revoke.
+ self.mount_a.kill_cleanup()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
diff --git a/qa/tasks/cephfs/test_openfiletable.py b/qa/tasks/cephfs/test_openfiletable.py
new file mode 100644
index 00000000..36e212d7
--- /dev/null
+++ b/qa/tasks/cephfs/test_openfiletable.py
@@ -0,0 +1,41 @@
+import time
+ from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+ from teuthology.exceptions import CommandFailedError
+
+class OpenFileTable(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def test_max_items_per_obj(self):
+ """
+ The maximum number of keys per openfiles omap object is now equal to
+ the osd_deep_scrub_large_omap_object_key_threshold option.
+ """
+ self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "5")
+
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ # Write some bytes to a file
+ size_mb = 1
+
+ # Hold the file open
+ file_count = 8
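+ # With the key threshold set to 5, holding 8 files open forces the open file
+ # table to spill into a second omap object (mds0_openfiles.1).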
+ for i in range(0, file_count):
+ filename = "open_file{}".format(i)
+ p = self.mount_a.open_background(filename)
+ self.mount_a.write_n_mb(filename, size_mb)
+
+ time.sleep(10)
+
+ """
+ With osd_deep_scrub_large_omap_object_key_threshold value as 5 and
+ opening 8 files we should have a new rados object with name
+ mds0_openfiles.1 to hold the extra keys.
+ """
+
+ stat_out = self.fs.rados(["stat", "mds0_openfiles.1"])
+
+ # Now close the file
+ self.mount_a.kill_background(p)
diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py
new file mode 100644
index 00000000..a1f234a2
--- /dev/null
+++ b/qa/tasks/cephfs/test_pool_perm.py
@@ -0,0 +1,113 @@
+from textwrap import dedent
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import os
+
+
+class TestPoolPerm(CephFSTestCase):
+ def test_pool_perm(self):
+ self.mount_a.run_shell(["touch", "test_file"])
+
+ file_path = os.path.join(self.mount_a.mountpoint, "test_file")
+
+ remote_script = dedent("""
+ import os
+ import errno
+
+ fd = os.open("{path}", os.O_RDWR)
+ try:
+ if {check_read}:
+ ret = os.read(fd, 1024)
+ else:
+ os.write(fd, b'content')
+ except OSError as e:
+ if e.errno != errno.EPERM:
+ raise
+ else:
+ raise RuntimeError("client does not check permission of data pool")
+ """)
+
+ client_name = "client.{0}".format(self.mount_a.client_id)
+
+ # set data pool read only
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd',
+ 'allow r pool={0}'.format(self.fs.get_data_pool_name()))
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # write should fail
+ self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False)))
+
+ # set data pool write only
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd',
+ 'allow w pool={0}'.format(self.fs.get_data_pool_name()))
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # read should fail
+ self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True)))
+
+ def test_forbidden_modification(self):
+ """
+ That a client who does not have the capability for setting
+ layout pools is prevented from doing so.
+ """
+
+ # Set up
+ client_name = "client.{0}".format(self.mount_a.client_id)
+ new_pool_name = "data_new"
+ self.fs.add_data_pool(new_pool_name)
+
+ self.mount_a.run_shell(["touch", "layoutfile"])
+ self.mount_a.run_shell(["mkdir", "layoutdir"])
+
+ # Set MDS 'rw' perms: missing 'p' means no setting pool layouts
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r',
+ 'osd',
+ 'allow rw pool={0},allow rw pool={1}'.format(
+ self.fs.get_data_pool_names()[0],
+ self.fs.get_data_pool_names()[1],
+ ))
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool",
+ new_pool_name)
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool",
+ new_pool_name)
+ self.mount_a.umount_wait()
+
+ # Set MDS 'rwp' perms: should now be able to set layouts
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r',
+ 'osd',
+ 'allow rw pool={0},allow rw pool={1}'.format(
+ self.fs.get_data_pool_names()[0],
+ self.fs.get_data_pool_names()[1],
+ ))
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool",
+ new_pool_name)
+ self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool",
+ new_pool_name)
+ self.mount_a.umount_wait()
+
+ def tearDown(self):
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_a.client_id),
+ 'mds', 'allow', 'mon', 'allow r', 'osd',
+ 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0]))
+ super(TestPoolPerm, self).tearDown()
+
diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py
new file mode 100644
index 00000000..dcfda5e2
--- /dev/null
+++ b/qa/tasks/cephfs/test_quota.py
@@ -0,0 +1,106 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+from teuthology.exceptions import CommandFailedError
+
+class TestQuota(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 1
+
+ def test_remote_update_getfattr(self):
+ """
+ That quota changes made from one client are visible to another
+ client looking at ceph.quota xattrs
+ """
+ self.mount_a.run_shell(["mkdir", "subdir"])
+
+ self.assertEqual(
+ self.mount_a.getfattr("./subdir", "ceph.quota.max_files"),
+ None)
+ self.assertEqual(
+ self.mount_b.getfattr("./subdir", "ceph.quota.max_files"),
+ None)
+
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10")
+ self.assertEqual(
+ self.mount_a.getfattr("./subdir", "ceph.quota.max_files"),
+ "10")
+
+ # Should be visible as soon as setxattr operation completes on
+ # mds (we get here sooner because setfattr gets an early reply)
+ self.wait_until_equal(
+ lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"),
+ "10", timeout=10)
+
+ def test_remote_update_df(self):
+ """
+ That when a client modifies the quota on a directory used
+ as another client's root, the other client sees the change
+ reflected in their statfs output.
+ """
+
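+        # When a client's mount root (or an ancestor of it) carries ceph.quota.max_bytes,
+        # the client reports that quota as the filesystem size in statfs/df, which is
+        # what the df() assertions below rely on.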
+ self.mount_b.umount_wait()
+
+ self.mount_a.run_shell(["mkdir", "subdir"])
+
+ size_before = 1024 * 1024 * 128
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
+ "%s" % size_before)
+
+ self.mount_b.mount(mount_path="/subdir")
+
+ self.assertDictEqual(
+ self.mount_b.df(),
+ {
+ "total": size_before,
+ "used": 0,
+ "available": size_before
+ })
+
+ size_after = 1024 * 1024 * 256
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
+ "%s" % size_after)
+
+ # Should be visible as soon as setxattr operation completes on
+ # mds (we get here sooner because setfattr gets an early reply)
+ self.wait_until_equal(
+ lambda: self.mount_b.df(),
+ {
+ "total": size_after,
+ "used": 0,
+ "available": size_after
+ },
+ timeout=10
+ )
+
+ def test_remote_update_write(self):
+ """
+ That when a client modifies the quota on a directory used
+ as another client's root, the other client sees the effect
+ of the change when writing data.
+ """
+
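+        # Note: quota enforcement in CephFS is cooperative and approximate -- a writer
+        # may overshoot a quota slightly before it learns of the change -- so the writes
+        # below go well past the new limits rather than checking exact byte counts.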
+ self.mount_a.run_shell(["mkdir", "subdir_files"])
+ self.mount_a.run_shell(["mkdir", "subdir_data"])
+
+ # Set some nice high quotas that mount_b's initial operations
+ # will be well within
+ self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100")
+ self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600")
+
+ # Do some writes within my quota
+ self.mount_b.create_n_files("subdir_files/file", 20)
+ self.mount_b.write_n_mb("subdir_data/file", 20)
+
+ # Set quotas lower than what mount_b already wrote, it should
+ # refuse to write more once it's seen them
+ self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10")
+ self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576")
+
+ # Do some writes that would have been okay within the old quota,
+ # but are forbidden under the new quota
+ with self.assertRaises(CommandFailedError):
+ self.mount_b.create_n_files("subdir_files/file", 40)
+ with self.assertRaises(CommandFailedError):
+ self.mount_b.write_n_mb("subdir_data/file", 40)
+
diff --git a/qa/tasks/cephfs/test_readahead.py b/qa/tasks/cephfs/test_readahead.py
new file mode 100644
index 00000000..31e7bf18
--- /dev/null
+++ b/qa/tasks/cephfs/test_readahead.py
@@ -0,0 +1,31 @@
+import logging
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+class TestReadahead(CephFSTestCase):
+ def test_flush(self):
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("FUSE needed for measuring op counts")
+
+ # Create 32MB file
+ self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"])
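+        # With the default 4 MiB object size (assuming the default file layout), this
+        # 32 MiB file spans 8 RADOS objects, so a readahead-capable client should be
+        # able to satisfy the sequential read below with on the order of 8 object reads.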
+
+ # Unmount and remount the client to flush cache
+ self.mount_a.umount_wait()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ initial_op_r = self.mount_a.admin_socket(['perf', 'dump', 'objecter'])['objecter']['op_r']
+ self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"])
+ op_r = self.mount_a.admin_socket(['perf', 'dump', 'objecter'])['objecter']['op_r']
+ assert op_r >= initial_op_r
+ op_r -= initial_op_r
+ log.info("read operations: {0}".format(op_r))
+
+    # with exponentially increasing readahead, we should see fewer than 10 operations;
+    # this check only verifies that the client is not doing a remote read for every local read
+ if op_r >= 32:
+ raise RuntimeError("readahead not working")
diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py
new file mode 100644
index 00000000..36b4e58e
--- /dev/null
+++ b/qa/tasks/cephfs/test_recovery_pool.py
@@ -0,0 +1,207 @@
+"""
+Test our tools for recovering metadata from the data pool into an alternate pool
+"""
+
+import logging
+import traceback
+from collections import namedtuple
+
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class OverlayWorkload(object):
+ def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
+ self._orig_fs = orig_fs
+ self._recovery_fs = recovery_fs
+ self._orig_mount = orig_mount
+ self._recovery_mount = recovery_mount
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def assert_equal(self, a, b):
+ try:
+ if a != b:
+ raise AssertionError("{0} != {1}".format(a, b))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+ # Delete every object in the metadata pool
+ objects = self._orig_fs.rados(["ls"]).split("\n")
+ for o in objects:
+ self._orig_fs.rados(["rm", o])
+
+ def flush(self):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ self._orig_fs.mds_asok(["flush", "journal"])
+ self._recovery_fs.mds_asok(["flush", "journal"])
+
+
+class SimpleOverlayWorkload(OverlayWorkload):
+ """
+ Single file, single directory, check that it gets recovered and so does its size
+ """
+ def write(self):
+ self._orig_mount.run_shell(["mkdir", "subdir"])
+ self._orig_mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._orig_mount.stat("subdir/sixmegs")
+
+ def validate(self):
+ self._recovery_mount.run_shell(["ls", "subdir"])
+ st = self._recovery_mount.stat("subdir/sixmegs")
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+class TestRecoveryPool(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 2
+ REQUIRE_RECOVERY_FILESYSTEM = True
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ def _rebuild_metadata(self, workload, other_pool=None, workers=1):
+ """
+ That when all objects in metadata pool are removed, we can rebuild a metadata pool
+ based on the contents of a data pool, and a client can see and read our files.
+ """
+
+ # First, inject some files
+
+ workload.write()
+
+ # Unmount the client and flush the journal: the tool should also cope with
+ # situations where there is dirty metadata, but we'll test that separately
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+ workload.flush()
+
+ # Create the alternate pool if requested
+ recovery_fs = self.recovery_fs.name
+ recovery_pool = self.recovery_fs.get_metadata_pool_name()
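+        # data_scan() wraps the cephfs-data-scan CLI; the init call below is roughly
+        # equivalent to running (illustrative):
+        #   cephfs-data-scan init --force-init --filesystem RECOVERY_FS \
+        #       --alternate-pool RECOVERY_METADATA_POOL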
+ self.recovery_fs.data_scan(['init', '--force-init',
+ '--filesystem', recovery_fs,
+ '--alternate-pool', recovery_pool])
+ self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
+ self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
+ self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
+ self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])
+
+ # Stop the MDS
+ self.fs.mds_stop()
+ self.fs.mds_fail()
+
+ # After recovery, we need the MDS to not be strict about stats (in production these options
+ # are off by default, but in QA we need to explicitly disable them)
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+
+ # Apply any data damage the workload wants
+ workload.damage()
+
+ # Reset the MDS map in case multiple ranks were in play: recovery procedure
+ # only understands how to rebuild metadata under rank 0
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
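+        # table_tool() wraps cephfs-table-tool (roughly "cephfs-table-tool FSNAME:0 reset TABLE");
+        # resetting the session, snap and inode tables above gives rank 0 a clean slate
+        # before the metadata is rebuilt.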
+
+ # Run the recovery procedure
+ if False:
+ with self.assertRaises(CommandFailedError):
+ # Normal reset should fail when no objects are present, we'll use --force instead
+ self.fs.journal_tool(["journal", "reset"], 0)
+
+ self.fs.mds_stop()
+ self.fs.data_scan(['scan_extents', '--alternate-pool',
+ recovery_pool, '--filesystem', self.fs.name,
+ self.fs.get_data_pool_name()])
+ self.fs.data_scan(['scan_inodes', '--alternate-pool',
+ recovery_pool, '--filesystem', self.fs.name,
+ '--force-corrupt', '--force-init',
+ self.fs.get_data_pool_name()])
+ self.fs.journal_tool(['event', 'recover_dentries', 'list',
+ '--alternate-pool', recovery_pool], 0)
+
+ self.fs.data_scan(['init', '--force-init', '--filesystem',
+ self.fs.name])
+ self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
+ '--force-corrupt', '--force-init',
+ self.fs.get_data_pool_name()])
+ self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
+
+ self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
+ self.fs.journal_tool(['journal', 'reset', '--force'], 0)
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
+ recovery_fs + ":0")
+
+ # Mark the MDS repaired
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Start the MDS
+ self.fs.mds_restart()
+ self.recovery_fs.mds_restart()
+ self.fs.wait_for_daemons()
+ self.recovery_fs.wait_for_daemons()
+ status = self.recovery_fs.status()
+ for rank in self.recovery_fs.get_ranks(status=status):
+ self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
+ 'injectargs', '--debug-mds=20')
+ self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'], rank=rank['rank'], status=status)
+ log.info(str(self.mds_cluster.status()))
+
+        # Mount the clients
+ self.mount_a.mount()
+ self.mount_b.mount(mount_fs_name=recovery_fs)
+ self.mount_a.wait_until_mounted()
+ self.mount_b.wait_until_mounted()
+
+ # See that the files are present and correct
+ errors = workload.validate()
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def test_rebuild_simple(self):
+ self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
+ self.mount_a, self.mount_b))
diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py
new file mode 100644
index 00000000..1875b5f3
--- /dev/null
+++ b/qa/tasks/cephfs/test_scrub.py
@@ -0,0 +1,175 @@
+"""
+Test CephFS scrub (distinct from OSD scrub) functionality
+"""
+import logging
+from collections import namedtuple
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class Workload(CephFSTestCase):
+ def __init__(self, filesystem, mount):
+ super().__init__()
+ self._mount = mount
+ self._filesystem = filesystem
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+ # Delete every object in the metadata pool
+ objects = self._filesystem.rados(["ls"]).split("\n")
+ for o in objects:
+ self._filesystem.rados(["rm", o])
+
+ def flush(self):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ self._filesystem.mds_asok(["flush", "journal"])
+
+
+class BacktraceWorkload(Workload):
+ """
+ Single file, single directory, wipe the backtrace and check it.
+ """
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+
+ def validate(self):
+ st = self._mount.stat("subdir/sixmegs")
+ self._filesystem.mds_asok(["flush", "journal"])
+ bt = self._filesystem.read_backtrace(st['st_ino'])
+ parent = bt['ancestors'][0]['dname']
+ self.assertEqual(parent, 'sixmegs')
+ return self._errors
+
+ def damage(self):
+ st = self._mount.stat("subdir/sixmegs")
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._filesystem._write_data_xattr(st['st_ino'], "parent", "")
+
+ def create_files(self, nfiles=1000):
+ self._mount.create_n_files("scrub-new-files/file", nfiles)
+
+
+class DupInodeWorkload(Workload):
+ """
+    Duplicate an inode and try scrubbing it twice.
+ """
+
+ def write(self):
+ self._mount.run_shell(["mkdir", "parent"])
+ self._mount.run_shell(["mkdir", "parent/child"])
+ self._mount.write_n_mb("parent/parentfile", 6)
+ self._mount.write_n_mb("parent/child/childfile", 6)
+
+ def damage(self):
+ temp_bin_path = "/tmp/10000000000.00000000_omap.bin"
+ self._mount.umount_wait()
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._filesystem.mds_stop()
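+        # Copy the "parentfile" dentry to a second omap key in the same dirfrag object,
+        # creating two dentries that reference the same inode -- the duplicate that the
+        # scrub is expected to detect.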
+ self._filesystem.rados(["getomapval", "10000000000.00000000",
+ "parentfile_head", temp_bin_path])
+ self._filesystem.rados(["setomapval", "10000000000.00000000",
+ "shadow_head"], stdin_file=temp_bin_path)
+ self._filesystem.set_ceph_conf('mds', 'mds hack allow loading invalid metadata', True)
+ self._filesystem.mds_restart()
+ self._filesystem.wait_for_daemons()
+
+ def validate(self):
+ out_json = self._filesystem.rank_tell(["scrub", "start", "/", "recursive", "repair"])
+ self.assertNotEqual(out_json, None)
+ self.assertTrue(self._filesystem.are_daemons_healthy())
+ return self._errors
+
+
+class TestScrub(CephFSTestCase):
+ MDSS_REQUIRED = 1
+
+ def setUp(self):
+ super().setUp()
+
+ def _scrub(self, workload, workers=1):
+ """
+        Apply the workload's damage, run a recursive repair scrub, and then
+        validate that the workload's files are present and correct.
+ """
+
+ # First, inject some files
+
+ workload.write()
+
+        # We need the MDS to not be strict about stats (in production these options
+        # are off by default, but in QA we need to explicitly disable them)
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+
+ # Apply any data damage the workload wants
+ workload.damage()
+
+ out_json = self.fs.rank_tell(["scrub", "start", "/", "recursive", "repair"])
+ self.assertNotEqual(out_json, None)
+
+ # See that the files are present and correct
+ errors = workload.validate()
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def _get_damage_count(self, damage_type='backtrace'):
+ out_json = self.fs.rank_tell(["damage", "ls"])
+ self.assertNotEqual(out_json, None)
+
+ damage_count = 0
+ for it in out_json:
+ if it['damage_type'] == damage_type:
+ damage_count += 1
+ return damage_count
+
+ def _scrub_new_files(self, workload):
+ """
+ That scrubbing new files does not lead to errors
+ """
+ workload.create_files(1000)
+ self._wait_until_scrub_complete()
+ self.assertEqual(self._get_damage_count(), 0)
+
+ def test_scrub_backtrace_for_new_files(self):
+ self._scrub_new_files(BacktraceWorkload(self.fs, self.mount_a))
+
+ def test_scrub_backtrace(self):
+ self._scrub(BacktraceWorkload(self.fs, self.mount_a))
+
+ def test_scrub_dup_inode(self):
+ self._scrub(DupInodeWorkload(self.fs, self.mount_a))
diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py
new file mode 100644
index 00000000..54ed16ff
--- /dev/null
+++ b/qa/tasks/cephfs/test_scrub_checks.py
@@ -0,0 +1,405 @@
+"""
+MDS admin socket scrubbing-related tests.
+"""
+import json
+import logging
+import errno
+import time
+from teuthology.exceptions import CommandFailedError
+import os
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class TestScrubControls(CephFSTestCase):
+ """
+ Test basic scrub control operations such as abort, pause and resume.
+ """
+
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 1
+
+    def _abort_scrub(self, expected):
+        res = self.fs.rank_tell(["scrub", "abort"])
+        self.assertEqual(res['return_code'], expected)
+
+    def _pause_scrub(self, expected):
+        res = self.fs.rank_tell(["scrub", "pause"])
+        self.assertEqual(res['return_code'], expected)
+
+    def _resume_scrub(self, expected):
+        res = self.fs.rank_tell(["scrub", "resume"])
+        self.assertEqual(res['return_code'], expected)
+
+    def _get_scrub_status(self):
+        return self.fs.rank_tell(["scrub", "status"])
+
+    def _check_task_status(self, expected_status):
+        task_status = self.fs.get_task_status("scrub status")
+        active = self.fs.get_active_names()
+        log.debug("current active={0}".format(active))
+        self.assertTrue(task_status[active[0]].startswith(expected_status))
+
+ def test_scrub_abort(self):
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
+ client_path = os.path.join(self.mount_a.mountpoint, test_dir)
+ log.info("client_path: {0}".format(client_path))
+
+ log.info("Cloning repo into place")
+ TestScrubChecks.clone_repo(self.mount_a, client_path)
+
+ out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # abort and verify
+ self._abort_scrub(0)
+ out_json = self._get_scrub_status()
+ self.assertTrue("no active" in out_json['status'])
+
+ # sleep enough to fetch updated task status
+ time.sleep(10)
+ self._check_task_status("idle")
+
+ def test_scrub_pause_and_resume(self):
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
+ client_path = os.path.join(self.mount_a.mountpoint, test_dir)
+ log.info("client_path: {0}".format(client_path))
+
+ log.info("Cloning repo into place")
+ _ = TestScrubChecks.clone_repo(self.mount_a, client_path)
+
+ out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # pause and verify
+ self._pause_scrub(0)
+ out_json = self._get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+
+ # sleep enough to fetch updated task status
+ time.sleep(10)
+ self._check_task_status("paused")
+
+ # resume and verify
+ self._resume_scrub(0)
+ out_json = self._get_scrub_status()
+ self.assertFalse("PAUSED" in out_json['status'])
+
+ def test_scrub_pause_and_resume_with_abort(self):
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
+ client_path = os.path.join(self.mount_a.mountpoint, test_dir)
+ log.info("client_path: {0}".format(client_path))
+
+ log.info("Cloning repo into place")
+ _ = TestScrubChecks.clone_repo(self.mount_a, client_path)
+
+ out_json = self.fs.rank_tell(["scrub", "start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # pause and verify
+ self._pause_scrub(0)
+ out_json = self._get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+
+ # sleep enough to fetch updated task status
+ time.sleep(10)
+ self._check_task_status("paused")
+
+ # abort and verify
+ self._abort_scrub(0)
+ out_json = self._get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+ self.assertTrue("0 inodes" in out_json['status'])
+
+ # sleep enough to fetch updated task status
+ time.sleep(10)
+ self._check_task_status("paused")
+
+ # resume and verify
+ self._resume_scrub(0)
+ out_json = self._get_scrub_status()
+ self.assertTrue("no active" in out_json['status'])
+
+ # sleep enough to fetch updated task status
+ time.sleep(10)
+ self._check_task_status("idle")
+
+ def test_scrub_task_status_on_mds_failover(self):
+ # sleep enough to fetch updated task status
+ time.sleep(10)
+
+ (original_active, ) = self.fs.get_active_names()
+ original_standbys = self.mds_cluster.get_standby_daemons()
+ self._check_task_status("idle")
+
+ # Kill the rank 0
+ self.fs.mds_stop(original_active)
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ def promoted():
+ active = self.fs.get_active_names()
+ return active and active[0] in original_standbys
+
+ log.info("Waiting for promotion of one of the original standbys {0}".format(
+ original_standbys))
+ self.wait_until_true(promoted, timeout=grace*2)
+
+ mgr_beacon_grace = float(self.fs.get_config("mgr_service_beacon_grace", service_type="mon"))
+
+ def status_check():
+ task_status = self.fs.get_task_status("scrub status")
+ return original_active not in task_status
+ self.wait_until_true(status_check, timeout=mgr_beacon_grace*2)
+
+class TestScrubChecks(CephFSTestCase):
+ """
+    Run flush and scrub commands on the specified files in the filesystem. This
+    test runs through a sequence of operations, but it is not comprehensive on
+    its own -- it doesn't manipulate the mds cache state to test on both
+    in- and out-of-memory parts of the hierarchy. It is therefore designed to be
+    run multiple times within a single test run, so that the test can manipulate
+    memory state between passes.
+
+    _checks() takes a run_seq argument that is incremented on each invocation
+    within a single test run; it is used to generate unique folder and file names.
+ """
+
+ MDSS_REQUIRED = 1
+ CLIENTS_REQUIRED = 1
+
+ def test_scrub_checks(self):
+ self._checks(0)
+ self._checks(1)
+
+ def _checks(self, run_seq):
+ mds_rank = 0
+ test_dir = "scrub_test_path"
+
+ abs_test_path = "/{0}".format(test_dir)
+
+ log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
+ client_path = os.path.join(self.mount_a.mountpoint, test_dir)
+ log.info("client_path: {0}".format(client_path))
+
+ log.info("Cloning repo into place")
+ repo_path = TestScrubChecks.clone_repo(self.mount_a, client_path)
+
+        log.info("Initiating mds_scrub_checks on mds.{id_}, test_path {path}, "
+                 "run_seq {seq}".format(id_=mds_rank, path=abs_test_path, seq=run_seq))
+
+ success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0)
+
+ nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path)
+ self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep),
+ lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
+ self.tell_command(mds_rank, "scrub start {nep}".format(nep=nep),
+ lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
+
+ test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path)
+ dirpath = "{repo_path}/suites".format(repo_path=test_repo_path)
+
+ if run_seq == 0:
+ log.info("First run: flushing {dirpath}".format(dirpath=dirpath))
+ command = "flush_path {dirpath}".format(dirpath=dirpath)
+ self.asok_command(mds_rank, command, success_validator)
+ command = "scrub start {dirpath}".format(dirpath=dirpath)
+ self.tell_command(mds_rank, command, success_validator)
+
+ filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format(
+ repo_path=test_repo_path)
+ if run_seq == 0:
+ log.info("First run: flushing {filepath}".format(filepath=filepath))
+ command = "flush_path {filepath}".format(filepath=filepath)
+ self.asok_command(mds_rank, command, success_validator)
+ command = "scrub start {filepath}".format(filepath=filepath)
+ self.tell_command(mds_rank, command, success_validator)
+
+ filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml". \
+ format(repo_path=test_repo_path)
+ command = "scrub start {filepath}".format(filepath=filepath)
+ self.tell_command(mds_rank, command,
+ lambda j, r: self.json_validator(j, r, "performed_validation",
+ False))
+
+ if run_seq == 0:
+ log.info("First run: flushing base dir /")
+ command = "flush_path /"
+ self.asok_command(mds_rank, command, success_validator)
+ command = "scrub start /"
+ self.tell_command(mds_rank, command, success_validator)
+
+ new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq)
+ test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path,
+ i=run_seq)
+ self.mount_a.run_shell(["mkdir", new_dir])
+ command = "flush_path {dir}".format(dir=test_new_dir)
+ self.asok_command(mds_rank, command, success_validator)
+
+ new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path,
+ i=run_seq)
+ test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path,
+ i=run_seq)
+ self.mount_a.write_n_mb(new_file, 1)
+
+ command = "flush_path {file}".format(file=test_new_file)
+ self.asok_command(mds_rank, command, success_validator)
+
+ # check that scrub fails on errors
+ ino = self.mount_a.path_to_ino(new_file)
+ rados_obj_name = "{ino:x}.00000000".format(ino=ino)
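+        # the first data object of a file is named "{inode number in hex}.00000000"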
+ command = "scrub start {file}".format(file=test_new_file)
+
+ # Missing parent xattr -> ENODATA
+ self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name())
+ self.tell_command(mds_rank, command,
+ lambda j, r: self.json_validator(j, r, "return_code", -errno.ENODATA))
+
+ # Missing object -> ENOENT
+ self.fs.rados(["rm", rados_obj_name], pool=self.fs.get_data_pool_name())
+ self.tell_command(mds_rank, command,
+ lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
+
+ command = "flush_path /"
+ self.asok_command(mds_rank, command, success_validator)
+
+ def test_scrub_repair(self):
+ mds_rank = 0
+ test_dir = "scrub_repair_path"
+
+ self.mount_a.run_shell(["sudo", "mkdir", test_dir])
+ self.mount_a.run_shell(["sudo", "touch", "{0}/file".format(test_dir)])
+ dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir))
+
+ self.mount_a.umount_wait()
+
+ # flush journal entries to dirfrag objects, and expire journal
+ self.fs.mds_asok(['flush', 'journal'])
+ self.fs.mds_stop()
+
+ # remove the dentry from dirfrag, cause incorrect fragstat/rstat
+ self.fs.rados(["rmomapkey", dir_objname, "file_head"],
+ pool=self.fs.get_metadata_pool_name())
+
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # fragstat indicates the directory is not empty, rmdir should fail
+ with self.assertRaises(CommandFailedError) as ar:
+ self.mount_a.run_shell(["sudo", "rmdir", test_dir])
+ self.assertEqual(ar.exception.exitstatus, 1)
+
+ self.tell_command(mds_rank, "scrub start /{0} repair".format(test_dir),
+ lambda j, r: self.json_validator(j, r, "return_code", 0))
+
+        # wait a few seconds for the background repair to complete
+ time.sleep(10)
+
+ # fragstat should be fixed
+ self.mount_a.run_shell(["sudo", "rmdir", test_dir])
+
+ @staticmethod
+ def json_validator(json_out, rc, element, expected_value):
+ if rc != 0:
+ return False, "asok command returned error {rc}".format(rc=rc)
+ element_value = json_out.get(element)
+ if element_value != expected_value:
+ return False, "unexpectedly got {jv} instead of {ev}!".format(
+ jv=element_value, ev=expected_value)
+ return True, "Succeeded"
+
+ def tell_command(self, mds_rank, command, validator):
+ log.info("Running command '{command}'".format(command=command))
+
+ command_list = command.split()
+ jout = self.fs.rank_tell(command_list, mds_rank)
+
+ log.info("command '{command}' returned '{jout}'".format(
+ command=command, jout=jout))
+
+ success, errstring = validator(jout, 0)
+ if not success:
+ raise AsokCommandFailedError(command, 0, jout, errstring)
+ return jout
+
+ def asok_command(self, mds_rank, command, validator):
+ log.info("Running command '{command}'".format(command=command))
+
+ command_list = command.split()
+
+ # we just assume there's an active mds for every rank
+ mds_id = self.fs.get_active_names()[mds_rank]
+ proc = self.fs.mon_manager.admin_socket('mds', mds_id,
+ command_list, check_status=False)
+ rout = proc.exitstatus
+ sout = proc.stdout.getvalue()
+
+ if sout.strip():
+ jout = json.loads(sout)
+ else:
+ jout = None
+
+        log.info("command '{command}' got response code '{rout}' and stdout '{sout}'".format(
+            command=command, rout=rout, sout=sout))
+
+ success, errstring = validator(jout, rout)
+
+ if not success:
+ raise AsokCommandFailedError(command, rout, jout, errstring)
+
+ return jout
+
+ @staticmethod
+ def clone_repo(client_mount, path):
+ repo = "ceph-qa-suite"
+ repo_path = os.path.join(path, repo)
+ client_mount.run_shell(["mkdir", "-p", path])
+
+ try:
+ client_mount.stat(repo_path)
+ except CommandFailedError:
+ client_mount.run_shell([
+ "git", "clone", '--branch', 'giant',
+ "http://github.com/ceph/{repo}".format(repo=repo),
+ "{path}/{repo}".format(path=path, repo=repo)
+ ])
+
+ return repo_path
+
+
+class AsokCommandFailedError(Exception):
+ """
+ Exception thrown when we get an unexpected response
+ on an admin socket command
+ """
+
+ def __init__(self, command, rc, json_out, errstring):
+ self.command = command
+ self.rc = rc
+ self.json = json_out
+ self.errstring = errstring
+
+ def __str__(self):
+        return ("Admin socket: {command} failed with rc={rc}, "
+                "json output={json}, because '{es}'".format(
+                    command=self.command, rc=self.rc,
+                    json=self.json, es=self.errstring))
diff --git a/qa/tasks/cephfs/test_sessionmap.py b/qa/tasks/cephfs/test_sessionmap.py
new file mode 100644
index 00000000..b44991f5
--- /dev/null
+++ b/qa/tasks/cephfs/test_sessionmap.py
@@ -0,0 +1,236 @@
+import time
+import json
+import logging
+from unittest import SkipTest
+
+from tasks.cephfs.fuse_mount import FuseMount
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.misc import sudo_write_file
+
+log = logging.getLogger(__name__)
+
+
+class TestSessionMap(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 2
+
+ def test_tell_session_drop(self):
+ """
+ That when a `tell` command is sent using the python CLI,
+ its MDS session is gone after it terminates
+ """
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ status = self.fs.status()
+ self.fs.rank_tell(["session", "ls"], status=status)
+
+ ls_data = self.fs.rank_asok(['session', 'ls'], status=status)
+ self.assertEqual(len(ls_data), 0)
+
+ def _get_connection_count(self, status=None):
+ perf = self.fs.rank_asok(["perf", "dump"], status=status)
+ conn = 0
+ for module, dump in perf.items():
+ if "AsyncMessenger::Worker" in module:
+ conn += dump['msgr_active_connections']
+ return conn
+
+ def test_tell_conn_close(self):
+ """
+ That when a `tell` command is sent using the python CLI,
+ the conn count goes back to where it started (i.e. we aren't
+ leaving connections open)
+ """
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ status = self.fs.status()
+ s = self._get_connection_count(status=status)
+ self.fs.rank_tell(["session", "ls"], status=status)
+ e = self._get_connection_count(status=status)
+
+ self.assertEqual(s, e)
+
+ def test_mount_conn_close(self):
+ """
+ That when a client unmounts, the thread count on the MDS goes back
+ to what it was before the client mounted
+ """
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ status = self.fs.status()
+ s = self._get_connection_count(status=status)
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+ self.assertGreater(self._get_connection_count(status=status), s)
+ self.mount_a.umount_wait()
+ e = self._get_connection_count(status=status)
+
+ self.assertEqual(s, e)
+
+ def test_version_splitting(self):
+ """
+ That when many sessions are updated, they are correctly
+ split into multiple versions to obey mds_sessionmap_keys_per_op
+ """
+
+ # Start umounted
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ # Configure MDS to write one OMAP key at once
+ self.set_conf('mds', 'mds_sessionmap_keys_per_op', 1)
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ # I would like two MDSs, so that I can do an export dir later
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+
+ status = self.fs.status()
+
+ # Bring the clients back
+ self.mount_a.mount()
+ self.mount_b.mount()
+ self.mount_a.create_files() # Kick the client into opening sessions
+ self.mount_b.create_files()
+
+ # See that they've got sessions
+ self.assert_session_count(2, mds_id=self.fs.get_rank(status=status)['name'])
+
+ # See that we persist their sessions
+ self.fs.rank_asok(["flush", "journal"], rank=0, status=status)
+ table_json = json.loads(self.fs.table_tool(["0", "show", "session"]))
+ log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2)))
+ self.assertEqual(table_json['0']['result'], 0)
+ self.assertEqual(len(table_json['0']['data']['sessions']), 2)
+
+ # Now, induce a "force_open_sessions" event by exporting a dir
+ self.mount_a.run_shell(["mkdir", "bravo"])
+ self.mount_a.run_shell(["touch", "bravo/file"])
+ self.mount_b.run_shell(["ls", "-l", "bravo/file"])
+
+ def get_omap_wrs():
+ return self.fs.rank_asok(['perf', 'dump', 'objecter'], rank=1, status=status)['objecter']['omap_wr']
+
+ # Flush so that there are no dirty sessions on rank 1
+ self.fs.rank_asok(["flush", "journal"], rank=1, status=status)
+
+ # Export so that we get a force_open to rank 1 for the two sessions from rank 0
+ initial_omap_wrs = get_omap_wrs()
+ self.fs.rank_asok(['export', 'dir', '/bravo', '1'], rank=0, status=status)
+
+ # This is the critical (if rather subtle) check: that in the process of doing an export dir,
+ # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There
+ # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see
+ # a single session get written out (the first of the two, triggered by the second getting marked
+ # dirty)
+ # The number of writes is two per session, because the header (sessionmap version) update and
+ # KV write both count. Also, multiply by 2 for each openfile table update.
+ self.wait_until_true(
+ lambda: get_omap_wrs() - initial_omap_wrs == 2*2,
+ timeout=30 # Long enough for an export to get acked
+ )
+
+ # Now end our sessions and check the backing sessionmap is updated correctly
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ # In-memory sessionmap check
+ self.assert_session_count(0, mds_id=self.fs.get_rank(status=status)['name'])
+
+ # On-disk sessionmap check
+ self.fs.rank_asok(["flush", "journal"], rank=0, status=status)
+ table_json = json.loads(self.fs.table_tool(["0", "show", "session"]))
+ log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2)))
+ self.assertEqual(table_json['0']['result'], 0)
+ self.assertEqual(len(table_json['0']['data']['sessions']), 0)
+
+ def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None):
+ """
+ Set up auth credentials for a client mount, and write out the keyring
+ for the client to use.
+ """
+
+ if osd_caps is None:
+ osd_caps = "allow rw"
+
+ if mon_caps is None:
+ mon_caps = "allow r"
+
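+        # Roughly equivalent to the CLI (illustrative):
+        #   ceph auth get-or-create client.ID_NAME mds "MDS_CAPS" osd "allow rw" mon "allow r"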
+ out = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.{name}".format(name=id_name),
+ "mds", mds_caps,
+ "osd", osd_caps,
+ "mon", mon_caps
+ )
+ mount.client_id = id_name
+ sudo_write_file(mount.client_remote, mount.get_keyring_path(), out)
+ self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path())
+
+ def test_session_reject(self):
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Requires FUSE client to inject client metadata")
+
+ self.mount_a.run_shell(["mkdir", "foo"])
+ self.mount_a.run_shell(["mkdir", "foo/bar"])
+ self.mount_a.umount_wait()
+
+ # Mount B will be my rejected client
+ self.mount_b.umount_wait()
+
+ # Configure a client that is limited to /foo/bar
+ self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar")
+ # Check he can mount that dir and do IO
+ self.mount_b.mount(mount_path="/foo/bar")
+ self.mount_b.wait_until_mounted()
+ self.mount_b.create_destroy()
+ self.mount_b.umount_wait()
+
+ # Configure the client to claim that its mount point metadata is /baz
+ self.set_conf("client.badguy", "client_metadata", "root=/baz")
+ # Try to mount the client, see that it fails
+ with self.assert_cluster_log("client session with non-allowable root '/baz' denied"):
+ with self.assertRaises(CommandFailedError):
+ self.mount_b.mount(mount_path="/foo/bar")
+
+ def test_session_evict_blacklisted(self):
+ """
+ Check that mds evicts blacklisted client
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Requires FUSE client to use is_blacklisted()")
+
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+ status = self.fs.status()
+
+ self.mount_a.run_shell(["mkdir", "d0", "d1"])
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self._wait_subtrees(status, 0, [('/d0', 0), ('/d1', 1)])
+
+ self.mount_a.run_shell(["touch", "d0/f0"])
+ self.mount_a.run_shell(["touch", "d1/f0"])
+ self.mount_b.run_shell(["touch", "d0/f1"])
+ self.mount_b.run_shell(["touch", "d1/f1"])
+
+ self.assert_session_count(2, mds_id=self.fs.get_rank(rank=0, status=status)['name'])
+ self.assert_session_count(2, mds_id=self.fs.get_rank(rank=1, status=status)['name'])
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id],
+ mds_id=self.fs.get_rank(rank=0, status=status)['name'])
+ self.wait_until_true(lambda: self.mount_a.is_blacklisted(), timeout=30)
+
+ # 10 seconds should be enough for evicting client
+ time.sleep(10)
+ self.assert_session_count(1, mds_id=self.fs.get_rank(rank=0, status=status)['name'])
+ self.assert_session_count(1, mds_id=self.fs.get_rank(rank=1, status=status)['name'])
+
+ self.mount_a.kill_cleanup()
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py
new file mode 100644
index 00000000..f09b645c
--- /dev/null
+++ b/qa/tasks/cephfs/test_snapshots.py
@@ -0,0 +1,530 @@
+import sys
+import logging
+import signal
+from textwrap import dedent
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.orchestra.run import CommandFailedError, Raw
+from unittest import SkipTest
+
+log = logging.getLogger(__name__)
+
+MDS_RESTART_GRACE = 60
+
+class TestSnapshots(CephFSTestCase):
+ MDSS_REQUIRED = 3
+ LOAD_SETTINGS = ["mds_max_snaps_per_dir"]
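+    # LOAD_SETTINGS asks the test framework to read these MDS config options and expose
+    # them as attributes (e.g. self.mds_max_snaps_per_dir), which the snapshot-limit
+    # tests below rely on (assumption about the CephFSTestCase helper).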
+
+ def _check_subtree(self, rank, path, status=None):
+ got_subtrees = self.fs.rank_asok(["get", "subtrees"], rank=rank, status=status)
+ for s in got_subtrees:
+ if s['dir']['path'] == path and s['auth_first'] == rank:
+ return True
+ return False
+
+ def _get_snapclient_dump(self, rank=0, status=None):
+ return self.fs.rank_asok(["dump", "snaps"], rank=rank, status=status)
+
+ def _get_snapserver_dump(self, rank=0, status=None):
+ return self.fs.rank_asok(["dump", "snaps", "--server"], rank=rank, status=status)
+
+ def _get_last_created_snap(self, rank=0, status=None):
+ return int(self._get_snapserver_dump(rank,status=status)["last_created"])
+
+ def _get_last_destroyed_snap(self, rank=0, status=None):
+ return int(self._get_snapserver_dump(rank,status=status)["last_destroyed"])
+
+ def _get_pending_snap_update(self, rank=0, status=None):
+ return self._get_snapserver_dump(rank,status=status)["pending_update"]
+
+ def _get_pending_snap_destroy(self, rank=0, status=None):
+ return self._get_snapserver_dump(rank,status=status)["pending_destroy"]
+
+ def test_kill_mdstable(self):
+ """
+        check that snaptable transactions survive MDS kills at various points
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ raise SkipTest("Require FUSE client to forcibly kill mount")
+
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ # setup subtrees
+ self.mount_a.run_shell(["mkdir", "-p", "d1/dir"])
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self.wait_until_true(lambda: self._check_subtree(1, '/d1', status=status), timeout=30)
+
+ last_created = self._get_last_created_snap(rank=0,status=status)
+
+ # mds_kill_mdstable_at:
+ # 1: MDSTableServer::handle_prepare
+ # 2: MDSTableServer::_prepare_logged
+ # 5: MDSTableServer::handle_commit
+ # 6: MDSTableServer::_commit_logged
+ for i in [1,2,5,6]:
+ log.info("testing snapserver mds_kill_mdstable_at={0}".format(i))
+
+ status = self.fs.status()
+ rank0 = self.fs.get_rank(rank=0, status=status)
+ self.fs.rank_freeze(True, rank=0)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s1{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=grace*2);
+ self.delete_mds_coredump(rank0['name']);
+
+ self.fs.rank_fail(rank=0)
+ self.fs.mds_restart(rank0['name'])
+ self.wait_for_daemon_start([rank0['name']])
+ status = self.fs.wait_for_daemons()
+
+ proc.wait()
+ last_created += 1
+ self.wait_until_true(lambda: self._get_last_created_snap(rank=0) == last_created, timeout=30)
+
+ self.set_conf("mds", "mds_reconnect_timeout", "5")
+
+ self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])
+
+ # set mds_kill_mdstable_at, also kill snapclient
+ for i in [2,5,6]:
+ log.info("testing snapserver mds_kill_mdstable_at={0}, also kill snapclient".format(i))
+ status = self.fs.status()
+ last_created = self._get_last_created_snap(rank=0, status=status)
+
+ rank0 = self.fs.get_rank(rank=0, status=status)
+ rank1 = self.fs.get_rank(rank=1, status=status)
+ self.fs.rank_freeze(True, rank=0) # prevent failover...
+ self.fs.rank_freeze(True, rank=1) # prevent failover...
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s2{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=grace*2);
+ self.delete_mds_coredump(rank0['name']);
+
+ self.fs.rank_signal(signal.SIGKILL, rank=1)
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ self.fs.rank_fail(rank=0)
+ self.fs.mds_restart(rank0['name'])
+ self.wait_for_daemon_start([rank0['name']])
+
+ self.fs.wait_for_state('up:resolve', rank=0, timeout=MDS_RESTART_GRACE)
+ if i in [2,5]:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+ elif i == 6:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0)
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.fs.rank_fail(rank=1)
+ self.fs.mds_restart(rank1['name'])
+ self.wait_for_daemon_start([rank1['name']])
+ self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+
+ if i in [2,5]:
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ if i == 2:
+ self.assertEqual(self._get_last_created_snap(rank=0), last_created)
+ else:
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])
+
+ # mds_kill_mdstable_at:
+ # 3: MDSTableClient::handle_request (got agree)
+ # 4: MDSTableClient::commit
+ # 7: MDSTableClient::handle_request (got ack)
+ for i in [3,4,7]:
+ log.info("testing snapclient mds_kill_mdstable_at={0}".format(i))
+ last_created = self._get_last_created_snap(rank=0)
+
+ status = self.fs.status()
+ rank1 = self.fs.get_rank(rank=1, status=status)
+ self.fs.rank_freeze(True, rank=1) # prevent failover...
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=1, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s3{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=grace*2);
+ self.delete_mds_coredump(rank1['name']);
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ if i in [3,4]:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+ elif i == 7:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0)
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.fs.rank_fail(rank=1)
+ self.fs.mds_restart(rank1['name'])
+ self.wait_for_daemon_start([rank1['name']])
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+
+ if i in [3,4]:
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ if i == 3:
+ self.assertEqual(self._get_last_created_snap(rank=0), last_created)
+ else:
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])
+
+ # mds_kill_mdstable_at:
+ # 3: MDSTableClient::handle_request (got agree)
+ # 8: MDSTableServer::handle_rollback
+ log.info("testing snapclient mds_kill_mdstable_at=3, snapserver mds_kill_mdstable_at=8")
+ last_created = self._get_last_created_snap(rank=0)
+
+ status = self.fs.status()
+ rank0 = self.fs.get_rank(rank=0, status=status)
+ rank1 = self.fs.get_rank(rank=1, status=status)
+ self.fs.rank_freeze(True, rank=0)
+ self.fs.rank_freeze(True, rank=1)
+        self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "8"], rank=0, status=status)
+        self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "3"], rank=1, status=status)
+        proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s4"], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=grace*2);
+ self.delete_mds_coredump(rank1['name']);
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+
+ self.fs.rank_fail(rank=1)
+ self.fs.mds_restart(rank1['name'])
+ self.wait_for_daemon_start([rank1['name']])
+
+ # rollback triggers assertion
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=grace*2);
+ self.delete_mds_coredump(rank0['name']);
+ self.fs.rank_fail(rank=0)
+ self.fs.mds_restart(rank0['name'])
+ self.wait_for_daemon_start([rank0['name']])
+ self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+
+ # mds.1 should re-send rollback message
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ self.assertEqual(self._get_last_created_snap(rank=0), last_created)
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ def test_snapclient_cache(self):
+ """
+ check if snapclient cache gets synced properly
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(3)
+ status = self.fs.wait_for_daemons()
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d1/dir"])
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d2/dir"])
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("d0/d2", "ceph.dir.pin", "2")
+ self.wait_until_true(lambda: self._check_subtree(2, '/d0/d2', status=status), timeout=30)
+ self.wait_until_true(lambda: self._check_subtree(1, '/d0/d1', status=status), timeout=5)
+ self.wait_until_true(lambda: self._check_subtree(0, '/d0', status=status), timeout=5)
+
+ def _check_snapclient_cache(snaps_dump, cache_dump=None, rank=0):
+ if cache_dump is None:
+ cache_dump = self._get_snapclient_dump(rank=rank)
+ for key, value in cache_dump.items():
+ if value != snaps_dump[key]:
+ return False
+ return True;
+
+ # sync after mksnap
+ last_created = self._get_last_created_snap(rank=0)
+ self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s1", "d0/d1/dir/.snap/s2"])
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ snaps_dump = self._get_snapserver_dump(rank=0)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ # sync after rmsnap
+ last_destroyed = self._get_last_destroyed_snap(rank=0)
+ self.mount_a.run_shell(["rmdir", "d0/d1/dir/.snap/s1"])
+ self.wait_until_true(lambda: len(self._get_pending_snap_destroy(rank=0)) == 0, timeout=30)
+ self.assertGreater(self._get_last_destroyed_snap(rank=0), last_destroyed)
+
+ snaps_dump = self._get_snapserver_dump(rank=0)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ # sync during mds recovers
+ self.fs.rank_fail(rank=2)
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ self.fs.rank_fail(rank=0)
+ self.fs.rank_fail(rank=1)
+ status = self.fs.wait_for_daemons()
+ self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ # kill at MDSTableClient::handle_notify_prep
+ status = self.fs.status()
+ rank2 = self.fs.get_rank(rank=2, status=status)
+ self.fs.rank_freeze(True, rank=2)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "9"], rank=2, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s3"], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=grace*2);
+ self.delete_mds_coredump(rank2['name']);
+
+ # mksnap should wait for notify ack from mds.2
+ self.assertFalse(proc.finished);
+
+ # mksnap should proceed after mds.2 fails
+ self.fs.rank_fail(rank=2)
+ self.wait_until_true(lambda: proc.finished, timeout=30);
+
+ self.fs.mds_restart(rank2['name'])
+ self.wait_for_daemon_start([rank2['name']])
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+
+ self.mount_a.run_shell(["rmdir", Raw("d0/d1/dir/.snap/*")])
+
+ # kill at MDSTableClient::commit
+ # the recovering mds should sync all mds' cache when it enters resolve stage
+ self.set_conf("mds", "mds_reconnect_timeout", "5")
+ for i in range(1, 4):
+ status = self.fs.status()
+ rank2 = self.fs.get_rank(rank=2, status=status)
+ self.fs.rank_freeze(True, rank=2)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "4"], rank=2, status=status)
+ last_created = self._get_last_created_snap(rank=0)
+ proc = self.mount_a.run_shell(["mkdir", "d0/d2/dir/.snap/s{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=grace*2);
+ self.delete_mds_coredump(rank2['name']);
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+
+ if i in [2,4]:
+ self.fs.rank_fail(rank=0)
+ if i in [3,4]:
+ self.fs.rank_fail(rank=1)
+
+ self.fs.rank_fail(rank=2)
+ self.fs.mds_restart(rank2['name'])
+ self.wait_for_daemon_start([rank2['name']])
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+
+ rank0_cache = self._get_snapclient_dump(rank=0)
+ rank1_cache = self._get_snapclient_dump(rank=1)
+ rank2_cache = self._get_snapclient_dump(rank=2)
+
+ self.assertGreater(int(rank0_cache["last_created"]), last_created)
+ self.assertEqual(rank0_cache, rank1_cache);
+ self.assertEqual(rank0_cache, rank2_cache);
+
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+
+ snaps_dump = self._get_snapserver_dump(rank=0)
+ self.assertEqual(snaps_dump["last_created"], rank0_cache["last_created"])
+ self.assertTrue(_check_snapclient_cache(snaps_dump, cache_dump=rank0_cache));
+
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ self.mount_a.run_shell(["rmdir", Raw("d0/d2/dir/.snap/*")])
+
+ def test_multimds_mksnap(self):
+ """
+        check that snapshots take effect across MDS-authoritative subtrees
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d1"])
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1")
+ self.wait_until_true(lambda: self._check_subtree(1, '/d0/d1', status=status), timeout=30)
+ self.wait_until_true(lambda: self._check_subtree(0, '/d0', status=status), timeout=5)
+
+ self.mount_a.write_test_pattern("d0/d1/file_a", 8 * 1024 * 1024)
+ self.mount_a.run_shell(["mkdir", "d0/.snap/s1"])
+ self.mount_a.run_shell(["rm", "-f", "d0/d1/file_a"])
+ self.mount_a.validate_test_pattern("d0/.snap/s1/d1/file_a", 8 * 1024 * 1024)
+
+ self.mount_a.run_shell(["rmdir", "d0/.snap/s1"])
+ self.mount_a.run_shell(["rm", "-rf", "d0"])
+
+ def test_multimds_past_parents(self):
+ """
+        check that past parents are properly recorded during a cross-authority rename
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell(["mkdir", "d0", "d1"])
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self.wait_until_true(lambda: self._check_subtree(1, '/d1', status=status), timeout=30)
+ self.wait_until_true(lambda: self._check_subtree(0, '/d0', status=status), timeout=5)
+
+ self.mount_a.run_shell(["mkdir", "d0/d3"])
+ self.mount_a.run_shell(["mkdir", "d0/.snap/s1"])
+ snap_name = self.mount_a.run_shell(["ls", "d0/d3/.snap"]).stdout.getvalue()
+
+ self.mount_a.run_shell(["mv", "d0/d3", "d1/d3"])
+ snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue()
+ self.assertEqual(snap_name1, snap_name);
+
+ self.mount_a.run_shell(["rmdir", "d0/.snap/s1"])
+ snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue()
+ self.assertEqual(snap_name1, "");
+
+ self.mount_a.run_shell(["rm", "-rf", "d0", "d1"])
+
+ def test_multimds_hardlink(self):
+ """
+        check that hard link snapshots work in a multi-MDS setup
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell(["mkdir", "d0", "d1"])
+
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self.wait_until_true(lambda: self._check_subtree(1, '/d1', status=status), timeout=30)
+ self.wait_until_true(lambda: self._check_subtree(0, '/d0', status=status), timeout=5)
+
+ self.mount_a.run_python(dedent("""
+ import os
+ open(os.path.join("{path}", "d0/file1"), 'w').write("asdf")
+ open(os.path.join("{path}", "d0/file2"), 'w').write("asdf")
+ """.format(path=self.mount_a.mountpoint)
+ ))
+
+ self.mount_a.run_shell(["ln", "d0/file1", "d1/file1"])
+ self.mount_a.run_shell(["ln", "d0/file2", "d1/file2"])
+
+ self.mount_a.run_shell(["mkdir", "d1/.snap/s1"])
+
+ self.mount_a.run_python(dedent("""
+ import os
+ open(os.path.join("{path}", "d0/file1"), 'w').write("qwer")
+ """.format(path=self.mount_a.mountpoint)
+ ))
+
+ self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file1"])
+
+ self.mount_a.run_shell(["rm", "-f", "d0/file2"])
+ self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"])
+
+ self.mount_a.run_shell(["rm", "-f", "d1/file2"])
+ self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"])
+
+ self.mount_a.run_shell(["rmdir", "d1/.snap/s1"])
+ self.mount_a.run_shell(["rm", "-rf", "d0", "d1"])
+
+ class SnapLimitViolationException(Exception):
+ failed_snapshot_number = -1
+
+ def __init__(self, num):
+ self.failed_snapshot_number = num
+
+ def get_snap_name(self, dir_name, sno):
+ sname = "{dir_name}/.snap/s_{sno}".format(dir_name=dir_name, sno=sno)
+ return sname
+
+ def create_snap_dir(self, sname):
+ self.mount_a.run_shell(["mkdir", sname])
+
+ def delete_dir_and_snaps(self, dir_name, snaps):
+ for sno in range(1, snaps+1, 1):
+ sname = self.get_snap_name(dir_name, sno)
+ self.mount_a.run_shell(["rmdir", sname])
+ self.mount_a.run_shell(["rmdir", dir_name])
+
+ def create_dir_and_snaps(self, dir_name, snaps):
+ self.mount_a.run_shell(["mkdir", dir_name])
+
+ for sno in range(1, snaps+1, 1):
+ sname = self.get_snap_name(dir_name, sno)
+ try:
+ self.create_snap_dir(sname)
+ except CommandFailedError as e:
+ # failing at the last mkdir beyond the limit is expected
+ if sno == snaps:
+ log.info("failed while creating snap #{}: {}".format(sno, repr(e)))
+ raise TestSnapshots.SnapLimitViolationException(sno)
+
+ def test_mds_max_snaps_per_dir_default_limit(self):
+ """
+ Test the newly introduced mds_max_snaps_per_dir option.
+ The default snapshot limit is 100.
+ Verify that the default number of snapshot directories can be created.
+ """
+ self.create_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir))
+ self.delete_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir))
+
+ def test_mds_max_snaps_per_dir_with_increased_limit(self):
+ """
+ Test the newly introduced mds_max_snaps_per_dir option.
+ First create 101 snapshot directories and ensure that creating the
+ 101st fails. Then increase the limit by one and verify that the
+ additional snapshot directory can be created.
+ """
+ # first test the default limit
+ new_limit = int(self.mds_max_snaps_per_dir)
+ self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+ try:
+ self.create_dir_and_snaps("accounts", new_limit + 1)
+ except TestSnapshots.SnapLimitViolationException as e:
+ if e.failed_snapshot_number == (new_limit + 1):
+ pass
+ # then increase the limit by one and test
+ new_limit = new_limit + 1
+ self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+ sname = self.get_snap_name("accounts", new_limit)
+ self.create_snap_dir(sname)
+ self.delete_dir_and_snaps("accounts", new_limit)
+
+ def test_mds_max_snaps_per_dir_with_reduced_limit(self):
+ """
+ Test the newly introduced mds_max_snaps_per_dir option.
+ First create 99 snapshot directories. Then reduce the limit to 98 and
+ try creating another snapshot directory, ensuring that the additional
+ creation fails.
+ """
+ # first create snapshot directories one below the default limit
+ new_limit = int(self.mds_max_snaps_per_dir) - 1
+ self.create_dir_and_snaps("accounts", new_limit)
+ sname = self.get_snap_name("accounts", new_limit + 1)
+ # then reduce the limit by one and test
+ new_limit = new_limit - 1
+ self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+ try:
+ self.create_snap_dir(sname)
+ except CommandFailedError:
+ # after reducing limit we expect the new snapshot creation to fail
+ pass
+ self.delete_dir_and_snaps("accounts", new_limit + 1)
diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py
new file mode 100644
index 00000000..f518afe7
--- /dev/null
+++ b/qa/tasks/cephfs/test_strays.py
@@ -0,0 +1,973 @@
+import json
+import time
+import logging
+from textwrap import dedent
+import datetime
+import gevent
+
+from teuthology.orchestra.run import CommandFailedError, Raw
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+log = logging.getLogger(__name__)
+
+
+class TestStrays(CephFSTestCase):
+ MDSS_REQUIRED = 2
+
+ OPS_THROTTLE = 1
+ FILES_THROTTLE = 2
+
+ # Range of different file sizes used in throttle test's workload
+ throttle_workload_size_range = 16
+
+ @for_teuthology
+ def test_ops_throttle(self):
+ self._test_throttling(self.OPS_THROTTLE)
+
+ @for_teuthology
+ def test_files_throttle(self):
+ self._test_throttling(self.FILES_THROTTLE)
+
+ def test_dir_deletion(self):
+ """
+ That when deleting a bunch of dentries and the containing
+ directory, everything gets purged.
+ Catches cases where the client might e.g. fail to trim
+ the unlinked dir from its cache.
+ """
+ file_count = 1000
+ create_script = dedent("""
+ import os
+
+ mount_path = "{mount_path}"
+ subdir = "delete_me"
+ size = {size}
+ file_count = {file_count}
+ os.mkdir(os.path.join(mount_path, subdir))
+ for i in range(0, file_count):
+ filename = "{{0}}_{{1}}.bin".format(i, size)
+ with open(os.path.join(mount_path, subdir, filename), 'w') as f:
+ f.write(size * 'x')
+ """.format(
+ mount_path=self.mount_a.mountpoint,
+ size=1024,
+ file_count=file_count
+ ))
+
+ self.mount_a.run_python(create_script)
+
+ # That the dirfrag object is created
+ self.fs.mds_asok(["flush", "journal"])
+ dir_ino = self.mount_a.path_to_ino("delete_me")
+ self.assertTrue(self.fs.dirfrag_exists(dir_ino, 0))
+
+ # Remove everything
+ self.mount_a.run_shell(["rm", "-rf", "delete_me"])
+ self.fs.mds_asok(["flush", "journal"])
+
+ # That all the removed files get created as strays
+ strays = self.get_mdc_stat("strays_created")
+ self.assertEqual(strays, file_count + 1)
+
+ # That the strays all get enqueued for purge
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("strays_enqueued"),
+ strays,
+ timeout=600
+ )
+
+ # That all the purge operations execute
+ self.wait_until_equal(
+ lambda: self.get_stat("purge_queue", "pq_executed"),
+ strays,
+ timeout=600
+ )
+
+ # That finally, the directory metadata object is gone
+ self.assertFalse(self.fs.dirfrag_exists(dir_ino, 0))
+
+ # That finally, the data objects are all gone
+ self.await_data_pool_empty()
+
+ def _test_throttling(self, throttle_type):
+ self.data_log = []
+ try:
+ return self._do_test_throttling(throttle_type)
+ except:
+ for l in self.data_log:
+ log.info(",".join([l_.__str__() for l_ in l]))
+ raise
+
+ def _do_test_throttling(self, throttle_type):
+ """
+ That the mds_max_purge_ops setting is respected
+ """
+
+ def set_throttles(files, ops):
+ """
+ Helper for updating ops/files limits, and calculating effective
+ ops_per_pg setting to give the same ops limit.
+ """
+ self.set_conf('mds', 'mds_max_purge_files', "%d" % files)
+ self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops)
+
+ pgs = self.fs.mon_manager.get_pool_property(
+ self.fs.get_data_pool_name(),
+ "pg_num"
+ )
+ ops_per_pg = float(ops) / pgs
+ self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg)
+
+ # Test conditions depend on what we're going to be exercising.
+ # * Lift the threshold on whatever throttle we are *not* testing, so
+ # that the throttle of interest is the one that will be the bottleneck
+ # * Create either many small files (test file count throttling) or fewer
+ # large files (test op throttling)
+ if throttle_type == self.OPS_THROTTLE:
+ set_throttles(files=100000000, ops=16)
+ size_unit = 1024 * 1024 # big files, generate lots of ops
+ file_multiplier = 100
+ elif throttle_type == self.FILES_THROTTLE:
+ # The default value of file limit is pretty permissive, so to avoid
+ # the test running too fast, create lots of files and set the limit
+ # pretty low.
+ set_throttles(ops=100000000, files=6)
+ size_unit = 1024 # small, numerous files
+ file_multiplier = 200
+ else:
+ raise NotImplementedError(throttle_type)
+
+ # Pick up config changes
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ create_script = dedent("""
+ import os
+
+ mount_path = "{mount_path}"
+ subdir = "delete_me"
+ size_unit = {size_unit}
+ file_multiplier = {file_multiplier}
+ os.mkdir(os.path.join(mount_path, subdir))
+ for i in range(0, file_multiplier):
+ for size in range(0, {size_range}*size_unit, size_unit):
+ filename = "{{0}}_{{1}}.bin".format(i, size // size_unit)
+ with open(os.path.join(mount_path, subdir, filename), 'w') as f:
+ f.write(size * 'x')
+ """.format(
+ mount_path=self.mount_a.mountpoint,
+ size_unit=size_unit,
+ file_multiplier=file_multiplier,
+ size_range=self.throttle_workload_size_range
+ ))
+
+ self.mount_a.run_python(create_script)
+
+ # We will run the deletion in the background, to reduce the risk of it completing before
+ # we have started monitoring the stray statistics.
+ def background():
+ self.mount_a.run_shell(["rm", "-rf", "delete_me"])
+ self.fs.mds_asok(["flush", "journal"])
+
+ background_thread = gevent.spawn(background)
+
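+ # The +1 accounts for the containing 'delete_me' directory, which also
+ # becomes a stray when it is removed.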
+ total_inodes = file_multiplier * self.throttle_workload_size_range + 1
+ mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds'))
+ mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds'))
+
+ # During this phase we look for the concurrent ops to exceed half
+ # the limit (a heuristic) and not exceed the limit (a correctness
+ # condition).
+ purge_timeout = 600
+ elapsed = 0
+ files_high_water = 0
+ ops_high_water = 0
+
+ while True:
+ stats = self.fs.mds_asok(['perf', 'dump'])
+ mdc_stats = stats['mds_cache']
+ pq_stats = stats['purge_queue']
+ if elapsed >= purge_timeout:
+ raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats))
+
+ num_strays = mdc_stats['num_strays']
+ num_strays_purging = pq_stats['pq_executing']
+ num_purge_ops = pq_stats['pq_executing_ops']
+ files_high_water = pq_stats['pq_executing_high_water']
+ ops_high_water = pq_stats['pq_executing_ops_high_water']
+
+ self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water])
+
+ total_strays_created = mdc_stats['strays_created']
+ total_strays_purged = pq_stats['pq_executed']
+
+ if total_strays_purged == total_inodes:
+ log.info("Complete purge in {0} seconds".format(elapsed))
+ break
+ elif total_strays_purged > total_inodes:
+ raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats))
+ else:
+ if throttle_type == self.OPS_THROTTLE:
+ # 11 is filer_max_purge_ops plus one for the backtrace:
+ # limit is allowed to be overshot by this much.
+ if num_purge_ops > mds_max_purge_ops + 11:
+ raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format(
+ num_purge_ops, mds_max_purge_ops
+ ))
+ elif throttle_type == self.FILES_THROTTLE:
+ if num_strays_purging > mds_max_purge_files:
+ raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format(
+ num_strays_purging, mds_max_purge_files
+ ))
+ else:
+ raise NotImplementedError(throttle_type)
+
+ log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format(
+ num_strays_purging, num_strays,
+ total_strays_purged, total_strays_created
+ ))
+ time.sleep(1)
+ elapsed += 1
+
+ background_thread.join()
+
+ # Check that we got up to a respectable rate during the purge. This is totally
+ # racy, but should be safeish unless the cluster is pathologically slow, or
+ # insanely fast such that the deletions all pass before we have polled the
+ # statistics.
+ if throttle_type == self.OPS_THROTTLE:
+ if ops_high_water < mds_max_purge_ops // 2:
+ raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format(
+ ops_high_water, mds_max_purge_ops
+ ))
+ # The MDS may go over mds_max_purge_ops for some items, like a
+ # heavily fragmented directory. The throttle does not kick in
+ # until *after* we reach or exceed the limit. This is expected
+ # because we don't want to starve the PQ or never purge a
+ # particularly large file/directory.
+ self.assertLessEqual(ops_high_water, mds_max_purge_ops+64)
+ elif throttle_type == self.FILES_THROTTLE:
+ if files_high_water < mds_max_purge_files // 2:
+ raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format(
+ files_high_water, mds_max_purge_files
+ ))
+ self.assertLessEqual(files_high_water, mds_max_purge_files)
+
+ # Sanity check all MDC stray stats
+ stats = self.fs.mds_asok(['perf', 'dump'])
+ mdc_stats = stats['mds_cache']
+ pq_stats = stats['purge_queue']
+ self.assertEqual(mdc_stats['num_strays'], 0)
+ self.assertEqual(mdc_stats['num_strays_delayed'], 0)
+ self.assertEqual(pq_stats['pq_executing'], 0)
+ self.assertEqual(pq_stats['pq_executing_ops'], 0)
+ self.assertEqual(mdc_stats['strays_created'], total_inodes)
+ self.assertEqual(mdc_stats['strays_enqueued'], total_inodes)
+ self.assertEqual(pq_stats['pq_executed'], total_inodes)
+
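+ # Helpers for reading MDS perf counters ('perf dump') via the admin socket.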
+ def get_mdc_stat(self, name, mds_id=None):
+ return self.get_stat("mds_cache", name, mds_id)
+
+ def get_stat(self, subsys, name, mds_id=None):
+ return self.fs.mds_asok(['perf', 'dump', subsys, name],
+ mds_id=mds_id)[subsys][name]
+
+ def _wait_for_counter(self, subsys, counter, expect_val, timeout=60,
+ mds_id=None):
+ self.wait_until_equal(
+ lambda: self.get_stat(subsys, counter, mds_id),
+ expect_val=expect_val, timeout=timeout,
+ reject_fn=lambda x: x > expect_val
+ )
+
+ def test_open_inode(self):
+ """
+ That the case of a dentry unlinked while a client holds an
+ inode open is handled correctly.
+
+ The inode should be moved into a stray dentry, while the original
+ dentry and directory should be purged.
+
+ The inode's data should be purged when the client eventually closes
+ it.
+ """
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # Write some bytes to a file
+ size_mb = 8
+
+ # Hold the file open
+ p = self.mount_a.open_background("open_file")
+ self.mount_a.write_n_mb("open_file", size_mb)
+ open_file_ino = self.mount_a.path_to_ino("open_file")
+
+ self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2)
+
+ # Unlink the dentry
+ self.mount_a.run_shell(["rm", "-f", "open_file"])
+
+ # Wait to see the stray count increment
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=1, timeout=60, reject_fn=lambda x: x > 1)
+
+ # See that while the stray count has incremented, none have passed
+ # on to the purge queue
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+ self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0)
+
+ # See that the client still holds 2 caps
+ self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2)
+
+ # See that the data objects remain in the data pool
+ self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024))
+
+ # Now close the file
+ self.mount_a.kill_background(p)
+
+ # Wait to see the client cap count decrement
+ self.wait_until_equal(
+ lambda: self.get_session(mount_a_client_id)['num_caps'],
+ expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1
+ )
+ # Wait to see the purge counter increment, stray count go to zero
+ self._wait_for_counter("mds_cache", "strays_enqueued", 1)
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0, timeout=6, reject_fn=lambda x: x > 1
+ )
+ self._wait_for_counter("purge_queue", "pq_executed", 1)
+
+ # See that the data objects no longer exist
+ self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024))
+
+ self.await_data_pool_empty()
+
+ def test_hardlink_reintegration(self):
+ """
+ That removal of the primary dentry of a hardlinked inode results
+ in reintegration of the inode into the previously-remote dentry,
+ rather than the inode lingering as a stray indefinitely.
+ """
+ # Write some bytes to file_a
+ size_mb = 8
+ self.mount_a.run_shell(["mkdir", "dir_1"])
+ self.mount_a.write_n_mb("dir_1/file_a", size_mb)
+ ino = self.mount_a.path_to_ino("dir_1/file_a")
+
+ # Create a hardlink named file_b
+ self.mount_a.run_shell(["mkdir", "dir_2"])
+ self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"])
+ self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino)
+
+ # Flush journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # See that backtrace for the file points to the file_a path
+ pre_unlink_bt = self.fs.read_backtrace(ino)
+ self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a")
+
+ # empty the MDS cache; otherwise the MDS reintegrates the stray when the unlink finishes
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(['flush', 'journal'])
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount()
+
+ # Unlink file_a
+ self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"])
+
+ # See that a stray was created
+ self.assertEqual(self.get_mdc_stat("num_strays"), 1)
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+
+ # Wait, see that data objects are still present (i.e. that the
+ # stray did not advance to purging given time)
+ time.sleep(30)
+ self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024))
+ self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0)
+
+ # See that before reintegration, the inode's backtrace points to a stray dir
+ self.fs.mds_asok(['flush', 'journal'])
+ self.assertTrue(self.get_backtrace_path(ino).startswith("stray"))
+
+ last_reintegrated = self.get_mdc_stat("strays_reintegrated")
+
+ # Do a metadata operation on the remaining link (mv is heavy-handed, but
+ # others like touch may be satisfied from caps without poking the MDS)
+ self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"])
+
+ # Stray reintegration should happen as a result of the eval_remote call
+ # on responding to a client request.
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+
+ # See the reintegration counter increment
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.assertGreater(curr_reintegrated, last_reintegrated)
+ last_reintegrated = curr_reintegrated
+
+ # Flush the journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # See that the backtrace for the file points to the remaining link's path
+ post_reint_bt = self.fs.read_backtrace(ino)
+ self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c")
+
+ # the MDS should reintegrate the stray when the unlink finishes
+ self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"])
+ self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"])
+
+ # Stray reintegration should happen as a result of the notify_stray call
+ # on completion of unlink
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+
+ # See the reintegration counter increment
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.assertGreater(curr_reintegrated, last_reintegrated)
+ last_reintegrated = curr_reintegrated
+
+ # Flush the journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # See that the backtrace for the file points to the newest link's path
+ post_reint_bt = self.fs.read_backtrace(ino)
+ self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d")
+
+ # Now really delete it
+ self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"])
+ self._wait_for_counter("mds_cache", "strays_enqueued", 1)
+ self._wait_for_counter("purge_queue", "pq_executed", 1)
+
+ self.assert_purge_idle()
+ self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024))
+
+ # We caused the inode to go stray 3 times
+ self.assertEqual(self.get_mdc_stat("strays_created"), 3)
+ # We purged it at the last
+ self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1)
+
+ def test_mv_hardlink_cleanup(self):
+ """
+ That when doing a rename from A to B, and B has hardlinks,
+ then we make a stray for B which is then reintegrated
+ into one of its hardlinks.
+ """
+ # Create file_a, file_b, and a hardlink to file_b
+ size_mb = 8
+ self.mount_a.write_n_mb("file_a", size_mb)
+ file_a_ino = self.mount_a.path_to_ino("file_a")
+
+ self.mount_a.write_n_mb("file_b", size_mb)
+ file_b_ino = self.mount_a.path_to_ino("file_b")
+
+ self.mount_a.run_shell(["ln", "file_b", "linkto_b"])
+ self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino)
+
+ # mv file_a file_b
+ self.mount_a.run_shell(["mv", "file_a", "file_b"])
+
+ # Stray reintegration should happen as a result of the notify_stray call on
+ # completion of rename
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+ self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1)
+
+ # No data objects should have been deleted, as both files still have linkage.
+ self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024))
+ self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024))
+
+ self.fs.mds_asok(['flush', 'journal'])
+
+ post_reint_bt = self.fs.read_backtrace(file_b_ino)
+ self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b")
+
+ def _setup_two_ranks(self):
+ # Set up two MDSs
+ self.fs.set_max_mds(2)
+
+ # See that we have two active MDSs
+ self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ active_mds_names = self.fs.get_active_names()
+ rank_0_id = active_mds_names[0]
+ rank_1_id = active_mds_names[1]
+ log.info("Ranks 0 and 1 are {0} and {1}".format(
+ rank_0_id, rank_1_id))
+
+ # Get rid of other MDS daemons so that it's easier to know which
+ # daemons to expect in which ranks after restarts
+ for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}:
+ self.mds_cluster.mds_stop(unneeded_mds)
+ self.mds_cluster.mds_fail(unneeded_mds)
+
+ return rank_0_id, rank_1_id
+
+ def _force_migrate(self, to_id, path, watch_ino):
+ """
+ :param to_id: MDS id to move it to
+ :param path: Filesystem path (string) to move
+ :param watch_ino: Inode number to look for at destination to confirm move
+ :return: None
+ """
+ self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", "1", path])
+
+ # Poll the MDS cache dump to watch for the export completing
+ migrated = False
+ migrate_timeout = 60
+ migrate_elapsed = 0
+ while not migrated:
+ data = self.fs.mds_asok(["dump", "cache"], to_id)
+ for inode_data in data:
+ if inode_data['ino'] == watch_ino:
+ log.debug("Found ino in cache: {0}".format(json.dumps(inode_data, indent=2)))
+ if inode_data['is_auth'] is True:
+ migrated = True
+ break
+
+ if not migrated:
+ if migrate_elapsed > migrate_timeout:
+ raise RuntimeError("Migration hasn't happened after {0}s!".format(migrate_elapsed))
+ else:
+ migrate_elapsed += 1
+ time.sleep(1)
+
+ def _is_stopped(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank not in [i['rank'] for i in mds_map['info'].values()]
+
+ def test_purge_on_shutdown(self):
+ """
+ That when an MDS rank is shut down, its purge queue is
+ drained in the process.
+ """
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
+ self.mds_cluster.mds_fail_restart(rank_1_id)
+ self.fs.wait_for_daemons()
+
+ file_count = 5
+
+ self.mount_a.create_n_files("delete_me/file", file_count)
+
+ self._force_migrate(rank_1_id, "delete_me",
+ self.mount_a.path_to_ino("delete_me/file_0"))
+
+ self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
+ self.mount_a.umount_wait()
+
+ # See all the strays go into purge queue
+ self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id)
+ self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id)
+ self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0)
+
+ # See nothing get purged from the purge queue (yet)
+ time.sleep(10)
+ self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
+
+ # Shut down rank 1
+ self.fs.set_max_mds(1)
+
+ # It shouldn't proceed past stopping because it's still not allowed
+ # to purge
+ time.sleep(10)
+ self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
+ self.assertFalse(self._is_stopped(1))
+
+ # Permit the daemon to start purging again
+ self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id),
+ 'injectargs',
+ "--mds_max_purge_files 100")
+
+ # It should now proceed through shutdown
+ self.fs.wait_for_daemons(timeout=120)
+
+ # ...and in the process purge all that data
+ self.await_data_pool_empty()
+
+ def test_migration_on_shutdown(self):
+ """
+ That when an MDS rank is shut down, any non-purgeable strays
+ get migrated to another rank.
+ """
+
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ # Create a non-purgeable stray in a ~mds1 stray directory
+ # by doing a hard link and deleting the original file
+ self.mount_a.run_shell(["mkdir", "dir_1", "dir_2"])
+ self.mount_a.run_shell(["touch", "dir_1/original"])
+ self.mount_a.run_shell(["ln", "dir_1/original", "dir_2/linkto"])
+
+ self._force_migrate(rank_1_id, "dir_1",
+ self.mount_a.path_to_ino("dir_1/original"))
+
+ # empty the MDS cache; otherwise the MDS reintegrates the stray when the unlink finishes
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(['flush', 'journal'], rank_0_id)
+ self.fs.mds_asok(['flush', 'journal'], rank_1_id)
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ active_mds_names = self.fs.get_active_names()
+ rank_0_id = active_mds_names[0]
+ rank_1_id = active_mds_names[1]
+
+ self.mount_a.mount()
+
+ self.mount_a.run_shell(["rm", "-f", "dir_1/original"])
+ self.mount_a.umount_wait()
+
+ self._wait_for_counter("mds_cache", "strays_created", 1,
+ mds_id=rank_1_id)
+
+ # Shut down rank 1
+ self.fs.set_max_mds(1)
+ self.fs.wait_for_daemons(timeout=120)
+
+ # See that the stray counter on rank 0 has incremented
+ self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1)
+
+ def assert_backtrace(self, ino, expected_path):
+ """
+ Assert that the backtrace in the data pool for an inode matches
+ an expected /foo/bar path.
+ """
+ expected_elements = expected_path.strip("/").split("/")
+ bt = self.fs.read_backtrace(ino)
+ actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']]))
+ self.assertListEqual(expected_elements, actual_elements)
+
+ def get_backtrace_path(self, ino):
+ bt = self.fs.read_backtrace(ino)
+ elements = reversed([dn['dname'] for dn in bt['ancestors']])
+ return "/".join(elements)
+
+ def assert_purge_idle(self):
+ """
+ Assert that the MDS perf counters indicate no strays exist and
+ no ongoing purge activity. Sanity check for when PurgeQueue should
+ be idle.
+ """
+ mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache']
+ pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue']
+ self.assertEqual(mdc_stats["num_strays"], 0)
+ self.assertEqual(mdc_stats["num_strays_delayed"], 0)
+ self.assertEqual(pq_stats["pq_executing"], 0)
+ self.assertEqual(pq_stats["pq_executing_ops"], 0)
+
+ def test_mv_cleanup(self):
+ """
+ That when doing a rename from A to B, and B has no hardlinks,
+ then we make a stray for B and purge it.
+ """
+ # Create file_a and file_b, write some to both
+ size_mb = 8
+ self.mount_a.write_n_mb("file_a", size_mb)
+ file_a_ino = self.mount_a.path_to_ino("file_a")
+ self.mount_a.write_n_mb("file_b", size_mb)
+ file_b_ino = self.mount_a.path_to_ino("file_b")
+
+ self.fs.mds_asok(['flush', 'journal'])
+ self.assert_backtrace(file_a_ino, "file_a")
+ self.assert_backtrace(file_b_ino, "file_b")
+
+ # mv file_a file_b
+ self.mount_a.run_shell(['mv', 'file_a', 'file_b'])
+
+ # See that stray counter increments
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+ # Wait for purge counter to increment
+ self._wait_for_counter("mds_cache", "strays_enqueued", 1)
+ self._wait_for_counter("purge_queue", "pq_executed", 1)
+
+ self.assert_purge_idle()
+
+ # file_b should have been purged
+ self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024))
+
+ # Backtrace should have updated from file_a to file_b
+ self.fs.mds_asok(['flush', 'journal'])
+ self.assert_backtrace(file_a_ino, "file_b")
+
+ # file_a's data should still exist
+ self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024))
+
+ def _pool_df(self, pool_name):
+ """
+ Return a dict like
+ {
+ "kb_used": 0,
+ "bytes_used": 0,
+ "max_avail": 19630292406,
+ "objects": 0
+ }
+
+ :param pool_name: Which pool (must exist)
+ """
+ out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")
+ for p in json.loads(out)['pools']:
+ if p['name'] == pool_name:
+ return p['stats']
+
+ raise RuntimeError("Pool '{0}' not found".format(pool_name))
+
+ def await_data_pool_empty(self):
+ self.wait_until_true(
+ lambda: self._pool_df(
+ self.fs.get_data_pool_name()
+ )['objects'] == 0,
+ timeout=60)
+
+ def test_snapshot_remove(self):
+ """
+ That removal of a snapshot that references a now-unlinked file results
+ in purging of the stray for the file.
+ """
+ # Enable snapshots
+ self.fs.set_allow_new_snaps(True)
+
+ # Create a dir with a file in it
+ size_mb = 8
+ self.mount_a.run_shell(["mkdir", "snapdir"])
+ self.mount_a.run_shell(["mkdir", "snapdir/subdir"])
+ self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024)
+ file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a")
+
+ # Snapshot the dir
+ self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"])
+
+ # Cause the head revision to deviate from the snapshot
+ self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb)
+
+ # Flush the journal so that backtraces, dirfrag objects will actually be written
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Unlink the file
+ self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"])
+ self.mount_a.run_shell(["rmdir", "snapdir/subdir"])
+
+ # Unmount the client so that when we later check that the data is still
+ # in the file, we read it from the cluster rather than the page cache.
+ self.mount_a.umount_wait()
+
+ self.assertEqual(self.get_mdc_stat("strays_created"), 2)
+
+ # FIXME: at this stage we see a purge and the stray count drops to
+ # zero, but there's actually still a stray, so at the very
+ # least the StrayManager stats code is slightly off
+
+ self.mount_a.mount()
+
+ # See that the data from the snapshotted revision of the file is still present
+ # and correct
+ self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024)
+
+ # Remove the snapshot
+ self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"])
+
+ # Purging file_a doesn't happen until after we've flushed the journal, because
+ # it is referenced by the snapshotted subdir, and the snapshot isn't really
+ # gone until the journal references to it are gone
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs.
+ # See also: http://tracker.ceph.com/issues/20072
+ self.wait_until_true(
+ lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024),
+ timeout=60
+ )
+
+ # See that a purge happens now
+ self._wait_for_counter("mds_cache", "strays_enqueued", 2)
+ self._wait_for_counter("purge_queue", "pq_executed", 2)
+
+ self.await_data_pool_empty()
+
+ def test_fancy_layout(self):
+ """
+ purge stray file with fancy layout
+ """
+
+ file_name = "fancy_layout_file"
+ self.mount_a.run_shell(["touch", file_name])
+
+ file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608"
+ self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout)
+
+ # 35MB requires 7 objects
+ size_mb = 35
+ self.mount_a.write_n_mb(file_name, size_mb)
+
+ self.mount_a.run_shell(["rm", "-f", file_name])
+ self.fs.mds_asok(["flush", "journal"])
+
+ # can't use self.fs.data_objects_absent here, as it does not support fancy layouts
+ self.await_data_pool_empty()
+
+ def test_dirfrag_limit(self):
+ """
+ That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations).
+
+ That fragmentation (forced) will allow more entries to be created.
+
+ That unlinking fails when the stray directory fragment becomes too large and that unlinking may continue once those strays are purged.
+ """
+
+ LOW_LIMIT = 50
+ for mds in self.fs.get_daemon_names():
+ self.fs.mds_asok(["config", "set", "mds_bal_fragment_size_max", str(LOW_LIMIT)], mds)
+
+ try:
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir")
+ os.mkdir(path)
+ for n in range(0, {file_count}):
+ with open(os.path.join(path, "%s" % n), 'w') as f:
+ f.write(str(n))
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=LOW_LIMIT+1
+ )))
+ except CommandFailedError:
+ pass # ENOSPC is expected
+ else:
+ raise RuntimeError("fragment size exceeded")
+
+ # Now test that we can go beyond the limit if we fragment the directory
+
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir2")
+ os.mkdir(path)
+ for n in range(0, {file_count}):
+ with open(os.path.join(path, "%s" % n), 'w') as f:
+ f.write(str(n))
+ dfd = os.open(path, os.O_DIRECTORY)
+ os.fsync(dfd)
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=LOW_LIMIT
+ )))
+
+ # Ensure that subdir2 is fragmented
+ mds_id = self.fs.get_active_names()[0]
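+ # Split dirfrag 0/0 of /subdir2 by one bit so that its entries are spread
+ # across two fragments, each subject to the size limit separately.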
+ self.fs.mds_asok(["dirfrag", "split", "/subdir2", "0/0", "1"], mds_id)
+
+ # remount+flush (release client caps)
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ self.mount_a.mount()
+ self.mount_a.wait_until_mounted()
+
+ # Create 50% more files than the current fragment limit
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir2")
+ for n in range({file_count}, ({file_count}*3)//2):
+ with open(os.path.join(path, "%s" % n), 'w') as f:
+ f.write(str(n))
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=LOW_LIMIT
+ )))
+
+ # Now test the stray directory size is limited and recovers
+ strays_before = self.get_mdc_stat("strays_created")
+ try:
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir3")
+ os.mkdir(path)
+ for n in range({file_count}):
+ fpath = os.path.join(path, "%s" % n)
+ with open(fpath, 'w') as f:
+ f.write(str(n))
+ os.unlink(fpath)
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=LOW_LIMIT*10 # 10 stray directories, should collide before this count
+ )))
+ except CommandFailedError:
+ pass # ENOSPC is expected
+ else:
+ raise RuntimeError("fragment size exceeded")
+
+ strays_after = self.get_mdc_stat("strays_created")
+ self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT)
+
+ self._wait_for_counter("mds_cache", "strays_enqueued", strays_after)
+ self._wait_for_counter("purge_queue", "pq_executed", strays_after)
+
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir4")
+ os.mkdir(path)
+ for n in range({file_count}):
+ fpath = os.path.join(path, "%s" % n)
+ with open(fpath, 'w') as f:
+ f.write(str(n))
+ os.unlink(fpath)
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=LOW_LIMIT
+ )))
+
+ def test_purge_queue_upgrade(self):
+ """
+ That when starting on a system with no purge queue in the metadata
+ pool, we silently create one.
+ :return:
+ """
+
+ self.mds_cluster.mds_stop()
+ self.mds_cluster.mds_fail()
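+ # 500.00000000 is the purge queue journal header object (inode 0x500) in the
+ # metadata pool; removing it simulates a filesystem created before the purge
+ # queue existed.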
+ self.fs.rados(["rm", "500.00000000"])
+ self.mds_cluster.mds_restart()
+ self.fs.wait_for_daemons()
+
+ def test_replicated_delete_speed(self):
+ """
+ That deletions of replicated metadata are not pathologically slow
+ """
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
+ self.mds_cluster.mds_fail_restart(rank_1_id)
+ self.fs.wait_for_daemons()
+
+ file_count = 10
+
+ self.mount_a.create_n_files("delete_me/file", file_count)
+
+ self._force_migrate(rank_1_id, "delete_me",
+ self.mount_a.path_to_ino("delete_me/file_0"))
+
+ begin = datetime.datetime.now()
+ self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
+ end = datetime.datetime.now()
+
+ # What we're really checking here is that we are completing client
+ # operations immediately rather than delaying until the next tick.
+ tick_period = float(self.fs.get_config("mds_tick_interval",
+ service_type="mds"))
+
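+ # If every unlink waited for the next tick, the deletions would take roughly
+ # file_count * tick_period seconds; require well under that.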
+ duration = (end - begin).total_seconds()
+ self.assertLess(duration, (file_count * tick_period) * 0.25)
+
diff --git a/qa/tasks/cephfs/test_volume_client.py b/qa/tasks/cephfs/test_volume_client.py
new file mode 100644
index 00000000..7f66218c
--- /dev/null
+++ b/qa/tasks/cephfs/test_volume_client.py
@@ -0,0 +1,1765 @@
+import json
+import logging
+import os
+from textwrap import dedent
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.fuse_mount import FuseMount
+from teuthology.exceptions import CommandFailedError
+from teuthology.misc import sudo_write_file
+
+log = logging.getLogger(__name__)
+
+
+class TestVolumeClient(CephFSTestCase):
+ # One for looking at the global filesystem, one for being
+ # the VolumeClient, two for mounting the created shares
+ CLIENTS_REQUIRED = 4
+
+ def setUp(self):
+ CephFSTestCase.setUp(self)
+
+ def _volume_client_python(self, client, script, vol_prefix=None, ns_prefix=None):
+ # Can't dedent this *and* the script we pass in, because they might have different
+ # levels of indentation to begin with, so leave this string zero-indented
+ if vol_prefix:
+ vol_prefix = "\"" + vol_prefix + "\""
+ if ns_prefix:
+ ns_prefix = "\"" + ns_prefix + "\""
+ return client.run_python("""
+from __future__ import print_function
+from ceph_volume_client import CephFSVolumeClient, VolumePath
+from sys import version_info as sys_version_info
+from rados import OSError as rados_OSError
+import logging
+log = logging.getLogger("ceph_volume_client")
+log.addHandler(logging.StreamHandler())
+log.setLevel(logging.DEBUG)
+vc = CephFSVolumeClient("manila", "{conf_path}", "ceph", {vol_prefix}, {ns_prefix})
+vc.connect()
+{payload}
+vc.disconnect()
+ """.format(payload=script, conf_path=client.config_path,
+ vol_prefix=vol_prefix, ns_prefix=ns_prefix))
+
+ def _configure_vc_auth(self, mount, id_name):
+ """
+ Set up auth credentials for the VolumeClient user
+ """
+ out = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.{name}".format(name=id_name),
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+ mount.client_id = id_name
+ sudo_write_file(mount.client_remote, mount.get_keyring_path(), out)
+ self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path())
+
+ def _configure_guest_auth(self, volumeclient_mount, guest_mount,
+ guest_entity, mount_path,
+ namespace_prefix=None, readonly=False,
+ tenant_id=None, allow_existing_id=False):
+ """
+ Set up auth credentials for the guest client to mount a volume.
+
+ :param volumeclient_mount: mount used as the handle for driving
+ volumeclient.
+ :param guest_mount: mount used by the guest client.
+ :param guest_entity: auth ID used by the guest client.
+ :param mount_path: path of the volume.
+ :param namespace_prefix: name prefix of the RADOS namespace, which
+ is used for the volume's layout.
+ :param readonly: defaults to False. If set to 'True' only read-only
+ mount access is granted to the guest.
+ :param tenant_id: (OpenStack) tenant ID of the guest client.
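+ :param allow_existing_id: defaults to False. If set to 'True', authorization
+ proceeds even if the auth ID already exists (passed
+ through to vc.authorize()).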
+ """
+
+ head, volume_id = os.path.split(mount_path)
+ head, group_id = os.path.split(head)
+ head, volume_prefix = os.path.split(head)
+ volume_prefix = "/" + volume_prefix
+
+ # Authorize the guest client's auth ID to mount the volume.
+ key = self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ auth_result = vc.authorize(vp, "{guest_entity}", readonly={readonly},
+ tenant_id="{tenant_id}",
+ allow_existing_id="{allow_existing_id}")
+ print(auth_result['auth_key'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guest_entity,
+ readonly=readonly,
+ tenant_id=tenant_id,
+ allow_existing_id=allow_existing_id)), volume_prefix, namespace_prefix
+ )
+
+ # CephFSVolumeClient's authorize() does not return the secret
+ # key to a caller who isn't multi-tenant aware. Explicitly
+ # query the key for such a client.
+ if not tenant_id:
+ key = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-key", "client.{name}".format(name=guest_entity),
+ )
+
+ # The guest auth ID should exist.
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertIn("client.{0}".format(guest_entity), existing_ids)
+
+ # Create keyring file for the guest client.
+ keyring_txt = dedent("""
+ [client.{guest_entity}]
+ key = {key}
+
+ """.format(
+ guest_entity=guest_entity,
+ key=key
+ ))
+ guest_mount.client_id = guest_entity
+ sudo_write_file(guest_mount.client_remote,
+ guest_mount.get_keyring_path(), keyring_txt)
+
+ # Add a guest client section to the ceph config file.
+ self.set_conf("client.{0}".format(guest_entity), "client quota", "True")
+ self.set_conf("client.{0}".format(guest_entity), "debug client", "20")
+ self.set_conf("client.{0}".format(guest_entity), "debug objecter", "20")
+ self.set_conf("client.{0}".format(guest_entity),
+ "keyring", guest_mount.get_keyring_path())
+
+ def test_default_prefix(self):
+ group_id = "grpid"
+ volume_id = "volid"
+ DEFAULT_VOL_PREFIX = "volumes"
+ DEFAULT_NS_PREFIX = "fsvolumens_"
+
+ self.mount_b.umount_wait()
+ self._configure_vc_auth(self.mount_b, "manila")
+
+ # create a volume with the default prefix
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 10, data_isolated=True)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # The dir should be created
+ self.mount_a.stat(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id))
+
+ # namespace should be set
+ ns_in_attr = self.mount_a.getfattr(os.path.join(DEFAULT_VOL_PREFIX, group_id, volume_id), "ceph.dir.layout.pool_namespace")
+ namespace = "{0}{1}".format(DEFAULT_NS_PREFIX, volume_id)
+ self.assertEqual(namespace, ns_in_attr)
+
+
+ def test_lifecycle(self):
+ """
+ General smoke test for create, extend, destroy
+ """
+
+ # I'm going to use mount_c later as a guest for mounting the created
+ # shares
+ self.mounts[2].umount_wait()
+
+ # I'm going to leave mount_b unmounted and just use it as a handle for
+ # driving volumeclient. It's a little hacky but we don't have a more
+ # general concept for librados/libcephfs clients as opposed to full
+ # blown mounting clients.
+ self.mount_b.umount_wait()
+ self._configure_vc_auth(self.mount_b, "manila")
+
+ guest_entity = "guest"
+ group_id = "grpid"
+ volume_id = "volid"
+
+ volume_prefix = "/myprefix"
+ namespace_prefix = "mynsprefix_"
+
+ # Create a 100MB volume
+ volume_size = 100
+ mount_path = self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 1024*1024*{volume_size})
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ volume_size=volume_size
+ )), volume_prefix, namespace_prefix)
+
+ # The dir should be created
+ self.mount_a.stat(os.path.join("myprefix", group_id, volume_id))
+
+ # Authorize and configure credentials for the guest to mount
+ # the volume.
+ self._configure_guest_auth(self.mount_b, self.mounts[2], guest_entity,
+ mount_path, namespace_prefix)
+ self.mounts[2].mount(mount_path=mount_path)
+
+ # The kernel client doesn't have the quota-based df behaviour,
+ # or quotas at all, so only exercise the client behaviour when
+ # running fuse.
+ if isinstance(self.mounts[2], FuseMount):
+ # df should see volume size, same as the quota set on volume's dir
+ self.assertEqual(self.mounts[2].df()['total'],
+ volume_size * 1024 * 1024)
+ self.assertEqual(
+ self.mount_a.getfattr(
+ os.path.join(volume_prefix.strip("/"), group_id, volume_id),
+ "ceph.quota.max_bytes"),
+ "%s" % (volume_size * 1024 * 1024))
+
+ # df granularity is 4MB block so have to write at least that much
+ data_bin_mb = 4
+ self.mounts[2].write_n_mb("data.bin", data_bin_mb)
+
+ # Write something outside the volume to check that this space usage is
+ # not reported in the volume's DF.
+ other_bin_mb = 8
+ self.mount_a.write_n_mb("other.bin", other_bin_mb)
+
+ # global: df should see all the writes (data + other). This is a >
+ # rather than a == because the global space used includes all pools
+ def check_df():
+ used = self.mount_a.df()['used']
+ return used >= (other_bin_mb * 1024 * 1024)
+
+ self.wait_until_true(check_df, timeout=30)
+
+ # Hack: do a metadata IO to kick rstats
+ self.mounts[2].run_shell(["touch", "foo"])
+
+ # volume: df should see the data_bin_mb consumed from quota, same
+ # as the rbytes for the volume's dir
+ self.wait_until_equal(
+ lambda: self.mounts[2].df()['used'],
+ data_bin_mb * 1024 * 1024, timeout=60)
+ self.wait_until_equal(
+ lambda: self.mount_a.getfattr(
+ os.path.join(volume_prefix.strip("/"), group_id, volume_id),
+ "ceph.dir.rbytes"),
+ "%s" % (data_bin_mb * 1024 * 1024), timeout=60)
+
+ # sync so that file data is persisted to RADOS
+ self.mounts[2].run_shell(["sync"])
+
+ # Our data should stay in the particular RADOS namespace
+ pool_name = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool")
+ namespace = "{0}{1}".format(namespace_prefix, volume_id)
+ ns_in_attr = self.mount_a.getfattr(os.path.join("myprefix", group_id, volume_id), "ceph.dir.layout.pool_namespace")
+ self.assertEqual(namespace, ns_in_attr)
+
+ objects_in_ns = set(self.fs.rados(["ls"], pool=pool_name, namespace=namespace).split("\n"))
+ self.assertNotEqual(objects_in_ns, set())
+
+ # De-authorize the guest
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.evict("{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guest_entity
+ )), volume_prefix, namespace_prefix)
+
+ # Once deauthorized, the client should be unable to do any more metadata ops
+ # The way that the client currently behaves here is to block (it acts like
+ # it has lost the network, because there is nothing to tell it that its messages
+ # are being dropped because its identity is gone)
+ background = self.mounts[2].write_n_mb("rogue.bin", 1, wait=False)
+ try:
+ background.wait()
+ except CommandFailedError:
+ # command failed with EBLACKLISTED?
+ if "transport endpoint shutdown" in background.stderr.getvalue():
+ pass
+ else:
+ raise
+
+ # After deauthorisation, the client ID should be gone (this was the only
+ # volume it was authorised for)
+ self.assertNotIn("client.{0}".format(guest_entity), [e['entity'] for e in self.auth_list()])
+
+ # Clean up the dead mount (ceph-fuse's behaviour here is a bit undefined)
+ self.mounts[2].umount_wait()
+
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ vc.purge_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )), volume_prefix, namespace_prefix)
+
+ def test_idempotency(self):
+ """
+ That the volumeclient interface works when calling everything twice
+ """
+ self.mount_b.umount_wait()
+ self._configure_vc_auth(self.mount_b, "manila")
+
+ guest_entity = "guest"
+ group_id = "grpid"
+ volume_id = "volid"
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 10)
+ vc.create_volume(vp, 10)
+ vc.authorize(vp, "{guest_entity}")
+ vc.authorize(vp, "{guest_entity}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.delete_volume(vp)
+ vc.delete_volume(vp)
+ vc.purge_volume(vp)
+ vc.purge_volume(vp)
+
+ vc.create_volume(vp, 10, data_isolated=True)
+ vc.create_volume(vp, 10, data_isolated=True)
+ vc.authorize(vp, "{guest_entity}")
+ vc.authorize(vp, "{guest_entity}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.evict("{guest_entity}")
+ vc.evict("{guest_entity}")
+ vc.delete_volume(vp, data_isolated=True)
+ vc.delete_volume(vp, data_isolated=True)
+ vc.purge_volume(vp, data_isolated=True)
+ vc.purge_volume(vp, data_isolated=True)
+
+ vc.create_volume(vp, 10, namespace_isolated=False)
+ vc.create_volume(vp, 10, namespace_isolated=False)
+ vc.authorize(vp, "{guest_entity}")
+ vc.authorize(vp, "{guest_entity}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.evict("{guest_entity}")
+ vc.evict("{guest_entity}")
+ vc.delete_volume(vp)
+ vc.delete_volume(vp)
+ vc.purge_volume(vp)
+ vc.purge_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guest_entity
+ )))
+
+ def test_data_isolated(self):
+ """
+ That data isolated shares get their own pool
+ :return:
+ """
+
+ # Because the teuthology config template sets mon_max_pg_per_osd to
+ # 10000 (i.e. it just tries to ignore health warnings), reset it to something
+ # sane before using volume_client, to avoid creating pools with absurdly large
+ # numbers of PGs.
+ self.set_conf("global", "mon max pg per osd", "300")
+ for mon_daemon_state in self.ctx.daemons.iter_daemons_of_role('mon'):
+ mon_daemon_state.restart()
+
+ self.mount_b.umount_wait()
+ self._configure_vc_auth(self.mount_b, "manila")
+
+ # Calculate how many PGs we'll expect the new volume pool to have
+ osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty'))
+ max_per_osd = int(self.fs.get_config('mon_max_pg_per_osd'))
+ osd_count = len(osd_map['osds'])
+ max_overall = osd_count * max_per_osd
+
+ existing_pg_count = 0
+ for p in osd_map['pools']:
+ existing_pg_count += p['pg_num']
+
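+ # Mirror the volume client's sizing heuristic: the new data-isolated pool is
+ # expected to get 10% of the remaining PG budget.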
+ expected_pg_num = (max_overall - existing_pg_count) // 10
+ log.info("max_per_osd {0}".format(max_per_osd))
+ log.info("osd_count {0}".format(osd_count))
+ log.info("max_overall {0}".format(max_overall))
+ log.info("existing_pg_count {0}".format(existing_pg_count))
+ log.info("expected_pg_num {0}".format(expected_pg_num))
+
+ pools_a = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
+
+ group_id = "grpid"
+ volume_id = "volid"
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 10, data_isolated=True)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ pools_b = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
+
+ # Should have created one new pool
+ new_pools = set(p['pool_name'] for p in pools_b) - set([p['pool_name'] for p in pools_a])
+ self.assertEqual(len(new_pools), 1)
+
+ # It should have followed the heuristic for PG count
+ # (this is an overly strict test condition, so we may want to remove
+ # it at some point as/when the logic gets fancier)
+ created_pg_num = self.fs.mon_manager.get_pool_property(list(new_pools)[0], "pg_num")
+ self.assertEqual(expected_pg_num, created_pg_num)
+
+ def test_15303(self):
+ """
+ Reproducer for #15303 "Client holds incorrect complete flag on dir
+ after losing caps" (http://tracker.ceph.com/issues/15303)
+ """
+ for m in self.mounts:
+ m.umount_wait()
+
+ # Create a dir on mount A
+ self.mount_a.mount()
+ self.mount_a.run_shell(["mkdir", "parent1"])
+ self.mount_a.run_shell(["mkdir", "parent2"])
+ self.mount_a.run_shell(["mkdir", "parent1/mydir"])
+
+ # Put some files in it from mount B
+ self.mount_b.mount()
+ self.mount_b.run_shell(["touch", "parent1/mydir/afile"])
+ self.mount_b.umount_wait()
+
+ # List the dir's contents on mount A
+ self.assertListEqual(self.mount_a.ls("parent1/mydir"),
+ ["afile"])
+
+ def test_evict_client(self):
+ """
+ That a volume client can be evicted based on its auth ID and the volume
+ path it has mounted.
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Requires FUSE client to inject client metadata")
+
+ # mounts[1] would be used as handle for driving VolumeClient. mounts[2]
+ # and mounts[3] would be used as guests to mount the volumes/shares.
+
+ for i in range(1, 4):
+ self.mounts[i].umount_wait()
+
+ volumeclient_mount = self.mounts[1]
+ self._configure_vc_auth(volumeclient_mount, "manila")
+ guest_mounts = (self.mounts[2], self.mounts[3])
+
+ guest_entity = "guest"
+ group_id = "grpid"
+ mount_paths = []
+ volume_ids = []
+
+ # Create two volumes. Authorize 'guest' auth ID to mount the two
+ # volumes. Mount the two volumes. Write data to the volumes.
+ for i in range(2):
+ # Create volume.
+ volume_ids.append("volid_{0}".format(str(i)))
+ mount_paths.append(
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 10 * 1024 * 1024)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_ids[i]
+ ))))
+
+ # Authorize 'guest' auth ID to mount the volume.
+ self._configure_guest_auth(volumeclient_mount, guest_mounts[i],
+ guest_entity, mount_paths[i])
+
+ # Mount the volume.
+ guest_mounts[i].mountpoint_dir_name = 'mnt.{id}.{suffix}'.format(
+ id=guest_entity, suffix=str(i))
+ guest_mounts[i].mount(mount_path=mount_paths[i])
+ guest_mounts[i].write_n_mb("data.bin", 1)
+
+
+ # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted
+ # one volume.
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.evict("{guest_entity}", volume_path=vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_ids[0],
+ guest_entity=guest_entity
+ )))
+
+ # Evicted guest client, guest_mounts[0], should not be able to do
+ # any more metadata ops. It should start failing all operations
+ # when it sees that its own address is in the blacklist.
+ try:
+ guest_mounts[0].write_n_mb("rogue.bin", 1)
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError("post-eviction write should have failed!")
+
+ # The blacklisted guest client should now be unmountable
+ guest_mounts[0].umount_wait()
+
+ # Guest client, guest_mounts[1], using the same auth ID 'guest', but
+ # has mounted the other volume, should be able to use its volume
+ # unaffected.
+ guest_mounts[1].write_n_mb("data.bin.1", 1)
+
+ # Cleanup.
+ for i in range(2):
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ vc.delete_volume(vp)
+ vc.purge_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_ids[i],
+ guest_entity=guest_entity
+ )))
+
+
+ def test_purge(self):
+ """
+ Reproducer for #15266, exception trying to purge volumes that
+ contain non-ascii filenames.
+
+ Additionally test any other purge corner cases here.
+ """
+ # I'm going to leave mount_b unmounted and just use it as a handle for
+ # driving volumeclient. It's a little hacky but we don't have a more
+ # general concept for librados/libcephfs clients as opposed to full
+ # blown mounting clients.
+ self.mount_b.umount_wait()
+ self._configure_vc_auth(self.mount_b, "manila")
+
+ group_id = "grpid"
+ # Use a unicode volume ID (like Manila), to reproduce #15266
+ volume_id = u"volid"
+
+ # Create
+ mount_path = self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", u"{volume_id}")
+ create_result = vc.create_volume(vp, 10)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id
+ )))
+
+ # Strip leading "/"
+ mount_path = mount_path[1:]
+
+ # A file with non-ascii characters
+ self.mount_a.run_shell(["touch", os.path.join(mount_path, u"b\u00F6b")])
+
+ # A file with no permissions to do anything
+ self.mount_a.run_shell(["touch", os.path.join(mount_path, "noperms")])
+ self.mount_a.run_shell(["chmod", "0000", os.path.join(mount_path, "noperms")])
+
+ self._volume_client_python(self.mount_b, dedent("""
+ vp = VolumePath("{group_id}", u"{volume_id}")
+ vc.delete_volume(vp)
+ vc.purge_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id
+ )))
+
+ # Check it's really gone
+ self.assertEqual(self.mount_a.ls("volumes/_deleting"), [])
+ self.assertEqual(self.mount_a.ls("volumes/"), ["_deleting", group_id])
+
+ def test_readonly_authorization(self):
+ """
+ That guest clients can be restricted to read-only mounts of volumes.
+ """
+
+ volumeclient_mount = self.mounts[1]
+ guest_mount = self.mounts[2]
+ volumeclient_mount.umount_wait()
+ guest_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ guest_entity = "guest"
+ group_id = "grpid"
+ volume_id = "volid"
+
+ # Create a volume.
+ mount_path = self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 1024*1024*10)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Authorize and configure credentials for the guest to mount
+ # the volume with read-write access.
+ self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity,
+ mount_path, readonly=False)
+
+ # Mount the volume, and write to it.
+ guest_mount.mount(mount_path=mount_path)
+ guest_mount.write_n_mb("data.bin", 1)
+
+ # Change the guest auth ID's authorization to read-only mount access.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guest_entity
+ )))
+ self._configure_guest_auth(volumeclient_mount, guest_mount, guest_entity,
+ mount_path, readonly=True)
+
+ # The effect of the change in access level to read-only is not
+ # immediate. The guest sees the change only after a remount of
+ # the volume.
+ guest_mount.umount_wait()
+ guest_mount.mount(mount_path=mount_path)
+
+ # Read existing content of the volume.
+ self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"])
+ # Cannot write into read-only volume.
+ try:
+ guest_mount.write_n_mb("rogue.bin", 1)
+ except CommandFailedError:
+ pass
+
+ def test_get_authorized_ids(self):
+ """
+ That for a volume, the authorized IDs and their access levels
+ can be obtained using CephFSVolumeClient's get_authorized_ids().
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "grpid"
+ volume_id = "volid"
+ guest_entity_1 = "guest1"
+ guest_entity_2 = "guest2"
+
+ log.info("print(group ID: {0})".format(group_id))
+
+ # Create a volume.
+ auths = self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ auths = vc.get_authorized_ids(vp)
+ print(auths)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+ # Check the list of authorized IDs for the volume.
+ self.assertEqual('None', auths)
+
+ # Allow two auth IDs access to the volume.
+ auths = self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{guest_entity_1}", readonly=False)
+ vc.authorize(vp, "{guest_entity_2}", readonly=True)
+ auths = vc.get_authorized_ids(vp)
+ print(auths)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity_1=guest_entity_1,
+ guest_entity_2=guest_entity_2,
+ )))
+ # Check the list of authorized IDs and their access levels.
+ expected_result = [('guest1', 'rw'), ('guest2', 'r')]
+ self.assertCountEqual(str(expected_result), auths)
+
+ # Disallow both the auth IDs' access to the volume.
+ auths = self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity_1}")
+ vc.deauthorize(vp, "{guest_entity_2}")
+ auths = vc.get_authorized_ids(vp)
+ print(auths)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity_1=guest_entity_1,
+ guest_entity_2=guest_entity_2,
+ )))
+ # Check the list of authorized IDs for the volume.
+ self.assertEqual('None', auths)
+
+ def test_multitenant_volumes(self):
+ """
+ That volume access can be restricted to a tenant.
+
+ That metadata used to enforce tenant isolation of
+ volumes is stored as a two-way mapping between auth
+ IDs and volumes that they're authorized to access.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id = "volumeid"
+
+ # Guest clients belonging to different tenants, but using the same
+ # auth ID.
+ auth_id = "guest"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+ guestclient_2 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant2",
+ }
+
+ # Create a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Check that volume metadata file is created on volume creation.
+ vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id)
+ self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Authorize 'guestclient_1', using auth ID 'guest' and belonging to
+ # 'tenant1', with 'rw' access to the volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Check that the auth metadata file for auth ID 'guest' is
+ # created on authorizing 'guest' access to the volume.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Verify that the auth metadata file stores the tenant ID that the
+ # auth ID belongs to, the auth ID's authorized access levels
+ # for different volumes, versioning details, etc.
+ expected_auth_metadata = {
+ "version": 2,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "groupid/volumeid": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._volume_client_python(volumeclient_mount, dedent("""
+ import json
+ vp = VolumePath("{group_id}", "{volume_id}")
+ auth_metadata = vc._auth_metadata_get("{auth_id}")
+ print(json.dumps(auth_metadata))
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient_1["auth_id"],
+ )))
+ auth_metadata = json.loads(auth_metadata)
+
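+ # The stored metadata version may be newer than the expected baseline, so it
+ # is checked separately and dropped from both dicts before comparing the
+ # remaining fields.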
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # Verify that the volume metadata file stores info about auth IDs
+ # and their access levels to the volume, versioning details, etc.
+ expected_vol_metadata = {
+ "version": 2,
+ "compat_version": 1,
+ "auths": {
+ "guest": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ vol_metadata = self._volume_client_python(volumeclient_mount, dedent("""
+ import json
+ vp = VolumePath("{group_id}", "{volume_id}")
+ volume_metadata = vc._volume_metadata_get(vp)
+ print(json.dumps(volume_metadata))
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+ vol_metadata = json.loads(vol_metadata)
+
+ self.assertGreaterEqual(vol_metadata["version"], expected_vol_metadata["version"])
+ del expected_vol_metadata["version"]
+ del vol_metadata["version"]
+ self.assertEqual(expected_vol_metadata, vol_metadata)
+
+ # Cannot authorize 'guestclient_2' to access the volume.
+ # It uses auth ID 'guest', which has already been used by
+ # 'guestclient_1', a client belonging to another tenant, to access
+ # the volume.
+ with self.assertRaises(CommandFailedError):
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient_2["auth_id"],
+ tenant_id=guestclient_2["tenant_id"]
+ )))
+
+ # Check that auth metadata file is cleaned up on removing
+ # auth ID's only access to a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guestclient_1["auth_id"]
+ )))
+
+ self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Check that volume metadata file is cleaned up on volume deletion.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+ self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+
+ def test_authorize_auth_id_not_created_by_ceph_volume_client(self):
+ """
+ If the auth_id already exists and was not created by
+ ceph_volume_client, authorizing it is not allowed by default.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id = "volumeid"
+
+ # Create auth_id
+ self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.guest1",
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Cannot authorize 'guestclient_1' to access the volume.
+ # It uses auth ID 'guest1', which already exists and was not
+ # created by ceph_volume_client.
+ with self.assertRaises(CommandFailedError):
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Delete volume
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ def test_authorize_allow_existing_id_option(self):
+ """
+ If the auth_id already exists and was not created by
+ ceph_volume_client, authorizing it is not allowed by default,
+ but it is allowed with the allow_existing_id option.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id = "volumeid"
+
+ # Create auth_id
+ self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.guest1",
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Authorizing 'guestclient_1' to access the volume would normally fail,
+ # because auth ID 'guest1' already exists and was not created by
+ # ceph_volume_client, but it succeeds with the 'allow_existing_id' option.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}",
+ allow_existing_id="{allow_existing_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"],
+ allow_existing_id=True
+ )))
+
+ # Delete volume
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ def test_deauthorize_auth_id_after_out_of_band_update(self):
+ """
+ If an auth_id authorized by ceph_volume_client is updated
+ out of band, a deauthorize should not delete the auth_id; it
+ should only remove the caps associated with it.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id = "volumeid"
+
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Authorize 'guestclient_1' to access the volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Update caps for guestclient_1 out of band
+ out = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "caps", "client.guest1",
+ "mds", "allow rw path=/volumes/groupid, allow rw path=/volumes/groupid/volumeid",
+ "osd", "allow rw pool=cephfs_data namespace=fsvolumens_volumeid",
+ "mon", "allow r",
+ "mgr", "allow *"
+ )
+
+ # Deauthorize guestclient_1
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guestclient_1["auth_id"]
+ )))
+
+ # Validate the caps of guestclient_1 after deauthorize. The entity should not have
+ # been deleted, and the mgr and mds caps updated out of band should still be present.
+ out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty"))
+
+ self.assertEqual("client.guest1", out[0]["entity"])
+ self.assertEqual("allow rw path=/volumes/groupid", out[0]["caps"]["mds"])
+ self.assertEqual("allow *", out[0]["caps"]["mgr"])
+ self.assertNotIn("osd", out[0]["caps"])
+
+ # Delete volume
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ def test_recover_metadata(self):
+ """
+ That the volume client can recover from partial auth updates using
+ metadata files, which store auth info and its update status.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id = "volumeid"
+
+ guestclient = {
+ "auth_id": "guest",
+ "tenant_id": "tenant",
+ }
+
+ # Create a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Authorize 'guestclient' access to the volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient["auth_id"],
+ tenant_id=guestclient["tenant_id"]
+ )))
+
+ # Check that auth metadata file for auth ID 'guest' is created.
+ auth_metadata_filename = "${0}.meta".format(guestclient["auth_id"])
+ self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Induce partial auth update state by modifying the auth metadata file,
+ # and then run recovery procedure.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ auth_metadata = vc._auth_metadata_get("{auth_id}")
+ auth_metadata['dirty'] = True
+ vc._auth_metadata_set("{auth_id}", auth_metadata)
+ vc.recover()
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient["auth_id"],
+ )))
+
+ def test_update_old_style_auth_metadata_to_new_during_recover(self):
+ """
+ From Nautilus onwards, 'volumes' created by ceph_volume_client are
+ renamed and used as CephFS subvolumes accessed via the ceph-mgr
+ interface, so the subvolume data is stored in the auth metadata
+ file under a 'subvolumes' key instead of a 'volumes' key.
+ This test validates the transparent update of the 'volumes' key to
+ 'subvolumes' in the auth metadata file during recover.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id = "volumeid"
+
+ guestclient = {
+ "auth_id": "guest",
+ "tenant_id": "tenant",
+ }
+
+ # Create a volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.create_volume(vp, 1024*1024*10)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+
+ # Check that volume metadata file is created on volume creation.
+ vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id)
+ self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Authorize 'guestclient' access to the volume.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient["auth_id"],
+ tenant_id=guestclient["tenant_id"]
+ )))
+
+ # Check that auth metadata file for auth ID 'guest' is created.
+ auth_metadata_filename = "${0}.meta".format(guestclient["auth_id"])
+ self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Replace 'subvolumes' with 'volumes' to emulate an old-style auth metadata file
+ self.mounts[0].run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Verify that the auth metadata file stores the tenant ID that the
+ # auth ID belongs to, the auth ID's authorized access levels
+ # for different volumes, versioning details, etc.
+ expected_auth_metadata = {
+ "version": 2,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant",
+ "subvolumes": {
+ "groupid/volumeid": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ # Induce partial auth update state by modifying the auth metadata file,
+ # and then run recovery procedure. This should also update 'volumes' key
+ # to 'subvolumes'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ auth_metadata = vc._auth_metadata_get("{auth_id}")
+ auth_metadata['dirty'] = True
+ vc._auth_metadata_set("{auth_id}", auth_metadata)
+ vc.recover()
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ auth_id=guestclient["auth_id"],
+ )))
+
+ auth_metadata = self._volume_client_python(volumeclient_mount, dedent("""
+ import json
+ auth_metadata = vc._auth_metadata_get("{auth_id}")
+ print(json.dumps(auth_metadata))
+ """.format(
+ auth_id=guestclient["auth_id"],
+ )))
+ auth_metadata = json.loads(auth_metadata)
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # Check that the auth metadata file is cleaned up on removing the
+ # auth ID's access to volume 'volumeid'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ guest_entity=guestclient["auth_id"]
+ )))
+ self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Check that volume metadata file is cleaned up on volume deletion.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )))
+ self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+
+ def test_update_old_style_auth_metadata_to_new_during_authorize(self):
+ """
+ From Nautilus onwards, 'volumes' created by ceph_volume_client are
+ renamed and used as CephFS subvolumes accessed via the ceph-mgr
+ interface, so the subvolume data is stored in the auth metadata
+ file under a 'subvolumes' key instead of a 'volumes' key.
+ This test validates the transparent update of the 'volumes' key to
+ 'subvolumes' in the auth metadata file during authorize.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id1 = "volumeid1"
+ volume_id2 = "volumeid2"
+
+ auth_id = "guest"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create a volume volumeid1.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 10*1024*1024)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ )))
+
+ # Create a volume volumeid2.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 10*1024*1024)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ )))
+
+ # Check that volume metadata file is created on volume creation.
+ vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id1)
+ self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+ vol_metadata_filename2 = "_{0}:{1}.meta".format(group_id, volume_id2)
+ self.assertIn(vol_metadata_filename2, self.mounts[0].ls("volumes"))
+
+ # Authorize 'guestclient_1', using auth ID 'guest' and belonging to
+ # 'tenant1', with 'rw' access to the volume 'volumeid1'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Check that the auth metadata file for auth ID 'guest' is
+ # created on authorizing 'guest' access to the volume.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Replace 'subvolumes' with 'volumes' to emulate an old-style auth metadata file
+ self.mounts[0].run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Authorize 'guestclient_1', using auth ID 'guest' and belonging to
+ # 'tenant1', with 'rw' access to the volume 'volumeid2'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Verify that the auth metadata file stores the tenant ID that the
+ # auth ID belongs to, the auth ID's authorized access levels
+ # for different volumes, versioning details, etc.
+ expected_auth_metadata = {
+ "version": 2,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "groupid/volumeid1": {
+ "dirty": False,
+ "access_level": "rw"
+ },
+ "groupid/volumeid2": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._volume_client_python(volumeclient_mount, dedent("""
+ import json
+ auth_metadata = vc._auth_metadata_get("{auth_id}")
+ print(json.dumps(auth_metadata))
+ """.format(
+ auth_id=guestclient_1["auth_id"],
+ )))
+ auth_metadata = json.loads(auth_metadata)
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # Check that auth metadata file is cleaned up on removing
+ # auth ID's access to volumes 'volumeid1' and 'volumeid2'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ guest_entity=guestclient_1["auth_id"]
+ )))
+
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ guest_entity=guestclient_1["auth_id"]
+ )))
+ self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Check that volume metadata file is cleaned up on volume deletion.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ )))
+ self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Check that volume metadata file is cleaned up on volume deletion.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ )))
+ self.assertNotIn(vol_metadata_filename2, self.mounts[0].ls("volumes"))
+
+ def test_update_old_style_auth_metadata_to_new_during_deauthorize(self):
+ """
+ From Nautilus onwards, 'volumes' created by ceph_volume_client are
+ renamed and used as CephFS subvolumes accessed via the ceph-mgr
+ interface, so the subvolume data is stored in the auth metadata
+ file under a 'subvolumes' key instead of a 'volumes' key.
+ This test validates the transparent update of the 'volumes' key to
+ 'subvolumes' in the auth metadata file during deauthorize.
+ """
+ volumeclient_mount = self.mounts[1]
+ volumeclient_mount.umount_wait()
+
+ # Configure volumeclient_mount as the handle for driving volumeclient.
+ self._configure_vc_auth(volumeclient_mount, "manila")
+
+ group_id = "groupid"
+ volume_id1 = "volumeid1"
+ volume_id2 = "volumeid2"
+
+ auth_id = "guest"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create a volume volumeid1.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 10*1024*1024)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ )))
+
+ # Create a volume volumeid2.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 10*1024*1024)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ )))
+
+ # Check that volume metadata file is created on volume creation.
+ vol_metadata_filename = "_{0}:{1}.meta".format(group_id, volume_id1)
+ self.assertIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+ vol_metadata_filename2 = "_{0}:{1}.meta".format(group_id, volume_id2)
+ self.assertIn(vol_metadata_filename2, self.mounts[0].ls("volumes"))
+
+ # Authorize 'guestclient_1', using auth ID 'guest' and belonging to
+ # 'tenant1', with 'rw' access to the volume 'volumeid1'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Authorize 'guestclient_1', using auth ID 'guest' and belonging to
+ # 'tenant1', with 'rw' access to the volume 'volumeid2'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.authorize(vp, "{auth_id}", tenant_id="{tenant_id}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ auth_id=guestclient_1["auth_id"],
+ tenant_id=guestclient_1["tenant_id"]
+ )))
+
+ # Check that the auth metadata file for auth ID 'guest' is
+ # created on authorizing 'guest' access to the volume.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Replace 'subvolumes' with 'volumes' to emulate an old-style auth metadata file
+ self.mounts[0].run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Deauthorize 'guestclient_1's access to 'volumeid2'. This should also
+ # update the 'volumes' key to 'subvolumes'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ guest_entity=guestclient_1["auth_id"],
+ )))
+
+ # Verify that the auth metadata file stores the tenant ID that the
+ # auth ID belongs to, the auth ID's authorized access levels
+ # for different volumes, versioning details, etc.
+ expected_auth_metadata = {
+ "version": 2,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "groupid/volumeid1": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._volume_client_python(volumeclient_mount, dedent("""
+ import json
+ auth_metadata = vc._auth_metadata_get("{auth_id}")
+ print(json.dumps(auth_metadata))
+ """.format(
+ auth_id=guestclient_1["auth_id"],
+ )))
+ auth_metadata = json.loads(auth_metadata)
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # Check that the auth metadata file is cleaned up on removing the
+ # auth ID's remaining access to volume 'volumeid1'.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.deauthorize(vp, "{guest_entity}")
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ guest_entity=guestclient_1["auth_id"]
+ )))
+ self.assertNotIn(auth_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Check that volume metadata file is cleaned up on 'volumeid1' deletion.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id1,
+ )))
+ self.assertNotIn(vol_metadata_filename, self.mounts[0].ls("volumes"))
+
+ # Check that volume metadata file is cleaned up on 'volumeid2' deletion.
+ self._volume_client_python(volumeclient_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id2,
+ )))
+ self.assertNotIn(vol_metadata_filename2, self.mounts[0].ls("volumes"))
+
+ def test_put_object(self):
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+ self._configure_vc_auth(vc_mount, "manila")
+
+ obj_data = 'test data'
+ obj_name = 'test_vc_obj_1'
+ pool_name = self.fs.get_data_pool_names()[0]
+
+ self._volume_client_python(vc_mount, dedent("""
+ vc.put_object("{pool_name}", "{obj_name}", b"{obj_data}")
+ """.format(
+ pool_name = pool_name,
+ obj_name = obj_name,
+ obj_data = obj_data
+ )))
+
+ read_data = self.fs.rados(['get', obj_name, '-'], pool=pool_name)
+ self.assertEqual(obj_data, read_data)
+
+ def test_get_object(self):
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+ self._configure_vc_auth(vc_mount, "manila")
+
+ obj_data = 'test_data'
+ obj_name = 'test_vc_ob_2'
+ pool_name = self.fs.get_data_pool_names()[0]
+
+ self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data)
+
+ self._volume_client_python(vc_mount, dedent("""
+ data_read = vc.get_object("{pool_name}", "{obj_name}")
+ assert data_read == b"{obj_data}"
+ """.format(
+ pool_name = pool_name,
+ obj_name = obj_name,
+ obj_data = obj_data
+ )))
+
+ def test_put_object_versioned(self):
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+ self._configure_vc_auth(vc_mount, "manila")
+
+ obj_data = 'test_data'
+ obj_name = 'test_vc_obj'
+ pool_name = self.fs.get_data_pool_names()[0]
+ self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data)
+
+ self._volume_client_python(vc_mount, dedent("""
+ data, version_before = vc.get_object_and_version("{pool_name}", "{obj_name}")
+
+ if sys_version_info.major < 3:
+ data = data + 'modification1'
+ elif sys_version_info.major >= 3:
+ data = str.encode(data.decode() + 'modification1')
+
+ vc.put_object_versioned("{pool_name}", "{obj_name}", data, version_before)
+ data, version_after = vc.get_object_and_version("{pool_name}", "{obj_name}")
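+ # a successful versioned put is expected to bump the object version by exactly one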
+ assert version_after == version_before + 1
+ """).format(pool_name=pool_name, obj_name=obj_name))
+
+ def test_version_check_for_put_object_versioned(self):
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+ self._configure_vc_auth(vc_mount, "manila")
+
+ obj_data = 'test_data'
+ obj_name = 'test_vc_ob_2'
+ pool_name = self.fs.get_data_pool_names()[0]
+ self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data)
+
+ # Test if put_object_versioned() crosschecks the version of the
+ # given object. Being a negative test, an exception is expected.
+ expected_exception = 'rados_OSError'
+ output = self._volume_client_python(vc_mount, dedent("""
+ data, version = vc.get_object_and_version("{pool_name}", "{obj_name}")
+
+ if sys_version_info.major < 3:
+ data = data + 'm1'
+ elif sys_version_info.major >= 3:
+ data = str.encode(data.decode('utf-8') + 'm1')
+
+ vc.put_object("{pool_name}", "{obj_name}", data)
+
+ if sys_version_info.major < 3:
+ data = data + 'm2'
+ elif sys_version_info.major >= 3:
+ data = str.encode(data.decode('utf-8') + 'm2')
+
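+ # the unversioned put above bumped the object version, so this versioned put
+ # supplies a stale version and is expected to fail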
+ try:
+ vc.put_object_versioned("{pool_name}", "{obj_name}", data, version)
+ except {expected_exception}:
+ print('{expected_exception} raised')
+ """).format(pool_name=pool_name, obj_name=obj_name,
+ expected_exception=expected_exception))
+ self.assertEqual(expected_exception + ' raised', output)
+
+
+ def test_delete_object(self):
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+ self._configure_vc_auth(vc_mount, "manila")
+
+ obj_data = 'test data'
+ obj_name = 'test_vc_obj_3'
+ pool_name = self.fs.get_data_pool_names()[0]
+
+ self.fs.rados(['put', obj_name, '-'], pool=pool_name, stdin_data=obj_data)
+
+ self._volume_client_python(vc_mount, dedent("""
+ data_read = vc.delete_object("{pool_name}", "{obj_name}")
+ """.format(
+ pool_name = pool_name,
+ obj_name = obj_name,
+ )))
+
+ with self.assertRaises(CommandFailedError):
+ self.fs.rados(['stat', obj_name], pool=pool_name)
+
+ # Check idempotency -- no error raised trying to delete non-existent
+ # object
+ self._volume_client_python(vc_mount, dedent("""
+ data_read = vc.delete_object("{pool_name}", "{obj_name}")
+ """.format(
+ pool_name = pool_name,
+ obj_name = obj_name,
+ )))
+
+ def test_21501(self):
+ """
+ Reproducer for #21501 "ceph_volume_client: sets invalid caps for
+ existing IDs with no caps" (http://tracker.ceph.com/issues/21501)
+ """
+
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+
+ # Configure vc_mount as the handle for driving volumeclient
+ self._configure_vc_auth(vc_mount, "manila")
+
+ # Create a volume
+ group_id = "grpid"
+ volume_id = "volid"
+ mount_path = self._volume_client_python(vc_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 1024*1024*10)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id
+ )))
+
+ # Create an auth ID with no caps
+ guest_id = '21501'
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'get-or-create', 'client.{0}'.format(guest_id))
+
+ guest_mount = self.mounts[2]
+ guest_mount.umount_wait()
+
+ # Set auth caps for the auth ID using the volumeclient
+ self._configure_guest_auth(vc_mount, guest_mount, guest_id, mount_path, allow_existing_id=True)
+
+ # Mount the volume in the guest using the auth ID to assert that the
+ # auth caps are valid
+ guest_mount.mount(mount_path=mount_path)
+
+ def test_volume_without_namespace_isolation(self):
+ """
+ That volume client can create volumes that do not have separate RADOS
+ namespace layouts.
+ """
+ vc_mount = self.mounts[1]
+ vc_mount.umount_wait()
+
+ # Configure vc_mount as the handle for driving volumeclient
+ self._configure_vc_auth(vc_mount, "manila")
+
+ # Create a volume
+ volume_prefix = "/myprefix"
+ group_id = "grpid"
+ volume_id = "volid"
+ self._volume_client_python(vc_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ create_result = vc.create_volume(vp, 1024*1024*10, namespace_isolated=False)
+ print(create_result['mount_path'])
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id
+ )), volume_prefix)
+
+ # The CephFS volume should be created
+ self.mounts[0].stat(os.path.join("myprefix", group_id, volume_id))
+ vol_namespace = self.mounts[0].getfattr(
+ os.path.join("myprefix", group_id, volume_id),
+ "ceph.dir.layout.pool_namespace")
+ assert not vol_namespace
+
+ self._volume_client_python(vc_mount, dedent("""
+ vp = VolumePath("{group_id}", "{volume_id}")
+ vc.delete_volume(vp)
+ vc.purge_volume(vp)
+ """.format(
+ group_id=group_id,
+ volume_id=volume_id,
+ )), volume_prefix)
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
new file mode 100644
index 00000000..67f138f8
--- /dev/null
+++ b/qa/tasks/cephfs/test_volumes.py
@@ -0,0 +1,4435 @@
+import os
+import json
+import time
+import errno
+import random
+import logging
+import collections
+import uuid
+import unittest
+from hashlib import md5
+from textwrap import dedent
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from teuthology.misc import sudo_write_file
+
+log = logging.getLogger(__name__)
+
+class TestVolumes(CephFSTestCase):
+ TEST_VOLUME_PREFIX = "volume"
+ TEST_SUBVOLUME_PREFIX="subvolume"
+ TEST_GROUP_PREFIX="group"
+ TEST_SNAPSHOT_PREFIX="snapshot"
+ TEST_CLONE_PREFIX="clone"
+ TEST_FILE_NAME_PREFIX="subvolume_file"
+
+ # for filling subvolume with data
+ CLIENTS_REQUIRED = 2
+
+ # io defaults
+ DEFAULT_FILE_SIZE = 1 # MB
+ DEFAULT_NUMBER_OF_FILES = 1024
+
+ def _fs_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)
+
+ def _raw_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args)
+
+ def __check_clone_state(self, state, clone, clone_group=None, timo=120):
+ check = 0
+ args = ["clone", "status", self.volname, clone]
+ if clone_group:
+ args.append(clone_group)
+ args = tuple(args)
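+ # poll the clone status once per second until it reaches the expected state;
+ # the assertion below fails the test if the timeout expires first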
+ while check < timo:
+ result = json.loads(self._fs_cmd(*args))
+ if result["status"]["state"] == state:
+ break
+ check += 1
+ time.sleep(1)
+ self.assertTrue(check < timo)
+
+ def _wait_for_clone_to_complete(self, clone, clone_group=None, timo=120):
+ self.__check_clone_state("complete", clone, clone_group, timo)
+
+ def _wait_for_clone_to_fail(self, clone, clone_group=None, timo=120):
+ self.__check_clone_state("failed", clone, clone_group, timo)
+
+ def _check_clone_canceled(self, clone, clone_group=None):
+ self.__check_clone_state("canceled", clone, clone_group, timo=1)
+
+ def _get_subvolume_snapshot_path(self, subvolume, snapshot, source_group, subvol_path, source_version):
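+ # v2 subvolumes keep their data under a uuid directory, so the snapshot path
+ # includes that uuid component; v1 snapshots live directly under the subvolume path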
+ if source_version == 2:
+ # v2
+ if subvol_path is not None:
+ (base_path, uuid_str) = os.path.split(subvol_path)
+ else:
+ (base_path, uuid_str) = os.path.split(self._get_subvolume_path(self.volname, subvolume, group_name=source_group))
+ return os.path.join(base_path, ".snap", snapshot, uuid_str)
+
+ # v1
+ base_path = self._get_subvolume_path(self.volname, subvolume, group_name=source_group)
+ return os.path.join(base_path, ".snap", snapshot)
+
+ def _verify_clone_attrs(self, source_path, clone_path):
+ path1 = source_path
+ path2 = clone_path
+
+ p = self.mount_a.run_shell(["find", path1])
+ paths = p.stdout.getvalue().strip().split()
+
+ # for each entry in source and clone (sink) verify certain inode attributes:
+ # inode type, mode, ownership, [am]time.
+ for source_path in paths:
+ sink_entry = source_path[len(path1)+1:]
+ sink_path = os.path.join(path2, sink_entry)
+
+ # mode+type
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%f', source_path]).stdout.getvalue().strip(), 16)
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%f', sink_path]).stdout.getvalue().strip(), 16)
+ self.assertEqual(sval, cval)
+
+ # ownership
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%u', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%u', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%g', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%g', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ # inode timestamps
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%X', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%X', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%Y', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%Y', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ def _verify_clone_root(self, source_path, clone_path, clone, clone_group, clone_pool):
+ # verifies following clone root attrs quota, data_pool and pool_namespace
+ # remaining attributes of clone root are validated in _verify_clone_attrs
+
+ clone_info = json.loads(self._get_subvolume_info(self.volname, clone, clone_group))
+
+ # verify quota is inherited from source snapshot
+ src_quota = self.mount_a.getfattr(source_path, "ceph.quota.max_bytes")
+ self.assertEqual(clone_info["bytes_quota"], "infinite" if src_quota is None else int(src_quota))
+
+ if clone_pool:
+ # verify pool is set as per request
+ self.assertEqual(clone_info["data_pool"], clone_pool)
+ else:
+ # verify pool and pool namespace are inherited from snapshot
+ self.assertEqual(clone_info["data_pool"],
+ self.mount_a.getfattr(source_path, "ceph.dir.layout.pool"))
+ self.assertEqual(clone_info["pool_namespace"],
+ self.mount_a.getfattr(source_path, "ceph.dir.layout.pool_namespace"))
+
+ def _verify_clone(self, subvolume, snapshot, clone,
+ source_group=None, clone_group=None, clone_pool=None,
+ subvol_path=None, source_version=2, timo=120):
+ # pass in subvol_path (subvolume path when snapshot was taken) when subvolume is removed
+ # but snapshots are retained for clone verification
+ path1 = self._get_subvolume_snapshot_path(subvolume, snapshot, source_group, subvol_path, source_version)
+ path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group)
+
+ check = 0
+ # TODO: currently snapshot rentries are not stable if snapshot source entries
+ # are removed, https://tracker.ceph.com/issues/46747
+ while check < timo and subvol_path is None:
+ val1 = int(self.mount_a.getfattr(path1, "ceph.dir.rentries"))
+ val2 = int(self.mount_a.getfattr(path2, "ceph.dir.rentries"))
+ if val1 == val2:
+ break
+ check += 1
+ time.sleep(1)
+ self.assertTrue(check < timo)
+
+ self._verify_clone_root(path1, path2, clone, clone_group, clone_pool)
+ self._verify_clone_attrs(path1, path2)
+
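+ # the _generate_random_* helpers below hand out zero-padded names with a
+ # monotonically increasing suffix, starting from a random offset chosen in setUp()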
+ def _generate_random_volume_name(self, count=1):
+ n = self.volume_start
+ volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.volume_start += count
+ return volumes[0] if count == 1 else volumes
+
+ def _generate_random_subvolume_name(self, count=1):
+ n = self.subvolume_start
+ subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.subvolume_start += count
+ return subvolumes[0] if count == 1 else subvolumes
+
+ def _generate_random_group_name(self, count=1):
+ n = self.group_start
+ groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.group_start += count
+ return groups[0] if count == 1 else groups
+
+ def _generate_random_snapshot_name(self, count=1):
+ n = self.snapshot_start
+ snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.snapshot_start += count
+ return snaps[0] if count == 1 else snaps
+
+ def _generate_random_clone_name(self, count=1):
+ n = self.clone_start
+ clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.clone_start += count
+ return clones[0] if count == 1 else clones
+
+ def _enable_multi_fs(self):
+ self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it")
+
+ def _create_or_reuse_test_volume(self):
+ result = json.loads(self._fs_cmd("volume", "ls"))
+ if len(result) == 0:
+ self.vol_created = True
+ self.volname = self._generate_random_volume_name()
+ self._fs_cmd("volume", "create", self.volname)
+ else:
+ self.volname = result[0]['name']
+
+ def _get_subvolume_group_path(self, vol_name, group_name):
+ args = ("subvolumegroup", "getpath", vol_name, group_name)
+ path = self._fs_cmd(*args)
+ # remove the leading '/', and trailing whitespaces
+ return path[1:].rstrip()
+
+ def _get_subvolume_path(self, vol_name, subvol_name, group_name=None):
+ args = ["subvolume", "getpath", vol_name, subvol_name]
+ if group_name:
+ args.append(group_name)
+ args = tuple(args)
+ path = self._fs_cmd(*args)
+ # remove the leading '/', and trailing whitespaces
+ return path[1:].rstrip()
+
+ def _get_subvolume_info(self, vol_name, subvol_name, group_name=None):
+ args = ["subvolume", "info", vol_name, subvol_name]
+ if group_name:
+ args.append(group_name)
+ args = tuple(args)
+ subvol_md = self._fs_cmd(*args)
+ return subvol_md
+
+ def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None):
+ args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname]
+ if group_name:
+ args.append(group_name)
+ args = tuple(args)
+ snap_md = self._fs_cmd(*args)
+ return snap_md
+
+ def _delete_test_volume(self):
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ def _do_subvolume_pool_and_namespace_update(self, subvolume, pool=None, pool_namespace=None, subvolume_group=None):
+ subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group)
+
+ if pool is not None:
+ self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool', pool)
+
+ if pool_namespace is not None:
+ self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool_namespace', pool_namespace)
+
+ def _do_subvolume_attr_update(self, subvolume, uid, gid, mode, subvolume_group=None):
+ subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group)
+
+ # mode
+ self.mount_a.run_shell(['chmod', mode, subvolpath])
+
+ # ownership
+ self.mount_a.run_shell(['chown', uid, subvolpath])
+ self.mount_a.run_shell(['chgrp', gid, subvolpath])
+
+ def _do_subvolume_io(self, subvolume, subvolume_group=None, create_dir=None,
+ number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE):
+ # get subvolume path for IO
+ args = ["subvolume", "getpath", self.volname, subvolume]
+ if subvolume_group:
+ args.append(subvolume_group)
+ args = tuple(args)
+ subvolpath = self._fs_cmd(*args)
+ self.assertNotEqual(subvolpath, None)
+ subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline
+
+ io_path = subvolpath
+ if create_dir:
+ io_path = os.path.join(subvolpath, create_dir)
+ self.mount_a.run_shell(["mkdir", "-p", io_path])
+
+ log.debug("filling subvolume {0} with {1} files each {2}MB size under directory {3}".format(subvolume, number_of_files, file_size, io_path))
+ for i in range(number_of_files):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ self.mount_a.write_n_mb(os.path.join(io_path, filename), file_size)
+
+ def _do_subvolume_io_mixed(self, subvolume, subvolume_group=None):
+ subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group)
+
+ reg_file = "regfile.0"
+ reg_path = os.path.join(subvolpath, reg_file)
+ dir_path = os.path.join(subvolpath, "dir.0")
+ sym_path1 = os.path.join(subvolpath, "sym.0")
+ # this symlink's ownership would be changed
+ sym_path2 = os.path.join(dir_path, "sym.0")
+
+ #self.mount_a.write_n_mb(reg_path, TestVolumes.DEFAULT_FILE_SIZE)
+ self.mount_a.run_shell(["sudo", "mkdir", dir_path], omit_sudo=False)
+ self.mount_a.run_shell(["sudo", "ln", "-s", "./{}".format(reg_file), sym_path1], omit_sudo=False)
+ self.mount_a.run_shell(["sudo", "ln", "-s", "./{}".format(reg_file), sym_path2], omit_sudo=False)
+ # flip ownership to nobody. assumption: nobody's id is 65534
+ self.mount_a.run_shell(["sudo", "chown", "-h", "65534:65534", sym_path2], omit_sudo=False)
+
+ def _wait_for_trash_empty(self, timeout=30):
+ # XXX: construct the trash dir path (note that there is no mgr
+ # [sub]volume interface for this).
+ trashdir = os.path.join("./", "volumes", "_deleting")
+ self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout)
+
+ def _assert_meta_location_and_version(self, vol_name, subvol_name, subvol_group=None, version=2, legacy=False):
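+ # legacy subvolumes keep their .meta file under volumes/_legacy, named after an
+ # md5 hash of the subvolume path; newer subvolumes keep .meta inside the subvolume
+ # directory itself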
+ if legacy:
+ subvol_path = self._get_subvolume_path(vol_name, subvol_name, group_name=subvol_group)
+ m = md5()
+ m.update(("/"+subvol_path).encode('utf-8'))
+ meta_filename = "{0}.meta".format(m.digest().hex())
+ metapath = os.path.join(".", "volumes", "_legacy", meta_filename)
+ else:
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ metapath = os.path.join(".", "volumes", group, subvol_name, ".meta")
+
+ out = self.mount_a.run_shell(['cat', metapath])
+ lines = out.stdout.getvalue().strip().split('\n')
+ sv_version = -1
+ for line in lines:
+ if line == "version = " + str(version):
+ sv_version = version
+ break
+ self.assertEqual(sv_version, version, "version expected was '{0}' but got '{1}' from meta file at '{2}'".format(
+ version, sv_version, metapath))
+
+ def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=True, subvol_type='subvolume', state='complete'):
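+ # hand-craft an on-disk v1 subvolume layout (a uuid directory plus a flat .meta
+ # file) so the tests can exercise v1 handling and upgrade behaviour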
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ basepath = os.path.join("volumes", group, subvol_name)
+ uuid_str = str(uuid.uuid4())
+ createpath = os.path.join(basepath, uuid_str)
+ self.mount_a.run_shell(['mkdir', '-p', createpath])
+
+ # create a v1 snapshot, to prevent auto upgrades
+ if has_snapshot:
+ snappath = os.path.join(createpath, ".snap", "fake")
+ self.mount_a.run_shell(['mkdir', '-p', snappath])
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool)
+
+ # create a v1 .meta file
+ meta_contents = "[GLOBAL]\nversion = 1\ntype = {0}\npath = {1}\nstate = {2}\n".format(subvol_type, "/" + createpath, state)
+ if state == 'pending':
+ # add a fake clone source
+ meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n'
+ meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta")
+ sudo_write_file(self.mount_a.client_remote, meta_filepath1, meta_contents)
+ return createpath
+
+ def _update_fake_trash(self, subvol_name, subvol_group=None, trash_name='fake', create=True):
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ trashpath = os.path.join("volumes", group, subvol_name, '.trash', trash_name)
+ if create:
+ self.mount_a.run_shell(['mkdir', '-p', trashpath])
+ else:
+ self.mount_a.run_shell(['rmdir', trashpath])
+
+ def _configure_guest_auth(self, guest_mount, authid, key):
+ """
+ Set up auth credentials for a guest client.
+ """
+ # Create keyring file for the guest client.
+ keyring_txt = dedent("""
+ [client.{authid}]
+ key = {key}
+
+ """.format(authid=authid,key=key))
+
+ guest_mount.client_id = authid
+ guest_mount.client_remote.write_file(guest_mount.get_keyring_path(),
+ keyring_txt, sudo=True)
+ # Add a guest client section to the ceph config file.
+ self.config_set("client.{0}".format(authid), "debug client", 20)
+ self.config_set("client.{0}".format(authid), "debug objecter", 20)
+ self.set_conf("client.{0}".format(authid),
+ "keyring", guest_mount.get_keyring_path())
+
+ def _auth_metadata_get(self, filedata):
+ """
+ Return a deserialized JSON object, or None
+ """
+ try:
+ data = json.loads(filedata)
+ except json.decoder.JSONDecodeError:
+ data = None
+ return data
+
+ def setUp(self):
+ super(TestVolumes, self).setUp()
+ self.volname = None
+ self.vol_created = False
+ self._enable_multi_fs()
+ self._create_or_reuse_test_volume()
+ self.config_set('mon', 'mon_allow_pool_delete', True)
+ self.volume_start = random.randint(1, (1<<20))
+ self.subvolume_start = random.randint(1, (1<<20))
+ self.group_start = random.randint(1, (1<<20))
+ self.snapshot_start = random.randint(1, (1<<20))
+ self.clone_start = random.randint(1, (1<<20))
+
+ def tearDown(self):
+ if self.vol_created:
+ self._delete_test_volume()
+ super(TestVolumes, self).tearDown()
+
+ def test_connection_expiration(self):
+ # unmount any cephfs mounts
+ for i in range(0, self.CLIENTS_REQUIRED):
+ self.mounts[i].umount_wait()
+ sessions = self._session_list()
+ self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted
+
+ # Get the mgr to definitely mount cephfs
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ sessions = self._session_list()
+ self.assertEqual(len(sessions), 1)
+
+ # Now wait for the mgr to expire the connection:
+ self.wait_until_evicted(sessions[0]['id'], timeout=90)
+
+ def test_volume_create(self):
+ """
+ That the volume can be created and then cleaned up.
+ """
+ volname = self._generate_random_volume_name()
+ self._fs_cmd("volume", "create", volname)
+ volumels = json.loads(self._fs_cmd("volume", "ls"))
+
+ if not (volname in ([volume['name'] for volume in volumels])):
+ raise RuntimeError("Error creating volume '{0}'".format(volname))
+ else:
+ # clean up
+ self._fs_cmd("volume", "rm", volname, "--yes-i-really-mean-it")
+
+ def test_volume_ls(self):
+ """
+ That the existing and the newly created volumes can be listed and
+ are finally cleaned up.
+ """
+ vls = json.loads(self._fs_cmd("volume", "ls"))
+ volumes = [volume['name'] for volume in vls]
+
+ #create new volumes and add them to the existing list of volumes
+ volumenames = self._generate_random_volume_name(2)
+ for volumename in volumenames:
+ self._fs_cmd("volume", "create", volumename)
+ volumes.extend(volumenames)
+
+ # list volumes
+ try:
+ volumels = json.loads(self._fs_cmd('volume', 'ls'))
+ if len(volumels) == 0:
+ raise RuntimeError("Expected the 'fs volume ls' command to list the created volumes.")
+ else:
+ volnames = [volume['name'] for volume in volumels]
+ if collections.Counter(volnames) != collections.Counter(volumes):
+ raise RuntimeError("Error creating or listing volumes")
+ finally:
+ # clean up
+ for volume in volumenames:
+ self._fs_cmd("volume", "rm", volume, "--yes-i-really-mean-it")
+
+ def test_volume_rm(self):
+ """
+ That the volume can only be removed when --yes-i-really-mean-it is used,
+ and that the deleted volume is no longer listed.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ try:
+ self._fs_cmd("volume", "rm", self.volname)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EPERM:
+ raise RuntimeError("expected the 'fs volume rm' command to fail with EPERM, "
+ "but it failed with {0}".format(ce.exitstatus))
+ else:
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ #check if it's gone
+ volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
+ if (self.volname in [volume['name'] for volume in volumes]):
+ raise RuntimeError("Expected the 'fs volume rm' command to succeed. "
+ "The volume {0} not removed.".format(self.volname))
+ else:
+ raise RuntimeError("expected the 'fs volume rm' command to fail.")
+
+ def test_subvolume_marked(self):
+ """
+ ensure a subvolume is marked with the ceph.dir.subvolume xattr
+ """
+ subvolume = self._generate_random_subvolume_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # getpath
+ subvolpath = self._get_subvolume_path(self.volname, subvolume)
+
+ # a subdirectory of a subvolume cannot be moved outside the subvolume once it is
+ # marked with the xattr ceph.dir.subvolume, so test this by attempting to rename the
+ # subvolume path (incarnation) outside the subvolume
+ dstpath = os.path.join(self.mount_a.mountpoint, 'volumes', '_nogroup', 'new_subvol_location')
+ srcpath = os.path.join(self.mount_a.mountpoint, subvolpath)
+ rename_script = dedent("""
+ import os
+ import errno
+ try:
+ os.rename("{src}", "{dst}")
+ except OSError as e:
+ if e.errno != errno.EXDEV:
+ raise RuntimeError("invalid error code on renaming subvolume incarnation out of subvolume directory")
+ else:
+ raise RuntimeError("expected renaming subvolume incarnation out of subvolume directory to fail")
+ """)
+ self.mount_a.run_python(rename_script.format(src=srcpath, dst=dstpath))
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_volume_rm_arbitrary_pool_removal(self):
+ """
+ That an arbitrary pool added to the volume out of band is removed
+ successfully on volume removal.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ new_pool = "new_pool"
+ # add arbitrary data pool
+ self.fs.add_data_pool(new_pool)
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ #check if fs is gone
+ volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
+ volnames = [volume['name'] for volume in volumes]
+ self.assertNotIn(self.volname, volnames)
+
+ #check if osd pools are gone
+ pools = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json-pretty"))
+ for pool in pools:
+ self.assertNotIn(self.volname, pool["application_metadata"].keys())
+
+ def test_volume_rm_when_mon_delete_pool_false(self):
+ """
+ That the volume can only be removed when mon_allow_pool_delete is set
+ to true and verify that the pools are removed after volume deletion.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ self.config_set('mon', 'mon_allow_pool_delete', False)
+ try:
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "expected the 'fs volume rm' command to fail with EPERM, "
+ "but it failed with {0}".format(ce.exitstatus))
+ self.config_set('mon', 'mon_allow_pool_delete', True)
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ #check if fs is gone
+ volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
+ volnames = [volume['name'] for volume in volumes]
+ self.assertNotIn(self.volname, volnames,
+ "volume {0} exists after removal".format(self.volname))
+ #check if pools are gone
+ pools = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json-pretty"))
+ for pool in pools:
+ self.assertNotIn(self.volname, pool["application_metadata"].keys(),
+ "pool {0} exists after volume removal".format(pool["pool_name"]))
+
+ ### basic subvolume operations
+
+ def test_subvolume_create_and_rm(self):
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # make sure it exists
+ subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ self.assertNotEqual(subvolpath, None)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ # make sure it's gone
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume getpath' command to fail. Subvolume not removed.")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_expand(self):
+ """
+ That a subvolume can be expanded in size and its quota matches the expected size.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # expand the subvolume
+ nsize = osize*2
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_shrink(self):
+ """
+ That a subvolume can be shrunk in size and its quota matches the expected size.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # shrink the subvolume
+ nsize = osize // 2
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_fail_invalid_size(self):
+ """
+ That a subvolume cannot be resized to an invalid size and the quota does not change.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # try to resize the subvolume with an invalid size -10
+ nsize = -10
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_fail_zero_size(self):
+ """
+ That a subvolume cannot be resized to a zero size and the quota does not change.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # try to resize the subvolume with size 0
+ nsize = 0
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_quota_lt_used_size(self):
+ """
+ That a subvolume can be resized to a size smaller than the current used size
+ and the resulting quota matches the expected size.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of 10MB
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
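+ # by default the stat size of a CephFS directory reflects its recursive byte count
+ # (ceph.dir.rbytes), so the two measurements below should agree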
+ usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes"))
+ susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip())
+ self.assertEqual(usedsize, susedsize)
+
+ # shrink the subvolume
+ nsize = usedsize // 2
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume resize' command to succeed")
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+
+ def test_subvolume_resize_fail_quota_lt_used_size_no_shrink(self):
+ """
+ That a subvolume cannot be resized to a size smaller than the current used size
+ when --no_shrink is given and the quota does not change.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of 10MB
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
+ usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes"))
+ susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip())
+ self.assertEqual(usedsize, susedsize)
+
+ # shrink the subvolume
+ nsize = usedsize // 2
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize), "--no_shrink")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_expand_on_full_subvolume(self):
+ """
+ That the subvolume can be expanded from a full subvolume and future writes succeed.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*10
+ # create subvolume of quota 10MB and make sure it exists
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of size 10MB and write
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+3)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
+ # create a file of size 5MB and try write more
+ file_size=file_size // 2
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+4)
+ try:
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+ except CommandFailedError:
+ # Not able to write. So expand the subvolume more and try writing the 5MB file again
+ nsize = osize*2
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ try:
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+ except CommandFailedError:
+ self.fail("expected filling subvolume {0} with {1} file of size {2}MB"
+ "to succeed".format(subvolname, number_of_files, file_size))
+ else:
+ self.fail("expected filling subvolume {0} with {1} file of size {2}MB"
+ "to fail".format(subvolname, number_of_files, file_size))
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_idempotence(self):
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # try creating w/ same subvolume name -- should be idempotent
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_idempotence_resize(self):
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # try creating w/ same subvolume name with size -- should set quota
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "1000000000")
+
+ # get subvolume metadata
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ self.assertEqual(subvol_info["bytes_quota"], 1000000000)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_isolated_namespace(self):
+ """
+ Create subvolume in separate rados namespace
+ """
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated")
+
+ # get subvolume metadata
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ self.assertNotEqual(len(subvol_info), 0)
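+ # a namespace-isolated subvolume is placed in a dedicated RADOS namespace named "fsvolumens_<subvolume name>"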
+ self.assertEqual(subvol_info["pool_namespace"], "fsvolumens_" + subvolume)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_invalid_data_pool_layout(self):
+ subvolume = self._generate_random_subvolume_name()
+ data_pool = "invalid_pool"
+ # create subvolume with invalid data pool layout
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid pool layout")
+ else:
+ self.fail("expected the 'fs subvolume create' command to fail")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_rm_force(self):
+ # test removing non-existing subvolume with --force
+ subvolume = self._generate_random_subvolume_name()
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force")
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume rm --force' command to succeed")
+
+ def test_subvolume_create_with_auto_cleanup_on_fail(self):
+ subvolume = self._generate_random_subvolume_name()
+ data_pool = "invalid_pool"
+ # create subvolume with invalid data pool layout fails
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool)
+
+ # check whether subvol path is cleaned up
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of non-existent subvolume")
+ else:
+ self.fail("expected the 'fs subvolume getpath' command to fail")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_invalid_size(self):
+ # create subvolume with an invalid size -1
+ subvolume = self._generate_random_subvolume_name()
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume create' command to fail")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_nonexistent_subvolume_rm(self):
+ # remove non-existing subvolume
+ subvolume = "non_existent_subvolume"
+
+ # try, remove subvolume
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume rm' command to fail")
+
+ def test_nonexistent_subvolume_group_create(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = "non_existent_group"
+
+ # try, creating subvolume in a nonexistent group
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume create' command to fail")
+
+ def test_default_uid_gid_subvolume(self):
+ subvolume = self._generate_random_subvolume_name()
+ expected_uid = 0
+ expected_gid = 0
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # check subvolume's uid and gid
+ stat = self.mount_a.stat(subvol_path)
+ self.assertEqual(stat['st_uid'], expected_uid)
+ self.assertEqual(stat['st_gid'], expected_gid)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_ls(self):
+ # tests the 'fs subvolume ls' command
+
+ subvolumes = []
+
+ # create subvolumes
+ subvolumes = self._generate_random_subvolume_name(3)
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # list subvolumes
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ if len(subvolumels) == 0:
+ self.fail("Expected the 'fs subvolume ls' command to list the created subvolumes.")
+ else:
+ subvolnames = [subvolume['name'] for subvolume in subvolumels]
+ if collections.Counter(subvolnames) != collections.Counter(subvolumes):
+ self.fail("Error creating or listing subvolumes")
+
+ # remove subvolume
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_ls_for_notexistent_default_group(self):
+ # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist
+ # prerequisite: we expect that the volume is created and the default group _nogroup is
+ # NOT created (i.e. a subvolume without group is not created)
+
+ # list subvolumes
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ if len(subvolumels) > 0:
+ raise RuntimeError("Expected the 'fs subvolume ls' command to output an empty list.")
+
+ def test_subvolume_resize_infinite_size(self):
+ """
+ That a subvolume can be resized to an infinite size by unsetting its quota.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size",
+ str(self.DEFAULT_FILE_SIZE*1024*1024))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # resize inf
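+ # resizing to "inf" clears the ceph.quota.max_bytes xattr, so getfattr below returns None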
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf")
+
+ # verify that the quota is None
+ size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")
+ self.assertEqual(size, None)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_infinite_size_future_writes(self):
+ """
+ That a subvolume can be resized to an infinite size and future writes succeed.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size",
+ str(self.DEFAULT_FILE_SIZE*1024*1024*5))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # resize inf
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf")
+
+ # verify that the quota is None
+ size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")
+ self.assertEqual(size, None)
+
+ # create one file of 10MB and try to write
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+5)
+
+ try:
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+ except CommandFailedError:
+ self.fail("expected filling subvolume {0} with {1} file of size {2}MB "
+ "to succeed".format(subvolname, number_of_files, file_size))
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_info(self):
+ # tests the 'fs subvolume info' command
+
+ subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
+ "type", "uid", "features", "state"]
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # get subvolume metadata
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set")
+ self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set")
+ self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty")
+ self.assertEqual(subvol_info["state"], "complete", "expected state to be complete")
+
+ self.assertEqual(len(subvol_info["features"]), 3,
+ msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
+ for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']:
+ self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
+
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
+
+ # get subvolume metadata after quota set
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set")
+ self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize))
+ self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume")
+ self.assertEqual(subvol_info["state"], "complete", "expected state to be complete")
+
+ self.assertEqual(len(subvol_info["features"]), 3,
+ msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
+ for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']:
+ self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_clone_subvolume_info(self):
+
+ # tests the 'fs subvolume info' command for a clone
+ subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
+ "type", "uid"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=1)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
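+ # (cloning is asynchronous, so wait for the clone to reach the complete state)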
+ self._wait_for_clone_to_complete(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, clone))
+ if len(subvol_info) == 0:
+ raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume")
+ for md in subvol_md:
+ if md not in subvol_info.keys():
+ raise RuntimeError("%s not present in the metadata of subvolume" % md)
+ if subvol_info["type"] != "clone":
+ raise RuntimeError("type should be set to clone")
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+
+ ### subvolume group operations
+
+ def test_subvolume_create_and_rm_in_group(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_with_desired_data_pool_layout(self):
+ group1, group2 = self._generate_random_group_name(2)
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group1)
+ group1_path = self._get_subvolume_group_path(self.volname, group1)
+
+ default_pool = self.mount_a.getfattr(group1_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+
+ # add data pool
+ self.fs.add_data_pool(new_pool)
+
+ # create group specifying the new data pool as its pool layout
+ self._fs_cmd("subvolumegroup", "create", self.volname, group2,
+ "--pool_layout", new_pool)
+ group2_path = self._get_subvolume_group_path(self.volname, group2)
+
+ desired_pool = self.mount_a.getfattr(group2_path, "ceph.dir.layout.pool")
+ self.assertEqual(desired_pool, new_pool)
+
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group1)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group2)
+
+ ### authorize operations
+
+ def test_authorize_deauthorize_legacy_subvolume(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ authid = "alice"
+
+ guest_mount = self.mount_b
+ guest_mount.umount_wait()
+
+ # emulate an old-fashioned subvolume in a custom group
+ createpath = os.path.join(".", "volumes", group, subvolume)
+ self.mount_a.run_shell(['mkdir', '-p', createpath])
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool)
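+ # a legacy subvolume is just a directory under volumes/<group> carrying a pool layout
+ # xattr; it has no mgr/volumes metadata of its own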
+
+ mount_path = os.path.join("/", "volumes", group, subvolume)
+
+ # authorize guest authID read-write access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id")
+
+ # guest authID should exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertIn("client.{0}".format(authid), existing_ids)
+
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mount, authid, key)
+
+ # mount the subvolume, and write to it
+ guest_mount.mount(mount_path=mount_path)
+ guest_mount.write_n_mb("data.bin", 1)
+
+ # authorize guest authID read access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r")
+
+ # the guest client sees the change to read-only access only after a
+ # remount of the subvolume.
+ guest_mount.umount_wait()
+ guest_mount.mount(mount_path=mount_path)
+
+ # read existing content of the subvolume
+ self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"])
+ # cannot write into read-only subvolume
+ with self.assertRaises(CommandFailedError):
+ guest_mount.write_n_mb("rogue.bin", 1)
+
+ # cleanup
+ guest_mount.umount_wait()
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid,
+ "--group_name", group)
+ # guest authID should no longer exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertNotIn("client.{0}".format(authid), existing_ids)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_authorize_deauthorize_subvolume(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ authid = "alice"
+
+ guest_mount = self.mount_b
+ guest_mount.umount_wait()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+ mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume,
+ "--group_name", group).rstrip()
+
+ # authorize guest authID read-write access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id")
+
+ # guest authID should exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertIn("client.{0}".format(authid), existing_ids)
+
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mount, authid, key)
+
+ # mount the subvolume, and write to it
+ guest_mount.mount(mount_path=mount_path)
+ guest_mount.write_n_mb("data.bin", 1)
+
+ # authorize guest authID read access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r")
+
+ # the guest client sees the change to read-only access only after a
+ # remount of the subvolume.
+ guest_mount.umount_wait()
+ guest_mount.mount(mount_path=mount_path)
+
+ # read existing content of the subvolume
+ self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"])
+ # cannot write into read-only subvolume
+ with self.assertRaises(CommandFailedError):
+ guest_mount.write_n_mb("rogue.bin", 1)
+
+ # cleanup
+ guest_mount.umount_wait()
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid,
+ "--group_name", group)
+ # guest authID should no longer exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertNotIn("client.{0}".format(authid), existing_ids)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_multitenant_subvolumes(self):
+ """
+ That subvolume access can be restricted to a tenant.
+
+ That metadata used to enforce tenant isolation of
+ subvolumes is stored as a two-way mapping between auth
+ IDs and subvolumes that they're authorized to access.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ guest_mount = self.mount_b
+
+ # Guest clients belonging to different tenants, but using the same
+ # auth ID.
+ auth_id = "alice"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+ guestclient_2 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant2",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Check that subvolume metadata file is created on subvolume creation.
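+ # the per-subvolume metadata file lives at the volume root and is named "_<group>:<subvolume>.meta"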
+ subvol_metadata_filename = "_{0}:{1}.meta".format(group, subvolume)
+ self.assertIn(subvol_metadata_filename, guest_mount.ls("volumes"))
+
+ # Authorize 'guestclient_1', using auth ID 'alice' and belonging to
+ # 'tenant1', with 'rw' access to the volume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that auth metadata file for auth ID 'alice', is
+ # created on authorizing 'alice' access to the subvolume.
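+ # auth metadata files also live at the volume root and are named "$<auth_id>.meta"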
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Verify that the auth metadata file stores the tenant ID that the
+ # auth ID belongs to, the auth ID's authorized access levels
+ # for different subvolumes, versioning details, etc.
+ expected_auth_metadata = {
+ "version": 5,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "{0}/{1}".format(group,subvolume): {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
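+ # the on-disk version may be newer than this baseline, so check it with >= and
+ # compare the rest of the structure for equality separately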
+ auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename)))
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # Verify that the subvolume metadata file stores info about auth IDs
+ # and their access levels to the subvolume, versioning details, etc.
+ expected_subvol_metadata = {
+ "version": 1,
+ "compat_version": 1,
+ "auths": {
+ "alice": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+ subvol_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(subvol_metadata_filename)))
+
+ self.assertGreaterEqual(subvol_metadata["version"], expected_subvol_metadata["version"])
+ del expected_subvol_metadata["version"]
+ del subvol_metadata["version"]
+ self.assertEqual(expected_subvol_metadata, subvol_metadata)
+
+ # Cannot authorize 'guestclient_2' to access the volume.
+ # It uses auth ID 'alice', which has already been used by a
+ # 'guestclient_1' belonging to another tenant for accessing
+ # the volume.
+
+ try:
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_2["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_2["tenant_id"])
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "Invalid error code returned on authorize of subvolume with same auth_id but different tenant_id")
+ else:
+ self.fail("expected the 'fs subvolume authorize' command to fail")
+
+ # Check that auth metadata file is cleaned up on removing
+ # auth ID's only access to a volume.
+
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id,
+ "--group_name", group)
+ self.assertNotIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Check that subvolume metadata file is cleaned up on subvolume deletion.
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self.assertNotIn(subvol_metadata_filename, guest_mount.ls("volumes"))
+
+ # clean up
+ guest_mount.umount_wait()
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_authorized_list(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ authid1 = "alice"
+ authid2 = "guest1"
+ authid3 = "guest2"
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # authorize alice authID read-write access to subvolume
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid1,
+ "--group_name", group)
+ # authorize guest1 authID read-write access to subvolume
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid2,
+ "--group_name", group)
+ # authorize guest2 authID read access to subvolume
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid3,
+ "--group_name", group, "--access_level", "r")
+
+ # list authorized-ids of the subvolume
+ expected_auth_list = [{'alice': 'rw'}, {'guest1': 'rw'}, {'guest2': 'r'}]
+ auth_list = json.loads(self._fs_cmd('subvolume', 'authorized_list', self.volname, subvolume, "--group_name", group))
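+ # assertCountEqual checks that both lists contain the same elements regardless of order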
+ self.assertCountEqual(expected_auth_list, auth_list)
+
+ # cleanup
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid1,
+ "--group_name", group)
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid2,
+ "--group_name", group)
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid3,
+ "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_authorize_auth_id_not_created_by_mgr_volumes(self):
+ """
+ If the auth_id already exists and was not created by the mgr plugin,
+ authorizing that auth_id is not allowed by default.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # Create auth_id
+ self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.guest1",
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ try:
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "Invalid error code returned on authorize of subvolume for auth_id created out of band")
+ else:
+ self.fail("expected the 'fs subvolume authorize' command to fail")
+
+ # clean up
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_authorize_allow_existing_id_option(self):
+ """
+ If the auth_id already exists and was not created by mgr volumes,
+ authorizing it is not allowed by default, but is allowed with the
+ allow_existing_id option.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # Create auth_id
+ self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.guest1",
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # By default, 'guestclient_1' cannot be authorized because its auth ID
+ # already exists and was not created by mgr volumes; passing
+ # '--allow-existing-id' permits the authorization.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"], "--allow-existing-id")
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id,
+ "--group_name", group)
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_deauthorize_auth_id_after_out_of_band_update(self):
+ """
+ If the auth_id authorized by mgr/volumes plugin is updated
+ out of band, the auth_id should not be deleted after a
+ deauthorize. It should only remove caps associated with it.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ subvol_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume,
+ "--group_name", group).rstrip()
+
+ # Update caps for guestclient_1 out of band
+ out = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "caps", "client.guest1",
+ "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path),
+ "osd", "allow rw pool=cephfs_data",
+ "mon", "allow r",
+ "mgr", "allow *"
+ )
+
+ # Deauthorize guestclient_1
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group)
+
+ # Validate the caps of guestclient_1 after deauthorize. The auth ID itself should not
+ # have been deleted; the mgr and mds caps updated out of band should still be present.
+ out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty"))
+
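+ # deauthorize removes only the caps tied to this subvolume: the subvolume path is
+ # dropped from the mds caps and the osd cap goes away, while the out-of-band
+ # group-path mds cap and the mgr cap remain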
+ self.assertEqual("client.guest1", out[0]["entity"])
+ self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"])
+ self.assertEqual("allow *", out[0]["caps"]["mgr"])
+ self.assertNotIn("osd", out[0]["caps"])
+
+ # clean up
+ out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_recover_auth_metadata_during_authorize(self):
+ """
+ That auth metadata manager can recover from partial auth updates using
+ metadata files, which store auth info and its update status info. This
+ test validates the recovery during authorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that auth metadata file for auth ID 'guest1', is
+ # created on authorizing 'guest1' access to the subvolume.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+ expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ # Induce partial auth update state by modifying the auth metadata file,
+ # and then run authorize again.
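+ # (the sed below flips the 'dirty' flags from false to true, making the metadata
+ # look like an interrupted update)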
+ guest_mount.run_shell(['sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Authorize 'guestclient_1' to access the subvolume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+ self.assertEqual(auth_metadata_content, expected_auth_metadata_content)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_recover_auth_metadata_during_deauthorize(self):
+ """
+ That auth metadata manager can recover from partial auth updates using
+ metadata files, which store auth info and its update status info. This
+ test validates the recovery during deauthorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ guestclient_1 = {
+ "auth_id": "guest1",
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume1.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that auth metadata file for auth ID 'guest1', is
+ # created on authorizing 'guest1' access to the subvolume1.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+ expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ # Authorize 'guestclient_1' to access the subvolume2.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Induce partial auth update state by modifying the auth metadata file,
+ # and then run de-authorize.
+ guest_mount.run_shell(['sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Deauthorize 'guestclient_1' from subvolume2.
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group)
+
+ auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+ self.assertEqual(auth_metadata_content, expected_auth_metadata_content)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_update_old_style_auth_metadata_to_new_during_authorize(self):
+ """
+ CephVolumeClient stores the subvolume data in the auth metadata file under a
+ 'volumes' key, as there was no subvolume namespace. That key does not make sense
+ with mgr/volumes. This test validates the transparent update of the 'volumes'
+ key to the 'subvolumes' key in the auth metadata file during authorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume1.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that auth metadata file for auth ID 'guest1', is
+ # created on authorizing 'guest1' access to the subvolume1.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Replace 'subvolumes' to 'volumes', old style auth-metadata file
+ guest_mount.run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Authorize 'guestclient_1' to access the subvolume2. This should transparently update 'volumes' to 'subvolumes'
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ expected_auth_metadata = {
+ "version": 5,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "{0}/{1}".format(group,subvolume1): {
+ "dirty": False,
+ "access_level": "rw"
+ },
+ "{0}/{1}".format(group,subvolume2): {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group)
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_update_old_style_auth_metadata_to_new_during_deauthorize(self):
+ """
+ CephVolumeClient stores the subvolume data in the auth metadata file under a
+ 'volumes' key, as there was no subvolume namespace. That key does not make sense
+ with mgr/volumes. This test validates the transparent update of the 'volumes'
+ key to the 'subvolumes' key in the auth metadata file during deauthorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume1.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Authorize 'guestclient_1' to access the subvolume2.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that auth metadata file for auth ID 'guest1', is created.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Replace 'subvolumes' to 'volumes', old style auth-metadata file
+ guest_mount.run_shell(['sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)])
+
+ # Deauthorize 'guestclient_1' from subvolume2. This should update 'volumes' to 'subvolumes'
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group)
+
+ expected_auth_metadata = {
+ "version": 5,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "{0}/{1}".format(group,subvolume1): {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_evict_client(self):
+ """
+ That a subvolume client can be evicted based on the auth ID
+ """
+
+ subvolumes = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # mounts[0] and mounts[1] would be used as guests to mount the volumes/shares.
+ for i in range(0, 2):
+ self.mounts[i].umount_wait()
+ guest_mounts = (self.mounts[0], self.mounts[1])
+ auth_id = "guest"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create two subvolumes. Authorize 'guest' auth ID to mount the two
+ # subvolumes. Mount the two subvolumes. Write data to the volumes.
+ for i in range(2):
+ # Create subvolume.
+ self._fs_cmd("subvolume", "create", self.volname, subvolumes[i], "--group_name", group)
+
+ # authorize guest authID read-write access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolumes[i], guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i],
+ "--group_name", group).rstrip()
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mounts[i], auth_id, key)
+
+ # mount the subvolume, and write to it
+ guest_mounts[i].mount(mount_path=mount_path)
+ guest_mounts[i].write_n_mb("data.bin", 1)
+
+ # Evict the guest client, guest_mounts[0], which uses auth ID 'guest' and has
+ # mounted one subvolume.
+ self._fs_cmd("subvolume", "evict", self.volname, subvolumes[0], auth_id, "--group_name", group)
+
+ # Evicted guest client, guest_mounts[0], should not be able to do
+ # any more metadata ops. It should start failing all operations
+ # when it sees that its own address is in the blocklist.
+ try:
+ guest_mounts[0].write_n_mb("rogue.bin", 1)
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError("post-eviction write should have failed!")
+
+ # The blocklisted guest client should now be unmountable
+ guest_mounts[0].umount_wait()
+
+ # Guest client, guest_mounts[1], using the same auth ID 'guest', but
+ # has mounted the other volume, should be able to use its volume
+ # unaffected.
+ guest_mounts[1].write_n_mb("data.bin.1", 1)
+
+ # Cleanup.
+ guest_mounts[1].umount_wait()
+ for i in range(2):
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolumes[i], auth_id, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolumes[i], "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_with_invalid_data_pool_layout(self):
+ group = self._generate_random_group_name()
+ data_pool = "invalid_pool"
+ # create group with invalid data pool layout
+ try:
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup create' command to fail")
+
+ def test_subvolume_group_rm_force(self):
+ # test removing non-existing subvolume group with --force
+ group = self._generate_random_group_name()
+ try:
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force")
+ except CommandFailedError:
+ raise RuntimeError("expected the 'fs subvolumegroup rm --force' command to succeed")
+
+ def test_subvolume_group_create_with_auto_cleanup_on_fail(self):
+ group = self._generate_random_group_name()
+ data_pool = "invalid_pool"
+ # create group with invalid data pool layout
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool)
+
+ # check whether group path is cleaned up
+ try:
+ self._fs_cmd("subvolumegroup", "getpath", self.volname, group)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail")
+
+ def test_subvolume_create_with_desired_data_pool_layout_in_group(self):
+ subvol1, subvol2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # create group. this also helps set default pool layout for subvolumes
+ # created within the group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+ subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group)
+
+ default_pool = self.mount_a.getfattr(subvol1_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+
+ # add data pool
+ self.fs.add_data_pool(new_pool)
+
+ # create subvolume specifying the new data pool as its pool layout
+ self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group,
+ "--pool_layout", new_pool)
+ subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group)
+
+ desired_pool = self.mount_a.getfattr(subvol2_path, "ceph.dir.layout.pool")
+ self.assertEqual(desired_pool, new_pool)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_create_with_desired_mode(self):
+ group1, group2 = self._generate_random_group_name(2)
+ # default mode
+ expected_mode1 = "755"
+ # desired mode
+ expected_mode2 = "777"
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group1)
+ self._fs_cmd("subvolumegroup", "create", self.volname, group2, "--mode", "777")
+
+ group1_path = self._get_subvolume_group_path(self.volname, group1)
+ group2_path = self._get_subvolume_group_path(self.volname, group2)
+
+ # check group's mode
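+ # note: the adjacent string literals '-c' '%a' concatenate to '-c%a',
+ # which stat accepts as option plus format string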
+ actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group1_path]).stdout.getvalue().strip()
+ actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', group2_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode1, expected_mode1)
+ self.assertEqual(actual_mode2, expected_mode2)
+
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group1)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group2)
+
+ def test_subvolume_group_create_with_desired_uid_gid(self):
+ """
+ That the subvolume group can be created with the desired uid and gid, and that its uid and gid match the
+ expected values.
+ """
+ uid = 1000
+ gid = 1000
+
+ # create subvolume group
+ subvolgroupname = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid))
+
+ # make sure it exists
+ subvolgrouppath = self._get_subvolume_group_path(self.volname, subvolgroupname)
+ self.assertNotEqual(subvolgrouppath, None)
+
+ # verify the uid and gid
+ suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolgrouppath]).stdout.getvalue().strip())
+ sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolgrouppath]).stdout.getvalue().strip())
+ self.assertEqual(uid, suid)
+ self.assertEqual(gid, sgid)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname)
+
+ def test_subvolume_create_with_desired_mode_in_group(self):
+ subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+
+ group = self._generate_random_group_name()
+ # default mode
+ expected_mode1 = "755"
+ # desired mode
+ expected_mode2 = "777"
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, "--mode", "777")
+ # check whether mode 0777 also works
+ self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group, "--mode", "0777")
+
+ subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group)
+ subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group)
+ subvol3_path = self._get_subvolume_path(self.volname, subvol3, group_name=group)
+
+ # check subvolume's mode
+ actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip()
+ actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol2_path]).stdout.getvalue().strip()
+ actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol3_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode1, expected_mode1)
+ self.assertEqual(actual_mode2, expected_mode2)
+ self.assertEqual(actual_mode3, expected_mode2)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol3, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_desired_uid_gid(self):
+ """
+ That the subvolume can be created with the desired uid and gid and its uid and gid matches the
+ expected values.
+ """
+ uid = 1000
+ gid = 1000
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify the uid and gid
+ suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolpath]).stdout.getvalue().strip())
+ sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolpath]).stdout.getvalue().strip())
+ self.assertEqual(uid, suid)
+ self.assertEqual(gid, sgid)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_nonexistent_subvolume_group_rm(self):
+ group = "non_existent_group"
+
+ # try, remove subvolume group
+ try:
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail")
+
+ def test_default_uid_gid_subvolume_group(self):
+ group = self._generate_random_group_name()
+ expected_uid = 0
+ expected_gid = 0
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ group_path = self._get_subvolume_group_path(self.volname, group)
+
+ # check group's uid and gid
+ stat = self.mount_a.stat(group_path)
+ self.assertEqual(stat['st_uid'], expected_uid)
+ self.assertEqual(stat['st_gid'], expected_gid)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_ls(self):
+ # tests the 'fs subvolumegroup ls' command
+
+ subvolumegroups = []
+
+ # create subvolumegroups
+ subvolumegroups = self._generate_random_group_name(3)
+ for groupname in subvolumegroups:
+ self._fs_cmd("subvolumegroup", "create", self.volname, groupname)
+
+ subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname))
+ if len(subvolumegroupls) == 0:
+ raise RuntimeError("Expected the 'fs subvolumegroup ls' command to list the created subvolume groups")
+ else:
+ subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls]
+ if collections.Counter(subvolgroupnames) != collections.Counter(subvolumegroups):
+ raise RuntimeError("Error creating or listing subvolume groups")
+
+ def test_subvolume_group_ls_for_nonexistent_volume(self):
+ # tests the 'fs subvolumegroup ls' command when no subvolumegroup exists
+ # prerequisite: we expect that the test volume is created and a subvolumegroup is NOT created
+
+ # list subvolume groups
+ subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname))
+ if len(subvolumegroupls) > 0:
+ raise RuntimeError("Expected the 'fs subvolumegroup ls' command to output an empty list")
+
+ ### snapshot operations
+
+ def test_subvolume_snapshot_create_and_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info(self):
+
+ """
+ tests the 'fs subvolume snapshot info' command
+ """
+
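+ # keys expected in the 'fs subvolume snapshot info' output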
+ snap_md = ["created_at", "data_pool", "has_pending_clones", "size"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot, snap_missing = self._generate_random_snapshot_name(2)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=1)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # snapshot info for non-existent snapshot
+ try:
+ self._get_subvolume_snapshot_info(self.volname, subvolume, snap_missing)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot info of non-existent snapshot")
+ else:
+ self.fail("expected snapshot info of non-existent snapshot to fail")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_create_idempotence(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # try creating w/ same subvolume snapshot name -- should be idempotent
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_nonexistent_subvolume_snapshot_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove snapshot again
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume snapshot rm' command to fail")
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_rm_force(self):
+ # test removing a non-existing subvolume snapshot with --force
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # remove snapshot
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, "--force")
+ except CommandFailedError:
+ raise RuntimeError("expected the 'fs subvolume snapshot rm --force' command to succeed")
+
+ def test_subvolume_snapshot_in_group(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot subvolume in group
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_snapshot_ls(self):
+ # tests the 'fs subvolume snapshot ls' command
+
+ snapshots = []
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # create subvolume snapshots
+ snapshots = self._generate_random_snapshot_name(3)
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume))
+ if len(subvolsnapshotls) == 0:
+ self.fail("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots")
+ else:
+ snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls]
+ if collections.Counter(snapshotnames) != collections.Counter(snapshots):
+ self.fail("Error creating or listing subvolume snapshots")
+
+ # remove snapshot
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_snapshot_unsupported_status(self):
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # snapshot group
+ try:
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOSYS, "invalid error code on subvolumegroup snapshot create")
+ else:
+ self.fail("expected subvolumegroup snapshot create command to fail")
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_create_and_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot group
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_idempotence(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot group
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # try creating snapshot w/ same snapshot name -- should be idempotent
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_nonexistent_subvolume_group_snapshot_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot group
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+
+ # remove snapshot
+ try:
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup snapshot rm' command to fail")
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_rm_force(self):
+ # test removing non-existing subvolume group snapshot with --force
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+ # remove snapshot
+ try:
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force")
+ except CommandFailedError:
+ raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed")
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_ls(self):
+ # tests the 'fs subvolumegroup snapshot ls' command
+
+ snapshots = []
+
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumegroup snapshots
+ snapshots = self._generate_random_snapshot_name(3)
+ for snapshot in snapshots:
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ subvolgrpsnapshotls = json.loads(self._fs_cmd('subvolumegroup', 'snapshot', 'ls', self.volname, group))
+ if len(subvolgrpsnapshotls) == 0:
+ raise RuntimeError("Expected the 'fs subvolumegroup snapshot ls' command to list the created subvolume group snapshots")
+ else:
+ snapshotnames = [snapshot['name'] for snapshot in subvolgrpsnapshotls]
+ if collections.Counter(snapshotnames) != collections.Counter(snapshots):
+ raise RuntimeError("Error creating or listing subvolume group snapshots")
+
+ def test_async_subvolume_rm(self):
+ subvolumes = self._generate_random_subvolume_name(100)
+
+ # create subvolumes
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ self._do_subvolume_io(subvolume, number_of_files=10)
+
+ self.mount_a.umount_wait()
+
+ # remove subvolumes
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ self.mount_a.mount()
+
+ # verify trash dir is clean
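+ # removal is asynchronous -- 'rm' only moves a subvolume to the internal
+ # trash and the purge happens in the background, hence the longer timeout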
+ self._wait_for_trash_empty(timeout=300)
+
+ def test_subvolume_inherited_snapshot_ls(self):
+ # tests the scenario where 'fs subvolume snapshot ls' command
+ # should not list inherited snapshots created as part of snapshot
+ # at ancestral level
+
+ snapshots = []
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snap_count = 3
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # create subvolume snapshots
+ snapshots = self._generate_random_snapshot_name(snap_count)
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # Create snapshot at ancestral level
+ ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_1")
+ ancestral_snappath2 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_2")
+ self.mount_a.run_shell(['mkdir', '-p', ancestral_snappath1, ancestral_snappath2])
+
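+ # snapshots taken on an ancestor directory are also visible under each
+ # descendant's .snap dir; 'snapshot ls' must not list these inherited
+ # snapshots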
+ subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume, group))
+ self.assertEqual(len(subvolsnapshotls), snap_count)
+
+ # remove ancestral snapshots
+ self.mount_a.run_shell(['rmdir', ancestral_snappath1, ancestral_snappath2])
+
+ # remove snapshot
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_inherited_snapshot_info(self):
+ """
+ tests the scenario where 'fs subvolume snapshot info' command
+ should fail for inherited snapshots created as part of snapshot
+ at ancestral level
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Create snapshot at ancestral level
+ ancestral_snap_name = "ancestral_snap_1"
+ ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name)
+ self.mount_a.run_shell(['mkdir', '-p', ancestral_snappath1])
+
+ # Validate existence of inherited snapshot
+ group_path = os.path.join(".", "volumes", group)
+ inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip())
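+ # ancestor snapshots are exposed in a descendant's .snap dir under the
+ # long-form name "_<snapname>_<inode of the snapshotted ancestor>"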
+ inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir)
+ inherited_snappath = os.path.join(".", "volumes", group, subvolume, ".snap", inherited_snap)
+ self.mount_a.run_shell(['ls', inherited_snappath])
+
+ # snapshot info on inherited snapshot
+ try:
+ self._get_subvolume_snapshot_info(self.volname, subvolume, inherited_snap, group)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on snapshot info of inherited snapshot")
+ else:
+ self.fail("expected snapshot info of inherited snapshot to fail")
+
+ # remove ancestral snapshots
+ self.mount_a.run_shell(['rmdir', ancestral_snappath1])
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_inherited_snapshot_rm(self):
+ """
+ tests the scenario where 'fs subvolume snapshot rm' command
+ should fail for inherited snapshots created as part of snapshot
+ at ancestral level
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Create snapshot at ancestral level
+ ancestral_snap_name = "ancestral_snap_1"
+ ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name)
+ self.mount_a.run_shell(['mkdir', '-p', ancestral_snappath1])
+
+ # Validate existence of inherited snap
+ group_path = os.path.join(".", "volumes", group)
+ inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip())
+ inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir)
+ inherited_snappath = os.path.join(".", "volumes", group, subvolume, ".snap", inherited_snap)
+ self.mount_a.run_shell(['ls', inherited_snappath])
+
+ # inherited snapshot should not be deletable
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, inherited_snap, "--group_name", group)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when removing inherited snapshot")
+ else:
+ self.fail("expected removing inheirted snapshot to fail")
+
+ # remove ancestral snapshots
+ self.mount_a.run_shell(['rmdir', ancestral_snappath1])
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_subvolumegroup_snapshot_name_conflict(self):
+ """
+ tests that creating a subvolume snapshot with the same name as its
+ subvolumegroup snapshot fails.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ group_snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Create subvolumegroup snapshot
+ group_snapshot_path = os.path.join(".", "volumes", group, ".snap", group_snapshot)
+ self.mount_a.run_shell(['mkdir', '-p', group_snapshot_path])
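+ # (creating a directory under .snap takes a CephFS snapshot directly,
+ # bypassing the volumes interface)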
+
+ # Validate existence of subvolumegroup snapshot
+ self.mount_a.run_shell(['ls', group_snapshot_path])
+
+ # Creation of a subvolume snapshot with its subvolumegroup snapshot's name should fail
+ try:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, group_snapshot, "--group_name", group)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when creating subvolume snapshot with same name as subvolume group snapshot")
+ else:
+ self.fail("expected subvolume snapshot creation with same name as subvolumegroup snapshot to fail")
+
+ # remove subvolumegroup snapshot
+ self.mount_a.run_shell(['rmdir', group_snapshot_path])
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_upgrade_legacy_to_v1(self):
+ """
+ poor man's upgrade test -- rather than going through a full upgrade cycle,
+ emulate subvolumes by going through the wormhole and verify that they are
+ accessible.
+ Further ensure that a legacy subvolume is not updated to v2.
+ """
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume -- one in the default group and
+ # the other in a custom group
+ createpath1 = os.path.join(".", "volumes", "_nogroup", subvolume1)
+ self.mount_a.run_shell(['mkdir', '-p', createpath1])
+
+ # create group
+ createpath2 = os.path.join(".", "volumes", group, subvolume2)
+ self.mount_a.run_shell(['mkdir', '-p', createpath2])
+
+ # this would auto-upgrade on access without anyone noticing
+ subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume1)
+ self.assertNotEqual(subvolpath1, None)
+ subvolpath1 = subvolpath1.rstrip() # strip any trailing newline
+
+ subvolpath2 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume2, group)
+ self.assertNotEqual(subvolpath2, None)
+ subvolpath2 = subvolpath2.rstrip() # strip any trailing newline
+
+ # and... the subvolume path returned should be what we created behind the scenes
+ self.assertEqual(createpath1[1:], subvolpath1)
+ self.assertEqual(createpath2[1:], subvolpath2)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvolume1, version=1, legacy=True)
+ self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1, legacy=True)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_no_upgrade_v1_sanity(self):
+ """
+ poor man's upgrade test -- theme continues...
+
+ This test ensures v1 subvolumes are retained as-is (due to a snapshot being present), and runs through
+ a series of operations on the v1 subvolume to ensure they work as expected.
+ """
+ subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
+ "type", "uid", "features", "state"]
+ snap_md = ["created_at", "data_pool", "has_pending_clones", "size"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1, clone2 = self._generate_random_clone_name(2)
+ mode = "777"
+ uid = "1000"
+ gid = "1000"
+
+ # emulate a v1 subvolume -- in the default group
+ subvolume_path = self._create_v1_subvolume(subvolume)
+
+ # getpath
+ subvolpath = self._get_subvolume_path(self.volname, subvolume)
+ self.assertEqual(subvolpath, subvolume_path)
+
+ # ls
+ subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes)))
+ self.assertEqual(subvolumes[0]['name'], subvolume,
+ "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name']))
+
+ # info
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertEqual(subvol_info["state"], "complete",
+ msg="expected state to be 'complete', found '{0}".format(subvol_info["state"]))
+ self.assertEqual(len(subvol_info["features"]), 2,
+ msg="expected 1 feature, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
+ for feature in ['snapshot-clone', 'snapshot-autoprotect']:
+ self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
+
+ # resize
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024*10
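+ # ('subvolume resize' takes the new quota in bytes; confirmed via bytes_quota below)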
+ self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+ self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize))
+
+ # create (idempotent) (change some attrs, to ensure attrs are preserved from the snapshot on clone)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=8)
+
+ # snap-create
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone1)
+
+ # ensure clone is v2
+ self._assert_meta_location_and_version(self.volname, clone1, version=2)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone1, source_version=1)
+
+ # clone (older snapshot)
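+ # 'fake' is assumed to be the snapshot pre-created by the
+ # _create_v1_subvolume() helper above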
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, 'fake', clone2)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone2)
+
+ # ensure clone is v2
+ self._assert_meta_location_and_version(self.volname, clone2, version=2)
+
+ # verify clone
+ # TODO: rentries will mismatch till this is fixed https://tracker.ceph.com/issues/46747
+ #self._verify_clone(subvolume, 'fake', clone2, source_version=1)
+
+ # snap-info
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # snap-ls
+ subvol_snapshots = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume))
+ self.assertEqual(len(subvol_snapshots), 2, "subvolume snapshot ls count mismatch, expected '2', found {0}".format(len(subvol_snapshots)))
+ snapshotnames = [snapshot['name'] for snapshot in subvol_snapshots]
+ for name in [snapshot, 'fake']:
+ self.assertIn(name, snapshotnames, msg="expected snapshot '{0}' in subvolume snapshot ls".format(name))
+
+ # snap-rm
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, "fake")
+
+ # ensure volume is still at version 1
+ self._assert_meta_location_and_version(self.volname, subvolume, version=1)
+
+ # rm
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+ self._fs_cmd("subvolume", "rm", self.volname, clone2)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_no_upgrade_v1_to_v2(self):
+ """
+ poor man's upgrade test -- theme continues...
+ ensure v1 to v2 upgrades are not done automatically due to various states of v1
+ """
+ subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3)
+ group = self._generate_random_group_name()
+
+ # emulate a v1 subvolume -- in the default group
+ subvol1_path = self._create_v1_subvolume(subvolume1)
+
+ # emulate a v1 subvolume -- in a custom group
+ subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group)
+
+ # emulate a v1 subvolume -- in a clone pending state
+ self._create_v1_subvolume(subvolume3, subvol_type='clone', has_snapshot=False, state='pending')
+
+ # this would attempt auto-upgrade on access, but fail to do so as snapshots exist
+ subvolpath1 = self._get_subvolume_path(self.volname, subvolume1)
+ self.assertEqual(subvolpath1, subvol1_path)
+
+ subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group)
+ self.assertEqual(subvolpath2, subvol2_path)
+
+ # this would attempt auto-upgrade on access, but fail to do so as volume is not complete
+ # use clone status, as only certain operations are allowed in pending state
+ status = json.loads(self._fs_cmd("clone", "status", self.volname, subvolume3))
+ self.assertEqual(status["status"]["state"], "pending")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, "fake")
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume2, "fake", group)
+
+ # ensure metadata file is in v1 location, with version retained as v1
+ self._assert_meta_location_and_version(self.volname, subvolume1, version=1)
+ self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume3)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on rm of subvolume undergoing clone")
+ else:
+ self.fail("expected rm of subvolume undergoing clone to fail")
+
+ # ensure metadata file is in v1 location, with version retained as v1
+ self._assert_meta_location_and_version(self.volname, subvolume3, version=1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume3, "--force")
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_upgrade_v1_to_v2(self):
+ """
+ poor man's upgrade test -- theme continues...
+ ensure v1 to v2 upgrades work
+ """
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # emulate a v1 subvolume -- in the default group
+ subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False)
+
+ # emulate a v1 subvolume -- in a custom group
+ subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group, has_snapshot=False)
+
+ # this would attempt auto-upgrade on access
+ subvolpath1 = self._get_subvolume_path(self.volname, subvolume1)
+ self.assertEqual(subvolpath1, subvol1_path)
+
+ subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group)
+ self.assertEqual(subvolpath2, subvol2_path)
+
+ # ensure metadata file is in v2 location, with version retained as v2
+ self._assert_meta_location_and_version(self.volname, subvolume1, version=2)
+ self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=2)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_rm_with_snapshots(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove subvolume -- should fail with ENOTEMPTY since it has snapshots
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOTEMPTY:
+ raise RuntimeError("invalid error code returned when deleting subvolume with snapshots")
+ else:
+ raise RuntimeError("expected subvolume deletion to fail")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_without_snapshots(self):
+ """
+ ensure a retain-snapshots based delete of a subvolume with no snapshots deletes the subvolume
+ """
+ subvolume = self._generate_random_subvolume_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # remove with snapshot retention (should remove volume, no snapshots to retain)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_with_snapshots(self):
+ """
+ ensure a retain-snapshots based delete of a subvolume with snapshots retains the subvolume;
+ also test allowed and disallowed operations on a retained subvolume
+ """
+ snap_md = ["created_at", "data_pool", "has_pending_clones", "size"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove subvolume -- should fail with ENOTEMPTY since it has snapshots
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of retained subvolume with snapshots")
+ else:
+ self.fail("expected rm of subvolume with retained snapshots to fail")
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "snapshot-retained",
+ msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"]))
+
+ ## test allowed ops in retained state
+ # ls
+ subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes)))
+ self.assertEqual(subvolumes[0]['name'], subvolume,
+ "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name']))
+
+ # snapshot info
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # rm --force (allowed but should fail)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots")
+ else:
+ self.fail("expected rm of subvolume with retained snapshots to fail")
+
+ # rm (allowed but should fail)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots")
+ else:
+ self.fail("expected rm of subvolume with retained snapshots to fail")
+
+ ## test disallowed ops
+ # getpath
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots")
+ else:
+ self.fail("expected getpath of subvolume with retained snapshots to fail")
+
+ # resize
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on resize of subvolume with retained snapshots")
+ else:
+ self.fail("expected resize of subvolume with retained snapshots to fail")
+
+ # snap-create
+ try:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, "fail")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot create of subvolume with retained snapshots")
+ else:
+ self.fail("expected snapshot create of subvolume with retained snapshots to fail")
+
+ # remove snapshot (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_invalid_recreate(self):
+ """
+ ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # recreate subvolume with an invalid pool
+ data_pool = "invalid_pool"
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on recreate of subvolume with invalid poolname")
+ else:
+ self.fail("expected recreate of subvolume with invalid poolname to fail")
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "snapshot-retained",
+ msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"]))
+
+ # getpath
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots")
+ else:
+ self.fail("expected getpath of subvolume with retained snapshots to fail")
+
+ # remove snapshot (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_trash_busy_recreate(self):
+ """
+ ensure retained subvolume recreate fails if its trash is not yet purged
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # fake a trash entry
+ self._update_fake_trash(subvolume)
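+ # (the helper is assumed to plant a dummy entry in the subvolume's trash
+ # dir, emulating a purge that has not completed yet)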
+
+ # recreate subvolume
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of subvolume with purge pending")
+ else:
+ self.fail("expected recreate of subvolume with purge pending to fail")
+
+ # clear fake trash entry
+ self._update_fake_trash(subvolume, create=False)
+
+ # recreate subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_trash_busy_recreate_clone(self):
+ """
+ ensure retained clone recreate fails if its trash is not yet purged
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # clone subvolume snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # snapshot clone
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot)
+
+ # remove clone with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots")
+
+ # fake a trash entry
+ self._update_fake_trash(clone)
+
+ # clone subvolume snapshot (recreate)
+ try:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of clone with purge pending")
+ else:
+ self.fail("expected recreate of clone with purge pending to fail")
+
+ # clear fake trash entry
+ self._update_fake_trash(clone, create=False)
+
+ # recreate subvolume
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_recreate_subvolume(self):
+ """
+ ensure a retained subvolume can be recreated and further snapshotted
+ """
+ snap_md = ["created_at", "data_pool", "has_pending_clones", "size"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "snapshot-retained",
+ msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"]))
+
+ # recreate retained subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "complete",
+ msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"]))
+
+ # snapshot info (older snapshot)
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot1))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # snap-create (new snapshot)
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2)
+
+ # remove with retain snapshots
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # list snapshots
+ subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume))
+ self.assertEqual(len(subvolsnapshotls), 2, "Expected the 'fs subvolume snapshot ls' command to list the"
+ " created subvolume snapshots")
+ snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls]
+ for snap in [snapshot1, snapshot2]:
+ self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap))
+
+ # remove snapshots (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_clone(self):
+ """
+ clone a snapshot from a snapshot retained subvolume
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # store path for clone verification
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # clone retained subvolume snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, subvol_path=subvol_path)
+
+ # remove snapshots (removes retained volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_recreate(self):
+ """
+ recreate a subvolume from one of its retained snapshots
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # store path for clone verification
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # recreate retained subvolume using its own snapshot to clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, subvolume)
+
+ # check clone status
+ self._wait_for_clone_to_complete(subvolume)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, subvolume, subvol_path=subvol_path)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_retain_snapshot_with_snapshots(self):
+ """
+ retain snapshots of a cloned subvolume and check disallowed operations
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # store path for clone verification
+ subvol1_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # clone retained subvolume snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot1, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot1, clone, subvol_path=subvol1_path)
+
+ # create a snapshot on the clone
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot2)
+
+ # retain a clone
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots")
+
+ # list snapshots
+ clonesnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, clone))
+ self.assertEqual(len(clonesnapshotls), 1, "Expected the 'fs subvolume snapshot ls' command to list the"
+ " created subvolume snapshots")
+ snapshotnames = [snapshot['name'] for snapshot in clonesnapshotls]
+ for snap in [snapshot2]:
+ self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap))
+
+ ## check disallowed operations on retained clone
+ # clone-status
+ try:
+ self._fs_cmd("clone", "status", self.volname, clone)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone status of clone with retained snapshots")
+ else:
+ self.fail("expected clone status of clone with retained snapshots to fail")
+
+ # clone-cancel
+ try:
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone cancel of clone with retained snapshots")
+ else:
+ self.fail("expected clone cancel of clone with retained snapshots to fail")
+
+ # remove snapshots (removes subvolumes as all are in retained state)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot2)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_clone_from_newer_snapshot(self):
+ """
+        clone a subvolume from a recreated subvolume's latest snapshot
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+ clone = self._generate_random_clone_name(1)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # recreate subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # get and store path for clone verification
+ subvol2_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot newer subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # clone retained subvolume's newer snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot2, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot2, clone, subvol_path=subvol2_path)
+
+        # remove snapshots
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_protect_unprotect_sanity(self):
+ """
+        Snapshot protect/unprotect commands are deprecated. This test exists to ensure that
+        invoking them does not cause errors, until they are removed in a future release.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # now, protect snapshot
+ self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # now, unprotect snapshot
+ self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_reconf_max_concurrent_clones(self):
+ """
+ Validate 'max_concurrent_clones' config option
+ """
+
+ # get the default number of cloner threads
+ default_max_concurrent_clones = int(self.config_get('mgr.x', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(default_max_concurrent_clones, 4)
+
+ # Increase number of cloner threads
+ self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 6)
+ max_concurrent_clones = int(self.config_get('mgr.x', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(max_concurrent_clones, 6)
+
+ # Decrease number of cloner threads
+ self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2)
+ max_concurrent_clones = int(self.config_get('mgr.x', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(max_concurrent_clones, 2)
+
+ def test_subvolume_snapshot_clone_pool_layout(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # add data pool
+ new_pool = "new_pool"
+ self.fs.add_data_pool(new_pool)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, clone_pool=new_pool)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ subvol_path = self._get_subvolume_path(self.volname, clone)
+ desired_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool")
+ self.assertEqual(desired_pool, new_pool)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_with_attrs(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ mode = "777"
+ uid = "1000"
+ gid = "1000"
+ new_uid = "1001"
+ new_gid = "1001"
+ new_mode = "700"
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # change subvolume attrs (to ensure clone picks up snapshot attrs)
+ self._do_subvolume_attr_update(subvolume, new_uid, new_gid, new_mode)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_inherit_snapshot_namespace_and_size(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*12
+
+ # create subvolume, in an isolated namespace with a specified size
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated", "--size", str(osize))
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=8)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # create a pool different from current subvolume pool
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+ default_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+ self.fs.add_data_pool(new_pool)
+
+ # update source subvolume pool
+ self._do_subvolume_pool_and_namespace_update(subvolume, pool=new_pool, pool_namespace="")
+
+ # schedule a clone, with NO --pool specification
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_and_reclone(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1, clone2 = self._generate_random_clone_name(2)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone1)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone1)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # now the clone is just like a normal subvolume -- snapshot the clone and fork
+        # another clone. before that, do some IO so it can be differentiated.
+ self._do_subvolume_io(clone1, create_dir="data", number_of_files=32)
+
+ # snapshot clone -- use same snap name
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone2)
+
+ # verify clone
+ self._verify_clone(clone1, snapshot, clone2)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+ self._fs_cmd("subvolume", "rm", self.volname, clone2)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_under_group(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ group = self._generate_random_group_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--target_group_name', group)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone, clone_group=group)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, clone_group=group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, group)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_under_group_snapshot_clone(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, group)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, subvolume_group=group, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, source_group=group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_different_groups(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ s_group, c_group = self._generate_random_group_name(2)
+
+ # create groups
+ self._fs_cmd("subvolumegroup", "create", self.volname, s_group)
+ self._fs_cmd("subvolumegroup", "create", self.volname, c_group)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, s_group)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, subvolume_group=s_group, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone,
+ '--group_name', s_group, '--target_group_name', c_group)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone, clone_group=c_group)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, source_group=s_group, clone_group=c_group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, s_group)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, c_group)
+
+ # remove groups
+ self._fs_cmd("subvolumegroup", "rm", self.volname, s_group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, c_group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_with_upgrade(self):
+ """
+ yet another poor man's upgrade test -- rather than going through a full
+        upgrade cycle, emulate old-style subvolumes by going through the wormhole
+        and verify the clone operation.
+        further, ensure that a legacy subvolume is not updated to v2, but the clone is.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+        # emulate an old-fashioned subvolume
+ createpath = os.path.join(".", "volumes", "_nogroup", subvolume)
+ self.mount_a.run_shell(['mkdir', '-p', createpath])
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvolume, version=1, legacy=True)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # snapshot should not be deletable now
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone")
+ else:
+ self.fail("expected removing source snapshot of a clone to fail")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, source_version=1)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # ensure metadata file is in v2 location, with required version v2
+ self._assert_meta_location_and_version(self.volname, clone)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_in_progress_getpath(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # clone should not be accessible right now
+ try:
+ self._get_subvolume_path(self.volname, clone)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EAGAIN:
+ raise RuntimeError("invalid error code when fetching path of an pending clone")
+ else:
+ raise RuntimeError("expected fetching path of an pending clone to fail")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # clone should be accessible now
+ subvolpath = self._get_subvolume_path(self.volname, clone)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_in_progress_snapshot_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # snapshot should not be deletable now
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone")
+ else:
+ self.fail("expected removing source snapshot of a clone to fail")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # clone should be accessible now
+ subvolpath = self._get_subvolume_path(self.volname, clone)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_in_progress_source(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # verify clone source
+ result = json.loads(self._fs_cmd("clone", "status", self.volname, clone))
+ source = result['status']['source']
+ self.assertEqual(source['volume'], self.volname)
+ self.assertEqual(source['subvolume'], subvolume)
+ self.assertEqual(source.get('group', None), None)
+ self.assertEqual(source['snapshot'], snapshot)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # clone should be accessible now
+ subvolpath = self._get_subvolume_path(self.volname, clone)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_non_clone_status(self):
+ subvolume = self._generate_random_subvolume_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ try:
+ self._fs_cmd("clone", "status", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOTSUP:
+ raise RuntimeError("invalid error code when fetching status of a non cloned subvolume")
+ else:
+ raise RuntimeError("expected fetching of clone status of a subvolume to fail")
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_on_existing_subvolumes(self):
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolumes
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2)
+
+ # do some IO
+ self._do_subvolume_io(subvolume1, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot)
+
+ # schedule a clone with target as subvolume2
+ try:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EEXIST:
+ raise RuntimeError("invalid error code when cloning to existing subvolume")
+ else:
+ raise RuntimeError("expected cloning to fail if the target is an existing subvolume")
+
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone)
+
+ # schedule a clone with target as clone
+ try:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EEXIST:
+ raise RuntimeError("invalid error code when cloning to existing clone")
+ else:
+ raise RuntimeError("expected cloning to fail if the target is an existing clone")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume1, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_fail_with_remove(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1, clone2 = self._generate_random_clone_name(2)
+
+ pool_capacity = 32 * 1024 * 1024
+ # number of files required to fill up 99% of the pool
+ nr_files = int((pool_capacity * 0.99) // (TestVolumes.DEFAULT_FILE_SIZE * 1024 * 1024))
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=nr_files)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # add data pool
+ new_pool = "new_pool"
+ self.fs.add_data_pool(new_pool)
+
+ self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool,
+ "max_bytes", "{0}".format(pool_capacity // 4))
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool)
+
+ # check clone status -- this should dramatically overshoot the pool quota
+ self._wait_for_clone_to_complete(clone1)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone1, clone_pool=new_pool)
+
+ # wait a bit so that subsequent I/O will give pool full error
+ time.sleep(120)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2, "--pool_layout", new_pool)
+
+ # check clone status
+ self._wait_for_clone_to_fail(clone2)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, clone2)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EAGAIN:
+ raise RuntimeError("invalid error code when trying to remove failed clone")
+ else:
+ raise RuntimeError("expected error when removing a failed clone")
+
+ # ... and with force, failed clone can be removed
+ self._fs_cmd("subvolume", "rm", self.volname, clone2, "--force")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_attr_clone(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io_mixed(subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_cancel_in_progress(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=128)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # cancel on-going clone
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+
+ # verify canceled state
+ self._check_clone_canceled(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--force")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_cancel_pending(self):
+ """
+        this test is a bit more involved compared to canceling an in-progress clone.
+        we need to ensure that a to-be-canceled clone has not yet been picked up
+        by the cloner threads. exploit the fact that clones are picked up in FCFS
+        order and that there are four (4) cloner threads by default. if the number of
+        cloner threads increases, this test _may_ start tripping -- so the number of
+        clone operations would need to be bumped up as well.
+ """
+ # default number of clone threads
+ NR_THREADS = 4
+ # good enough for 4 threads
+ NR_CLONES = 5
+        # yes, 1 GiB -- we need the clone to run for some time
+ FILE_SIZE_MB = 1024
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clones = self._generate_random_clone_name(NR_CLONES)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=4, file_size=FILE_SIZE_MB)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule clones
+ for clone in clones:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
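+        # the first NR_THREADS clones should get picked up by the cloner threads
+        # (FCFS); the remaining ones are expected to stay pending and hence be
+        # cancellable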
+ to_wait = clones[0:NR_THREADS]
+ to_cancel = clones[NR_THREADS:]
+
+ # cancel pending clones and verify
+ for clone in to_cancel:
+ status = json.loads(self._fs_cmd("clone", "status", self.volname, clone))
+ self.assertEqual(status["status"]["state"], "pending")
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+ self._check_clone_canceled(clone)
+
+ # let's cancel on-going clones. handle the case where some of the clones
+ # _just_ complete
+ for clone in list(to_wait):
+ try:
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+ to_cancel.append(clone)
+ to_wait.remove(clone)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError("invalid error code when cancelling on-going clone")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ for clone in to_wait:
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+ for clone in to_cancel:
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--force")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
diff --git a/qa/tasks/cephfs_test_runner.py b/qa/tasks/cephfs_test_runner.py
new file mode 100644
index 00000000..4455c086
--- /dev/null
+++ b/qa/tasks/cephfs_test_runner.py
@@ -0,0 +1,209 @@
+import contextlib
+import logging
+import os
+import unittest
+from unittest import suite, loader, case
+from teuthology.task import interactive
+from teuthology import misc
+from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
+from tasks.mgr.mgr_test_case import MgrCluster
+
+log = logging.getLogger(__name__)
+
+
+class DecoratingLoader(loader.TestLoader):
+ """
+ A specialization of TestLoader that tags some extra attributes
+ onto test classes as they are loaded.
+ """
+ def __init__(self, params):
+ self._params = params
+ super(DecoratingLoader, self).__init__()
+
+ def _apply_params(self, obj):
+ for k, v in self._params.items():
+ setattr(obj, k, v)
+
+ def loadTestsFromTestCase(self, testCaseClass):
+ self._apply_params(testCaseClass)
+ return super(DecoratingLoader, self).loadTestsFromTestCase(testCaseClass)
+
+ def loadTestsFromName(self, name, module=None):
+ result = super(DecoratingLoader, self).loadTestsFromName(name, module)
+
+        # Special case: when we were called with the name of a method, we get
+        # a suite containing a single TestCase
+ tests_in_result = list(result)
+ if len(tests_in_result) == 1 and isinstance(tests_in_result[0], case.TestCase):
+ self._apply_params(tests_in_result[0])
+
+ return result
+
+
+class LogStream(object):
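+    """
+    File-like object that buffers written data and logs complete lines via
+    log.info(); used as the output stream for the TextTestRunner below.
+    """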
+ def __init__(self):
+ self.buffer = ""
+
+ def write(self, data):
+ self.buffer += data
+ if "\n" in self.buffer:
+ lines = self.buffer.split("\n")
+ for line in lines[:-1]:
+ log.info(line)
+ self.buffer = lines[-1]
+
+ def flush(self):
+ pass
+
+
+class InteractiveFailureResult(unittest.TextTestResult):
+ """
+ Specialization that implements interactive-on-error style
+ behavior.
+ """
+ ctx = None
+
+ def addFailure(self, test, err):
+ log.error(self._exc_info_to_string(err, test))
+ log.error("Failure in test '{0}', going interactive".format(
+ self.getDescription(test)
+ ))
+ interactive.task(ctx=self.ctx, config=None)
+
+ def addError(self, test, err):
+ log.error(self._exc_info_to_string(err, test))
+ log.error("Error in test '{0}', going interactive".format(
+ self.getDescription(test)
+ ))
+ interactive.task(ctx=self.ctx, config=None)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the CephFS test cases.
+
+ Run everything in tasks/cephfs/test_*.py:
+
+ ::
+
+ tasks:
+ - install:
+ - ceph:
+ - ceph-fuse:
+ - cephfs_test_runner:
+
+ `modules` argument allows running only some specific modules:
+
+ ::
+
+ tasks:
+ ...
+ - cephfs_test_runner:
+ modules:
+ - tasks.cephfs.test_sessionmap
+ - tasks.cephfs.test_auto_repair
+
+ By default, any cases that can't be run on the current cluster configuration
+ will generate a failure. When the optional `fail_on_skip` argument is set
+ to false, any tests that can't be run on the current configuration will
+ simply be skipped:
+
+    ::
+
+ tasks:
+ ...
+ - cephfs_test_runner:
+ fail_on_skip: false
+
+ """
+
+ ceph_cluster = CephCluster(ctx)
+
+ if len(list(misc.all_roles_of_type(ctx.cluster, 'mds'))):
+ mds_cluster = MDSCluster(ctx)
+ fs = Filesystem(ctx)
+ else:
+ mds_cluster = None
+ fs = None
+
+ if len(list(misc.all_roles_of_type(ctx.cluster, 'mgr'))):
+ mgr_cluster = MgrCluster(ctx)
+ else:
+ mgr_cluster = None
+
+ # Mount objects, sorted by ID
+ if hasattr(ctx, 'mounts'):
+ mounts = [v for k, v in sorted(ctx.mounts.items(), key=lambda mount: mount[0])]
+ else:
+ # The test configuration has a filesystem but no fuse/kclient mounts
+ mounts = []
+
+ decorating_loader = DecoratingLoader({
+ "ctx": ctx,
+ "mounts": mounts,
+ "fs": fs,
+ "ceph_cluster": ceph_cluster,
+ "mds_cluster": mds_cluster,
+ "mgr_cluster": mgr_cluster,
+ })
+
+ fail_on_skip = config.get('fail_on_skip', True)
+
+ # Put useful things onto ctx for interactive debugging
+ ctx.fs = fs
+ ctx.mds_cluster = mds_cluster
+ ctx.mgr_cluster = mgr_cluster
+
+    # Depending on config, either load specific modules, or scan for modules
+ if config and 'modules' in config and config['modules']:
+ module_suites = []
+ for mod_name in config['modules']:
+ # Test names like cephfs.test_auto_repair
+ module_suites.append(decorating_loader.loadTestsFromName(mod_name))
+ overall_suite = suite.TestSuite(module_suites)
+ else:
+ # Default, run all tests
+ overall_suite = decorating_loader.discover(
+ os.path.join(
+ os.path.dirname(os.path.abspath(__file__)),
+ "cephfs/"
+ )
+ )
+
+ if ctx.config.get("interactive-on-error", False):
+ InteractiveFailureResult.ctx = ctx
+ result_class = InteractiveFailureResult
+ else:
+ result_class = unittest.TextTestResult
+
+ class LoggingResult(result_class):
+ def startTest(self, test):
+ log.info("Starting test: {0}".format(self.getDescription(test)))
+ return super(LoggingResult, self).startTest(test)
+
+ def addSkip(self, test, reason):
+ if fail_on_skip:
+ # Don't just call addFailure because that requires a traceback
+ self.failures.append((test, reason))
+ else:
+ super(LoggingResult, self).addSkip(test, reason)
+
+ # Execute!
+ result = unittest.TextTestRunner(
+ stream=LogStream(),
+ resultclass=LoggingResult,
+ verbosity=2,
+ failfast=True).run(overall_suite)
+
+ if not result.wasSuccessful():
+ result.printErrors() # duplicate output at end for convenience
+
+ bad_tests = []
+ for test, error in result.errors:
+ bad_tests.append(str(test))
+ for test, failure in result.failures:
+ bad_tests.append(str(test))
+
+ raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests)))
+
+ yield
diff --git a/qa/tasks/cephfs_upgrade_snap.py b/qa/tasks/cephfs_upgrade_snap.py
new file mode 100644
index 00000000..1708d43c
--- /dev/null
+++ b/qa/tasks/cephfs_upgrade_snap.py
@@ -0,0 +1,45 @@
+"""
+Upgrade cluster snap format.
+"""
+
+import logging
+import time
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Upgrade CephFS file system snap format.
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'snap-upgrade task only accepts a dict for configuration'
+
+ fs = Filesystem(ctx)
+
+ mds_map = fs.get_mds_map()
+ assert(mds_map['max_mds'] == 1)
+
+ json = fs.rank_tell(["scrub", "start", "/", "force", "recursive", "repair"])
+ if not json or json['return_code'] == 0:
+ log.info("scrub / completed")
+ else:
+ log.info("scrub / failed: {}".format(json))
+
+ json = fs.rank_tell(["scrub", "start", "~mdsdir", "force", "recursive", "repair"])
+ if not json or json['return_code'] == 0:
+ log.info("scrub ~mdsdir completed")
+ else:
+ log.info("scrub / failed: {}".format(json))
+
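+    # poll the MDS map (10 attempts, 10s apart) until both snapshot flags
+    # (CEPH_MDSMAP_ALLOW_SNAPS, CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS) are set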
+ for i in range(0, 10):
+ mds_map = fs.get_mds_map()
+ if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0:
+ break
+ time.sleep(10)
+ assert((mds_map['flags'] & (1<<1)) != 0) # Test CEPH_MDSMAP_ALLOW_SNAPS
+ assert((mds_map['flags'] & (1<<4)) != 0) # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS
diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py
new file mode 100644
index 00000000..daa81973
--- /dev/null
+++ b/qa/tasks/check_counter.py
@@ -0,0 +1,98 @@
+
+import logging
+import json
+
+from teuthology.task import Task
+from teuthology import misc
+
+log = logging.getLogger(__name__)
+
+
+class CheckCounter(Task):
+ """
+ Use this task to validate that some daemon perf counters were
+ incremented by the nested tasks.
+
+ Config:
+ 'cluster_name': optional, specify which cluster
+        'counters': dictionary of daemon type to list of performance counters.
+ 'dry_run': just log the value of the counters, don't fail if they
+ aren't nonzero.
+
+ Success condition is that for all of the named counters, at least
+ one of the daemons of that type has the counter nonzero.
+
+ Example to check cephfs dirfrag splits are happening:
+ - install:
+ - ceph:
+ - ceph-fuse:
+ - check-counter:
+ counters:
+ mds:
+ - "mds.dir_split"
+ - workunit: ...
+ """
+
+ def start(self):
+ log.info("START")
+
+ def end(self):
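+        # merge any 'check-counter' overrides from the job config into this
+        # task's config before evaluating the counters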
+ overrides = self.ctx.config.get('overrides', {})
+ misc.deep_merge(self.config, overrides.get('check-counter', {}))
+
+ cluster_name = self.config.get('cluster_name', None)
+ dry_run = self.config.get('dry_run', False)
+ targets = self.config.get('counters', {})
+
+ if cluster_name is None:
+ cluster_name = next(iter(self.ctx.managers.keys()))
+
+ for daemon_type, counters in targets.items():
+ # List of 'a', 'b', 'c'...
+ daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
+ daemons = dict([(daemon_id,
+ self.ctx.daemons.get_daemon(daemon_type, daemon_id))
+ for daemon_id in daemon_ids])
+
+ seen = set()
+
+ for daemon_id, daemon in daemons.items():
+ if not daemon.running():
+ log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
+ continue
+ else:
+ log.debug("Getting stats from {0}".format(daemon_id))
+
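+                # pull the daemon's live perf counters over its admin socket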
+ manager = self.ctx.managers[cluster_name]
+ proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
+ response_data = proc.stdout.getvalue().strip()
+ if response_data:
+ perf_dump = json.loads(response_data)
+ else:
+ log.warning("No admin socket response from {0}, skipping".format(daemon_id))
+ continue
+
+ for counter in counters:
+ subsys, counter_id = counter.split(".")
+ if subsys not in perf_dump or counter_id not in perf_dump[subsys]:
+ log.warning("Counter '{0}' not found on daemon {1}.{2}".format(
+ counter, daemon_type, daemon_id))
+ continue
+ value = perf_dump[subsys][counter_id]
+
+ log.info("Daemon {0}.{1} {2}={3}".format(
+ daemon_type, daemon_id, counter, value
+ ))
+
+ if value > 0:
+ seen.add(counter)
+
+ if not dry_run:
+ unseen = set(counters) - set(seen)
+ if unseen:
+ raise RuntimeError("The following counters failed to be set "
+ "on {0} daemons: {1}".format(
+ daemon_type, unseen
+ ))
+
+task = CheckCounter
diff --git a/qa/tasks/cifs_mount.py b/qa/tasks/cifs_mount.py
new file mode 100644
index 00000000..b282b0b7
--- /dev/null
+++ b/qa/tasks/cifs_mount.py
@@ -0,0 +1,137 @@
+"""
+Mount cifs clients. Unmount when finished.
+"""
+import contextlib
+import logging
+import os
+import time
+
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Mount/unmount a cifs client.
+
+ The config is optional and defaults to mounting on all clients. If
+ a config is given, it is expected to be a list of clients to do
+ this operation on.
+
+ Example that starts smbd and mounts cifs on all nodes::
+
+ tasks:
+ - ceph:
+ - samba:
+ - cifs-mount:
+ - interactive:
+
+    Example that splits smbd and cifs::
+
+ tasks:
+ - ceph:
+ - samba: [samba.0]
+ - cifs-mount: [client.0]
+ - ceph-fuse: [client.1]
+ - interactive:
+
+    Example that specifies the share name::
+
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ - samba:
+ samba.0:
+ cephfuse: "{testdir}/mnt.0"
+ - cifs-mount:
+ client.0:
+ share: cephfuse
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ log.info('Mounting cifs clients...')
+
+ if config is None:
+ config = dict(('client.{id}'.format(id=id_), None)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client'))
+ elif isinstance(config, list):
+ config = dict((name, None) for name in config)
+
+ clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys()))
+
+ from .samba import get_sambas
+ samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')]
+ sambas = list(get_sambas(ctx=ctx, roles=samba_roles))
+ (ip, _) = sambas[0][1].ssh.get_transport().getpeername()
+ log.info('samba ip: {ip}'.format(ip=ip))
+
+ for id_, remote in clients:
+ mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_))
+ log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format(
+ id=id_, remote=remote,mnt=mnt))
+
+ remote.run(
+ args=[
+ 'mkdir',
+ '--',
+ mnt,
+ ],
+ )
+
+ rolestr = 'client.{id_}'.format(id_=id_)
+ unc = "ceph"
+ log.info("config: {c}".format(c=config))
+ if config[rolestr] is not None and 'share' in config[rolestr]:
+ unc = config[rolestr]['share']
+
+ remote.run(
+ args=[
+ 'sudo',
+ 'mount',
+ '-t',
+ 'cifs',
+ '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc),
+ '-o',
+ 'username=ubuntu,password=ubuntu',
+ mnt,
+ ],
+ )
+
+ remote.run(
+ args=[
+ 'sudo',
+ 'chown',
+ 'ubuntu:ubuntu',
+ '{m}/'.format(m=mnt),
+ ],
+ )
+
+ try:
+ yield
+ finally:
+ log.info('Unmounting cifs clients...')
+ for id_, remote in clients:
+ remote.run(
+ args=[
+ 'sudo',
+ 'umount',
+ mnt,
+ ],
+ )
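+        # retry removing each mountpoint: rmdir's stderr is piped into grep for
+        # 'Device or resource busy' -- while the grep matches, sleep and retry;
+        # once rmdir stops reporting busy, the grep fails and the loop breaks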
+ for id_, remote in clients:
+ while True:
+ try:
+ remote.run(
+ args=[
+ 'rmdir', '--', mnt,
+ run.Raw('2>&1'),
+ run.Raw('|'),
+ 'grep', 'Device or resource busy',
+ ],
+ )
+                    time.sleep(1)
+ except Exception:
+ break
diff --git a/qa/tasks/cram.py b/qa/tasks/cram.py
new file mode 100644
index 00000000..d06f0944
--- /dev/null
+++ b/qa/tasks/cram.py
@@ -0,0 +1,151 @@
+"""
+Cram tests
+"""
+import logging
+import os
+
+import six
+
+from tasks.util.workunit import get_refspec_after_overrides
+
+from teuthology import misc as teuthology
+from teuthology.parallel import parallel
+from teuthology.orchestra import run
+from teuthology.config import config as teuth_config
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Run all cram tests from the specified paths on the specified
+ clients. Each client runs tests in parallel.
+
+ Limitations:
+ Tests must have a .t suffix. Tests with duplicate names will
+ overwrite each other, so only the last one will run.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - cram:
+ clients:
+ client.0:
+ - qa/test.t
+              - qa/test2.t
+ client.1: [qa/test.t]
+ branch: foo
+
+ You can also run a list of cram tests on all clients::
+
+ tasks:
+ - ceph:
+ - cram:
+ clients:
+ all: [qa/test.t]
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ assert isinstance(config, dict)
+ assert 'clients' in config and isinstance(config['clients'], dict), \
+ 'configuration must contain a dictionary of clients'
+
+ clients = teuthology.replace_all_with_clients(ctx.cluster,
+ config['clients'])
+ testdir = teuthology.get_testdir(ctx)
+
+ overrides = ctx.config.get('overrides', {})
+ refspec = get_refspec_after_overrides(config, overrides)
+
+ git_url = teuth_config.get_ceph_qa_suite_git_url()
+ log.info('Pulling tests from %s ref %s', git_url, refspec)
+
+ try:
+ for client, tests in clients.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client)
+ remote.run(
+ args=[
+ 'mkdir', '--', client_dir,
+ run.Raw('&&'),
+ 'virtualenv', '{tdir}/virtualenv'.format(tdir=testdir),
+ run.Raw('&&'),
+ '{tdir}/virtualenv/bin/pip'.format(tdir=testdir),
+ 'install', 'cram==0.6',
+ ],
+ )
+ clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client)
+ remote.run(args=refspec.clone(git_url, clone_dir))
+
+ for test in tests:
+ assert test.endswith('.t'), 'tests must end in .t'
+ remote.run(
+ args=[
+ 'cp', '--', os.path.join(clone_dir, test), client_dir,
+ ],
+ )
+
+ with parallel() as p:
+ for role in clients.keys():
+ p.spawn(_run_tests, ctx, role)
+ finally:
+ for client, tests in clients.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client)
+ test_files = set([test.rsplit('/', 1)[1] for test in tests])
+
+ # remove test files unless they failed
+ for test_file in test_files:
+ abs_file = os.path.join(client_dir, test_file)
+ remote.run(
+ args=[
+ 'test', '-f', abs_file + '.err',
+ run.Raw('||'),
+ 'rm', '-f', '--', abs_file,
+ ],
+ )
+
+ # ignore failure since more than one client may
+ # be run on a host, and the client dir should be
+ # non-empty if the test failed
+ remote.run(
+ args=[
+ 'rm', '-rf', '--',
+ '{tdir}/virtualenv'.format(tdir=testdir),
+ clone_dir,
+ run.Raw(';'),
+ 'rmdir', '--ignore-fail-on-non-empty', client_dir,
+ ],
+ )
+
+def _run_tests(ctx, role):
+ """
+ For each role, check to make sure it's a client, then run the cram on that client
+
+ :param ctx: Context
+ :param role: Roles
+ """
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ ceph_ref = ctx.summary.get('ceph-sha1', 'master')
+
+ testdir = teuthology.get_testdir(ctx)
+ log.info('Running tests for %s...', role)
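+    # run the staged .t files with the virtualenv-installed cram, exporting
+    # CEPH_REF and CEPH_ID for tests that reference them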
+ remote.run(
+ args=[
+ run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)),
+ run.Raw('CEPH_ID="{id}"'.format(id=id_)),
+ run.Raw('PATH=$PATH:/usr/sbin'),
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ '{tdir}/virtualenv/bin/cram'.format(tdir=testdir),
+ '-v', '--',
+ run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)),
+ ],
+ logger=log.getChild(role),
+ )
diff --git a/qa/tasks/create_verify_lfn_objects.py b/qa/tasks/create_verify_lfn_objects.py
new file mode 100644
index 00000000..53254158
--- /dev/null
+++ b/qa/tasks/create_verify_lfn_objects.py
@@ -0,0 +1,83 @@
+"""
+Create and verify rados objects with long names (LFN)
+"""
+import contextlib
+import logging
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ For each combination of namespace and name_length, create
+ <num_objects> objects with name length <name_length>
+ on entry. On exit, verify that the objects still exist, can
+ be deleted, and then don't exist.
+
+ Usage::
+
+ create_verify_lfn_objects.py:
+ pool: <pool_name> default: 'data'
+ prefix: <prefix> default: ''
+ namespace: [<namespace>] default: ['']
+ num_objects: [<num_objects>] default: 10
+ name_length: [<name_length>] default: [400]
+ """
+ pool = config.get('pool', 'data')
+ num_objects = config.get('num_objects', 10)
+ name_length = config.get('name_length', [400])
+ namespace = config.get('namespace', [None])
+    prefix = config.get('prefix', '')
+ manager = ctx.managers['ceph']
+
+ objects = []
+ for l in name_length:
+ for ns in namespace:
+ def object_name(i):
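+                # build a name such that namespace length plus name length is
+                # exactly <l>: prefix, then 'a' padding, then the object index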
+                nslength = 0
+                if ns is not None:
+                    nslength = len(ns)
+ numstr = str(i)
+ fillerlen = l - nslength - len(prefix) - len(numstr)
+ assert fillerlen >= 0
+ return prefix + ('a'*fillerlen) + numstr
+ objects += [(ns, object_name(i)) for i in range(num_objects)]
+
+ for ns, name in objects:
+ err = manager.do_put(
+ pool,
+ name,
+ '/etc/resolv.conf',
+ namespace=ns)
+ log.info("err is " + str(err))
+ assert err == 0
+
+ try:
+ yield
+ finally:
+ log.info('ceph_verify_lfn_objects verifying...')
+ for ns, name in objects:
+ err = manager.do_get(
+ pool,
+ name,
+ namespace=ns)
+ log.info("err is " + str(err))
+ assert err == 0
+
+ log.info('ceph_verify_lfn_objects deleting...')
+ for ns, name in objects:
+ err = manager.do_rm(
+ pool,
+ name,
+ namespace=ns)
+ log.info("err is " + str(err))
+ assert err == 0
+
+ log.info('ceph_verify_lfn_objects verifying absent...')
+ for ns, name in objects:
+ err = manager.do_get(
+ pool,
+ name,
+ namespace=ns)
+ log.info("err is " + str(err))
+ assert err != 0
diff --git a/qa/tasks/devstack.py b/qa/tasks/devstack.py
new file mode 100644
index 00000000..35620f7e
--- /dev/null
+++ b/qa/tasks/devstack.py
@@ -0,0 +1,379 @@
+#!/usr/bin/env python
+import contextlib
+import logging
+import textwrap
+from configparser import ConfigParser
+
+import six
+import time
+
+from teuthology.orchestra import run
+from teuthology import misc
+from teuthology.contextutil import nested
+
+log = logging.getLogger(__name__)
+
+DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git'
+DS_STABLE_BRANCHES = ("havana", "grizzly")
+
+is_devstack_node = lambda role: role.startswith('devstack')
+is_osd_node = lambda role: role.startswith('osd')
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ if config is None:
+ config = {}
+ if not isinstance(config, dict):
+ raise TypeError("config must be a dict")
+ with nested(lambda: install(ctx=ctx, config=config),
+ lambda: smoke(ctx=ctx, config=config),
+ ):
+ yield
+
+
+@contextlib.contextmanager
+def install(ctx, config):
+ """
+ Install OpenStack DevStack and configure it to use a Ceph cluster for
+ Glance and Cinder.
+
+ Requires one node with a role 'devstack'
+
+ Since devstack runs rampant on the system it's used on, typically you will
+ want to reprovision that machine after using devstack on it.
+
+ Also, the default 2GB of RAM that is given to vps nodes is insufficient. I
+ recommend 4GB. Downburst can be instructed to give 4GB to a vps node by
+ adding this to the yaml:
+
+ downburst:
+ ram: 4G
+
+ This was created using documentation found here:
+ https://github.com/openstack-dev/devstack/blob/master/README.md
+ http://docs.ceph.com/docs/master/rbd/rbd-openstack/
+ """
+ if config is None:
+ config = {}
+ if not isinstance(config, dict):
+ raise TypeError("config must be a dict")
+
+ devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys()))
+ an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys()))
+
+ devstack_branch = config.get("branch", "master")
+ install_devstack(devstack_node, devstack_branch)
+ try:
+ configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node)
+ yield
+ finally:
+ pass
+
+
+def install_devstack(devstack_node, branch="master"):
+ log.info("Cloning DevStack repo...")
+
+ args = ['git', 'clone', DEVSTACK_GIT_REPO]
+ devstack_node.run(args=args)
+
+ if branch != "master":
+ if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"):
+ branch = "stable/" + branch
+ log.info("Checking out {branch} branch...".format(branch=branch))
+ cmd = "cd devstack && git checkout " + branch
+ devstack_node.run(args=cmd)
+
+ log.info("Installing DevStack...")
+ args = ['cd', 'devstack', run.Raw('&&'), './stack.sh']
+ devstack_node.run(args=args)
+
+
+def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node):
+ pool_size = config.get('pool_size', '128')
+ create_pools(ceph_node, pool_size)
+ distribute_ceph_conf(devstack_node, ceph_node)
+ # This is where we would install python-ceph and ceph-common but it appears
+ # the ceph task does that for us.
+ generate_ceph_keys(ceph_node)
+ distribute_ceph_keys(devstack_node, ceph_node)
+ secret_uuid = set_libvirt_secret(devstack_node, ceph_node)
+ update_devstack_config_files(devstack_node, secret_uuid)
+ set_apache_servername(devstack_node)
+ # Rebooting is the most-often-used method of restarting devstack services
+ misc.reboot(devstack_node)
+ start_devstack(devstack_node)
+ restart_apache(devstack_node)
+
+
+def create_pools(ceph_node, pool_size):
+ log.info("Creating pools on Ceph cluster...")
+
+ for pool_name in ['volumes', 'images', 'backups']:
+ args = ['sudo', 'ceph', 'osd', 'pool', 'create', pool_name, pool_size]
+ ceph_node.run(args=args)
+
+
+def distribute_ceph_conf(devstack_node, ceph_node):
+ log.info("Copying ceph.conf to DevStack node...")
+
+ ceph_conf_path = '/etc/ceph/ceph.conf'
+ ceph_conf = misc.get_file(ceph_node, ceph_conf_path, sudo=True)
+ misc.sudo_write_file(devstack_node, ceph_conf_path, ceph_conf)
+
+
+def generate_ceph_keys(ceph_node):
+ log.info("Generating Ceph keys...")
+
+ ceph_auth_cmds = [
+ ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder', 'mon',
+ 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'], # noqa
+ ['sudo', 'ceph', 'auth', 'get-or-create', 'client.glance', 'mon',
+ 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'], # noqa
+ ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon',
+ 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'], # noqa
+ ]
+ for cmd in ceph_auth_cmds:
+ ceph_node.run(args=cmd)
+
+
+def distribute_ceph_keys(devstack_node, ceph_node):
+ log.info("Copying Ceph keys to DevStack node...")
+
+ def copy_key(from_remote, key_name, to_remote, dest_path, owner):
+ key_stringio = six.StringIO()
+ from_remote.run(
+ args=['sudo', 'ceph', 'auth', 'get-or-create', key_name],
+ stdout=key_stringio)
+ key_stringio.seek(0)
+ misc.sudo_write_file(to_remote, dest_path,
+ key_stringio, owner=owner)
+ keys = [
+ dict(name='client.glance',
+ path='/etc/ceph/ceph.client.glance.keyring',
+ # devstack appears to just want root:root
+ #owner='glance:glance',
+ ),
+ dict(name='client.cinder',
+ path='/etc/ceph/ceph.client.cinder.keyring',
+ # devstack appears to just want root:root
+ #owner='cinder:cinder',
+ ),
+ dict(name='client.cinder-backup',
+ path='/etc/ceph/ceph.client.cinder-backup.keyring',
+ # devstack appears to just want root:root
+ #owner='cinder:cinder',
+ ),
+ ]
+ for key_dict in keys:
+ copy_key(ceph_node, key_dict['name'], devstack_node,
+ key_dict['path'], key_dict.get('owner'))
+
+
+def set_libvirt_secret(devstack_node, ceph_node):
+ log.info("Setting libvirt secret...")
+
+ cinder_key_stringio = six.StringIO()
+ ceph_node.run(args=['sudo', 'ceph', 'auth', 'get-key', 'client.cinder'],
+ stdout=cinder_key_stringio)
+ cinder_key = cinder_key_stringio.getvalue().strip()
+
+ uuid_stringio = six.StringIO()
+ devstack_node.run(args=['uuidgen'], stdout=uuid_stringio)
+ uuid = uuid_stringio.getvalue().strip()
+
+ secret_path = '/tmp/secret.xml'
+ secret_template = textwrap.dedent("""
+ <secret ephemeral='no' private='no'>
+ <uuid>{uuid}</uuid>
+ <usage type='ceph'>
+ <name>client.cinder secret</name>
+ </usage>
+ </secret>""")
+ misc.sudo_write_file(devstack_node, secret_path,
+ secret_template.format(uuid=uuid))
+ devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file',
+ secret_path])
+ devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret',
+ uuid, '--base64', cinder_key])
+ return uuid
+
+
+def update_devstack_config_files(devstack_node, secret_uuid):
+ log.info("Updating DevStack config files to use Ceph...")
+
+ def backup_config(node, file_name, backup_ext='.orig.teuth'):
+ node.run(args=['cp', '-f', file_name, file_name + backup_ext])
+
+ def update_config(config_name, config_stream, update_dict,
+ section='DEFAULT'):
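+        """Apply update_dict to the given section of the parsed config stream
+        and return the updated config as a new stream."""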
+ parser = ConfigParser()
+ parser.read_file(config_stream)
+ for (key, value) in update_dict.items():
+ parser.set(section, key, value)
+ out_stream = six.StringIO()
+ parser.write(out_stream)
+ out_stream.seek(0)
+ return out_stream
+
+ updates = [
+ dict(name='/etc/glance/glance-api.conf', options=dict(
+ default_store='rbd',
+ rbd_store_user='glance',
+ rbd_store_pool='images',
+ show_image_direct_url='True',)),
+ dict(name='/etc/cinder/cinder.conf', options=dict(
+ volume_driver='cinder.volume.drivers.rbd.RBDDriver',
+ rbd_pool='volumes',
+ rbd_ceph_conf='/etc/ceph/ceph.conf',
+ rbd_flatten_volume_from_snapshot='false',
+ rbd_max_clone_depth='5',
+ glance_api_version='2',
+ rbd_user='cinder',
+ rbd_secret_uuid=secret_uuid,
+ backup_driver='cinder.backup.drivers.ceph',
+ backup_ceph_conf='/etc/ceph/ceph.conf',
+ backup_ceph_user='cinder-backup',
+ backup_ceph_chunk_size='134217728',
+ backup_ceph_pool='backups',
+ backup_ceph_stripe_unit='0',
+ backup_ceph_stripe_count='0',
+ restore_discard_excess_bytes='true',
+ )),
+ dict(name='/etc/nova/nova.conf', options=dict(
+ libvirt_images_type='rbd',
+ libvirt_images_rbd_pool='volumes',
+ libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf',
+ rbd_user='cinder',
+ rbd_secret_uuid=secret_uuid,
+ libvirt_inject_password='false',
+ libvirt_inject_key='false',
+ libvirt_inject_partition='-2',
+ )),
+ ]
+
+ for update in updates:
+ file_name = update['name']
+ options = update['options']
+ config_data = misc.get_file(devstack_node, file_name, sudo=True)
+ config_stream = six.StringIO(config_data)
+ backup_config(devstack_node, file_name)
+ new_config_stream = update_config(file_name, config_stream, options)
+ misc.sudo_write_file(devstack_node, file_name, new_config_stream)
+
+
+def set_apache_servername(node):
+ # Apache complains: "Could not reliably determine the server's fully
+ # qualified domain name, using 127.0.0.1 for ServerName"
+ # So, let's make sure it knows its name.
+ log.info("Setting Apache ServerName...")
+
+ hostname = node.hostname
+ config_file = '/etc/apache2/conf.d/servername'
+ misc.sudo_write_file(node, config_file,
+ "ServerName {name}".format(name=hostname))
+
+
+def start_devstack(devstack_node):
+ log.info("Patching devstack start script...")
+ # This causes screen to start headless - otherwise rejoin-stack.sh fails
+ # because there is no terminal attached.
+ cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh"
+ devstack_node.run(args=cmd)
+
+ log.info("Starting devstack...")
+ cmd = "cd devstack && ./rejoin-stack.sh"
+ devstack_node.run(args=cmd)
+
+ # This was added because I was getting timeouts on Cinder requests - which
+ # were trying to access Keystone on port 5000. A more robust way to handle
+ # this would be to introduce a wait-loop on devstack_node that checks to
+ # see if a service is listening on port 5000.
+ log.info("Waiting 30s for devstack to start...")
+ time.sleep(30)
+
+
+def restart_apache(node):
+ node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True)
+
+
+@contextlib.contextmanager
+def exercise(ctx, config):
+ log.info("Running devstack exercises...")
+
+ if config is None:
+ config = {}
+ if not isinstance(config, dict):
+ raise TypeError("config must be a dict")
+
+ devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys()))
+
+ # TODO: save the log *and* preserve failures
+ #devstack_archive_dir = create_devstack_archive(ctx, devstack_node)
+
+ try:
+ #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format( # noqa
+ # dir=devstack_archive_dir)
+ cmd = "cd devstack && ./exercise.sh"
+ devstack_node.run(args=cmd, wait=True)
+ yield
+ finally:
+ pass
+
+
+def create_devstack_archive(ctx, devstack_node):
+ test_dir = misc.get_testdir(ctx)
+ devstack_archive_dir = "{test_dir}/archive/devstack".format(
+ test_dir=test_dir)
+ devstack_node.run(args="mkdir -p " + devstack_archive_dir)
+ return devstack_archive_dir
+
+
+@contextlib.contextmanager
+def smoke(ctx, config):
+ log.info("Running a basic smoketest...")
+
+ devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys()))
+ an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys()))
+
+ try:
+ create_volume(devstack_node, an_osd_node, 'smoke0', 1)
+ yield
+ finally:
+ pass
+
+
+def create_volume(devstack_node, ceph_node, vol_name, size):
+ """
+ :param size: The size of the volume, in GB
+ """
+ size = str(size)
+ log.info("Creating a {size}GB volume named {name}...".format(
+ name=vol_name,
+ size=size))
+ args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create',
+ '--display-name', vol_name, size]
+ cinder_create = devstack_node.sh(args, wait=True)
+ vol_info = parse_os_table(cinder_create)
+ log.debug("Volume info: %s", str(vol_info))
+
+ try:
+ rbd_output = ceph_node.sh("rbd --id cinder ls -l volumes", wait=True)
+ except run.CommandFailedError:
+ log.debug("Original rbd call failed; retrying without '--id cinder'")
+ rbd_output = ceph_node.sh("rbd ls -l volumes", wait=True)
+
+ assert vol_info['id'] in rbd_output, \
+ "Volume not found on Ceph cluster"
+ assert vol_info['size'] == size, \
+ "Volume size on Ceph cluster is different than specified"
+ return vol_info['id']
+
+
+def parse_os_table(table_str):
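+    """
+    Parse the ASCII table printed by OpenStack CLI tools (rows delimited by
+    '|') into a dict mapping each property name to its value.
+    """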
+ out_dict = dict()
+ for line in table_str.split('\n'):
+ if line.startswith('|'):
+ items = line.split()
+ out_dict[items[1]] = items[3]
+ return out_dict
diff --git a/qa/tasks/die_on_err.py b/qa/tasks/die_on_err.py
new file mode 100644
index 00000000..a6aa4c63
--- /dev/null
+++ b/qa/tasks/die_on_err.py
@@ -0,0 +1,70 @@
+"""
+Raise exceptions on osd coredumps or test err directories
+"""
+import contextlib
+import logging
+import time
+from teuthology.orchestra import run
+
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Die if {testdir}/err exists or if an OSD dumps core
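+
+    A minimal usage sketch (the task takes no required config)::
+
+        tasks:
+        - ceph:
+        - die_on_err: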
+ """
+ if config is None:
+ config = {}
+
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ log.info('num_osds is %s' % num_osds)
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < num_osds:
+ time.sleep(10)
+
+ testdir = teuthology.get_testdir(ctx)
+
+ while True:
+ for i in range(num_osds):
+ (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.keys()
+ p = osd_remote.run(
+ args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ],
+ wait=True,
+ check_status=False,
+ )
+ exit_status = p.exitstatus
+
+ if exit_status == 0:
+ log.info("osd %d has an error" % i)
+ raise Exception("osd %d error" % i)
+
+ log_path = '/var/log/ceph/osd.%d.log' % (i)
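+            # the check below treats a trailing 'end dump' line in the OSD log
+            # as the marker that the daemon dumped core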
+
+ p = osd_remote.run(
+ args = [
+ 'tail', '-1', log_path,
+ run.Raw('|'),
+ 'grep', '-q', 'end dump'
+ ],
+ wait=True,
+ check_status=False,
+ )
+ exit_status = p.exitstatus
+
+ if exit_status == 0:
+ log.info("osd %d dumped core" % i)
+ raise Exception("osd %d dumped core" % i)
+
+ time.sleep(5)
diff --git a/qa/tasks/divergent_priors.py b/qa/tasks/divergent_priors.py
new file mode 100644
index 00000000..e000bb2b
--- /dev/null
+++ b/qa/tasks/divergent_priors.py
@@ -0,0 +1,160 @@
+"""
+Special case divergence test
+"""
+import logging
+import time
+
+from teuthology import misc as teuthology
+from tasks.util.rados import rados
+
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+ """
+ Test handling of divergent entries with prior_version
+ prior to log_tail
+
+ overrides:
+ ceph:
+ conf:
+ osd:
+ debug osd: 5
+
+ Requires 3 osds on a single test node.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'divergent_priors task only accepts a dict for configuration'
+
+ manager = ctx.managers['ceph']
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.raw_cluster_cmd('osd', 'set', 'noout')
+ manager.raw_cluster_cmd('osd', 'set', 'noin')
+ manager.raw_cluster_cmd('osd', 'set', 'nodown')
+ manager.wait_for_clean()
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+ dummyfile2 = '/etc/resolv.conf'
+
+ # create 1 pg pool
+ log.info('creating foo')
+ manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
+
+ osds = [0, 1, 2]
+ for i in osds:
+ manager.set_config(i, osd_min_pg_log_entries=10)
+ manager.set_config(i, osd_max_pg_log_entries=10)
+ manager.set_config(i, osd_pg_log_trim_min=5)
+
+ # determine primary
+ divergent = manager.get_pg_primary('foo', 0)
+ log.info("primary and soon to be divergent is %d", divergent)
+ non_divergent = list(osds)
+ non_divergent.remove(divergent)
+
+ log.info('writing initial objects')
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ # write 100 objects
+ for i in range(100):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])
+
+ manager.wait_for_clean()
+
+ # blackhole non_divergent
+ log.info("blackholing osds %s", str(non_divergent))
+ for i in non_divergent:
+ manager.set_config(i, objectstore_blackhole=1)
+
+ DIVERGENT_WRITE = 5
+ DIVERGENT_REMOVE = 5
+ # Write some soon to be divergent
+ log.info('writing divergent objects')
+ for i in range(DIVERGENT_WRITE):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
+ dummyfile2], wait=False)
+ # Remove some soon to be divergent
+ log.info('remove divergent objects')
+ for i in range(DIVERGENT_REMOVE):
+ rados(ctx, mon, ['-p', 'foo', 'rm',
+ 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
+ time.sleep(10)
+ mon.run(
+ args=['killall', '-9', 'rados'],
+ wait=True,
+ check_status=False)
+
+ # kill all the osds but leave divergent in
+ log.info('killing all the osds')
+ for i in osds:
+ manager.kill_osd(i)
+ for i in osds:
+ manager.mark_down_osd(i)
+ for i in non_divergent:
+ manager.mark_out_osd(i)
+
+ # bring up non-divergent
+ log.info("bringing up non_divergent %s", str(non_divergent))
+ for i in non_divergent:
+ manager.revive_osd(i)
+ for i in non_divergent:
+ manager.mark_in_osd(i)
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
+ log.info('writing non-divergent object ' + objname)
+ rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])
+
+ manager.wait_for_recovery()
+
+ # ensure no recovery of up osds first
+ log.info('delay recovery')
+ for i in non_divergent:
+ manager.wait_run_admin_socket(
+ 'osd', i, ['set_recovery_delay', '100000'])
+
+ # bring in our divergent friend
+ log.info("revive divergent %d", divergent)
+ manager.raw_cluster_cmd('osd', 'set', 'noup')
+ manager.revive_osd(divergent)
+
+ log.info('delay recovery divergent')
+ manager.wait_run_admin_socket(
+ 'osd', divergent, ['set_recovery_delay', '100000'])
+
+ manager.raw_cluster_cmd('osd', 'unset', 'noup')
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+
+ log.info('wait for peering')
+ rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])
+
+ # At this point the divergent_priors should have been detected
+
+ log.info("killing divergent %d", divergent)
+ manager.kill_osd(divergent)
+ log.info("reviving divergent %d", divergent)
+ manager.revive_osd(divergent)
+
+ time.sleep(20)
+
+ log.info('allowing recovery')
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in osds:
+ manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
+ 'kick_recovery_wq', ' 0')
+
+ log.info('reading divergent objects')
+ for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
+ exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
+ '/tmp/existing'])
+ assert exit_status == 0
+
+ log.info("success")
diff --git a/qa/tasks/divergent_priors2.py b/qa/tasks/divergent_priors2.py
new file mode 100644
index 00000000..4d4b07fc
--- /dev/null
+++ b/qa/tasks/divergent_priors2.py
@@ -0,0 +1,192 @@
+"""
+Special case divergence test with ceph-objectstore-tool export/remove/import
+"""
+import logging
+import time
+
+from teuthology.exceptions import CommandFailedError
+from teuthology import misc as teuthology
+from tasks.util.rados import rados
+import os
+
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+ """
+ Test handling of divergent entries with prior_version
+ prior to log_tail and a ceph-objectstore-tool export/import
+
+ overrides:
+ ceph:
+ conf:
+ osd:
+ debug osd: 5
+
+ Requires 3 osds on a single test node.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'divergent_priors task only accepts a dict for configuration'
+
+ manager = ctx.managers['ceph']
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.raw_cluster_cmd('osd', 'set', 'noout')
+ manager.raw_cluster_cmd('osd', 'set', 'noin')
+ manager.raw_cluster_cmd('osd', 'set', 'nodown')
+ manager.wait_for_clean()
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+ dummyfile2 = '/etc/resolv.conf'
+ testdir = teuthology.get_testdir(ctx)
+
+ # create 1 pg pool
+ log.info('creating foo')
+ manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
+
+ osds = [0, 1, 2]
+ for i in osds:
+ manager.set_config(i, osd_min_pg_log_entries=10)
+ manager.set_config(i, osd_max_pg_log_entries=10)
+ manager.set_config(i, osd_pg_log_trim_min=5)
+
+ # determine primary
+ divergent = manager.get_pg_primary('foo', 0)
+ log.info("primary and soon to be divergent is %d", divergent)
+ non_divergent = list(osds)
+ non_divergent.remove(divergent)
+
+ log.info('writing initial objects')
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ # write 100 objects
+ for i in range(100):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])
+
+ manager.wait_for_clean()
+
+ # blackhole non_divergent
+ log.info("blackholing osds %s", str(non_divergent))
+ for i in non_divergent:
+ manager.set_config(i, objectstore_blackhole=1)
+
+ DIVERGENT_WRITE = 5
+ DIVERGENT_REMOVE = 5
+ # Write some soon to be divergent
+ log.info('writing divergent objects')
+ for i in range(DIVERGENT_WRITE):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
+ dummyfile2], wait=False)
+ # Remove some soon to be divergent
+ log.info('remove divergent objects')
+ for i in range(DIVERGENT_REMOVE):
+ rados(ctx, mon, ['-p', 'foo', 'rm',
+ 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
+ time.sleep(10)
+ mon.run(
+ args=['killall', '-9', 'rados'],
+ wait=True,
+ check_status=False)
+
+ # kill all the osds but leave divergent in
+ log.info('killing all the osds')
+ for i in osds:
+ manager.kill_osd(i)
+ for i in osds:
+ manager.mark_down_osd(i)
+ for i in non_divergent:
+ manager.mark_out_osd(i)
+
+ # bring up non-divergent
+ log.info("bringing up non_divergent %s", str(non_divergent))
+ for i in non_divergent:
+ manager.revive_osd(i)
+ for i in non_divergent:
+ manager.mark_in_osd(i)
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
+ log.info('writing non-divergent object ' + objname)
+ rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])
+
+ manager.wait_for_recovery()
+
+ # ensure no recovery of up osds first
+ log.info('delay recovery')
+ for i in non_divergent:
+ manager.wait_run_admin_socket(
+ 'osd', i, ['set_recovery_delay', '100000'])
+
+ # bring in our divergent friend
+ log.info("revive divergent %d", divergent)
+ manager.raw_cluster_cmd('osd', 'set', 'noup')
+ manager.revive_osd(divergent)
+
+ log.info('delay recovery divergent')
+ manager.wait_run_admin_socket(
+ 'osd', divergent, ['set_recovery_delay', '100000'])
+
+ manager.raw_cluster_cmd('osd', 'unset', 'noup')
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+
+ log.info('wait for peering')
+ rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])
+
+ # At this point the divergent_priors should have been detected
+
+ log.info("killing divergent %d", divergent)
+ manager.kill_osd(divergent)
+
+ # Export a pg
+ (exp_remote,) = ctx.\
+ cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
+ FSPATH = manager.get_filepath()
+ JPATH = os.path.join(FSPATH, "journal")
+ prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
+ "--data-path {fpath} --journal-path {jpath} "
+ "--log-file="
+ "/var/log/ceph/objectstore_tool.$$.log ".
+ format(fpath=FSPATH, jpath=JPATH))
+ pid = os.getpid()
+ expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
+ cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}").
+ format(id=divergent, file=expfile))
+ try:
+ exp_remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ assert e.exitstatus == 0
+
+ cmd = ((prefix + "--op import --file {file}").
+ format(id=divergent, file=expfile))
+ try:
+ exp_remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ assert e.exitstatus == 0
+
+ log.info("reviving divergent %d", divergent)
+ manager.revive_osd(divergent)
+ manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight'])
+    time.sleep(20)
+
+ log.info('allowing recovery')
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in osds:
+ manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
+ 'kick_recovery_wq', ' 0')
+
+ log.info('reading divergent objects')
+ for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
+ exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
+ '/tmp/existing'])
+ assert exit_status == 0
+
+ cmd = 'rm {file}'.format(file=expfile)
+ exp_remote.run(args=cmd, wait=True)
+ log.info("success")
diff --git a/qa/tasks/dnsmasq.py b/qa/tasks/dnsmasq.py
new file mode 100644
index 00000000..352ed246
--- /dev/null
+++ b/qa/tasks/dnsmasq.py
@@ -0,0 +1,170 @@
+"""
+Task for dnsmasq configuration
+"""
+import contextlib
+import logging
+
+from teuthology import misc
+from teuthology.exceptions import ConfigError
+from teuthology import contextutil
+from teuthology import packaging
+from tasks.util import get_remote_for_role
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def install_dnsmasq(remote):
+ """
+ If dnsmasq is not installed, install it for the duration of the task.
+ """
+ try:
+ existing = packaging.get_package_version(remote, 'dnsmasq')
+    except Exception:
+ existing = None
+
+ if existing is None:
+ packaging.install_package('dnsmasq', remote)
+ try:
+ yield
+ finally:
+ if existing is None:
+ packaging.remove_package('dnsmasq', remote)
+
+@contextlib.contextmanager
+def backup_resolv(remote, path):
+ """
+ Store a backup of resolv.conf in the testdir and restore it after the task.
+ """
+ remote.run(args=['cp', '/etc/resolv.conf', path])
+ try:
+ yield
+ finally:
+ # restore with 'cp' to avoid overwriting its security context
+ remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf'])
+ remote.run(args=['rm', path])
+
+@contextlib.contextmanager
+def replace_resolv(remote, path):
+ """
+ Update resolv.conf to point the nameserver at localhost.
+ """
+ misc.write_file(remote, path, "nameserver 127.0.0.1\n")
+ try:
+ # install it
+ if remote.os.package_type == "rpm":
+ # for centos ovh resolv.conf has immutable attribute set
+ remote.run(args=['sudo', 'chattr', '-i', '/etc/resolv.conf'], check_status=False)
+ remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf'])
+ yield
+ finally:
+ remote.run(args=['rm', path])
+
+@contextlib.contextmanager
+def setup_dnsmasq(remote, testdir, cnames):
+ """ configure dnsmasq on the given remote, adding each cname given """
+ log.info('Configuring dnsmasq on remote %s..', remote.name)
+
+ # add address entries for each cname
+ dnsmasq = "server=8.8.8.8\nserver=8.8.4.4\n"
+ address_template = "address=/{cname}/{ip_address}\n"
+ for cname, ip_address in cnames.items():
+ dnsmasq += address_template.format(cname=cname, ip_address=ip_address)
+
+ # write to temporary dnsmasq file
+ dnsmasq_tmp = '/'.join((testdir, 'ceph.tmp'))
+ misc.write_file(remote, dnsmasq_tmp, dnsmasq)
+
+ # move into /etc/dnsmasq.d/
+ dnsmasq_path = '/etc/dnsmasq.d/ceph'
+ remote.run(args=['sudo', 'mv', dnsmasq_tmp, dnsmasq_path])
+ # restore selinux context if necessary
+ remote.run(args=['sudo', 'restorecon', dnsmasq_path], check_status=False)
+
+ # restart dnsmasq
+ remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
+ # verify dns name is set
+ remote.run(args=['ping', '-c', '4', next(iter(cnames.keys()))])
+
+ try:
+ yield
+ finally:
+ log.info('Removing dnsmasq configuration from remote %s..', remote.name)
+ # remove /etc/dnsmasq.d/ceph
+ remote.run(args=['sudo', 'rm', dnsmasq_path])
+ # restart dnsmasq
+ remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq'])
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Configures dnsmasq to add cnames for teuthology remotes. The task expects a
+ dictionary, where each key is a role. If all cnames for that role use the
+ same address as that role, the cnames can be given as a list. For example,
+ this entry configures dnsmasq on the remote associated with client.0, adding
+ two cnames for the ip address associated with client.0:
+
+ - dnsmasq:
+ client.0:
+ - client0.example.com
+ - c0.example.com
+
+ If the addresses do not all match the given role, a dictionary can be given
+ to specify the ip address by its target role. For example:
+
+ - dnsmasq:
+ client.0:
+ client.0.example.com: client.0
+ client.1.example.com: client.1
+
+    Cnames that end with a . are treated as a prefix for the existing hostname.
+ For example, if the remote for client.0 has a hostname of 'example.com',
+ this task will add cnames for dev.example.com and test.example.com:
+
+ - dnsmasq:
+ client.0: [dev., test.]
+ """
+ # apply overrides
+ overrides = config.get('overrides', {})
+ misc.deep_merge(config, overrides.get('dnsmasq', {}))
+
+ # multiple roles may map to the same remote, so collect names by remote
+ remote_names = {}
+ for role, cnames in config.items():
+ remote = get_remote_for_role(ctx, role)
+ if remote is None:
+ raise ConfigError('no remote for role %s' % role)
+
+ names = remote_names.get(remote, {})
+
+ if isinstance(cnames, list):
+ # when given a list of cnames, point to local ip
+ for cname in cnames:
+ if cname.endswith('.'):
+ cname += remote.hostname
+ names[cname] = remote.ip_address
+ elif isinstance(cnames, dict):
+ # when given a dict, look up the remote ip for each
+ for cname, client in cnames.items():
+ r = get_remote_for_role(ctx, client)
+ if r is None:
+ raise ConfigError('no remote for role %s' % client)
+ if cname.endswith('.'):
+ cname += r.hostname
+ names[cname] = r.ip_address
+
+ remote_names[remote] = names
+
+ testdir = misc.get_testdir(ctx)
+ resolv_bak = '/'.join((testdir, 'resolv.bak'))
+ resolv_tmp = '/'.join((testdir, 'resolv.tmp'))
+
+ # run subtasks for each unique remote
+ subtasks = []
+ for remote, cnames in remote_names.items():
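+        # bind the loop variables as default arguments so each lambda keeps
+        # its own remote/cnames instead of the last iteration's values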
+ subtasks.extend([ lambda r=remote: install_dnsmasq(r) ])
+ subtasks.extend([ lambda r=remote: backup_resolv(r, resolv_bak) ])
+ subtasks.extend([ lambda r=remote: replace_resolv(r, resolv_tmp) ])
+ subtasks.extend([ lambda r=remote, cn=cnames: setup_dnsmasq(r, testdir, cn) ])
+
+ with contextutil.nested(*subtasks):
+ yield
diff --git a/qa/tasks/dump_stuck.py b/qa/tasks/dump_stuck.py
new file mode 100644
index 00000000..fb2823f4
--- /dev/null
+++ b/qa/tasks/dump_stuck.py
@@ -0,0 +1,161 @@
+"""
+Dump_stuck command
+"""
+import logging
+import time
+
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10):
+ """
+    Do checks. Make sure get_stuck_pgs returns the right amount of information, then
+ extract health information from the raw_cluster_cmd and compare the results with
+ values passed in. This passes if all asserts pass.
+
+    :param manager: Ceph manager
+    :param num_inactive: number of inactive PGs that are stuck
+    :param num_unclean: number of unclean PGs that are stuck
+    :param num_stale: number of stale PGs that are stuck
+ :param timeout: timeout value for get_stuck_pgs calls
+ """
+ inactive = manager.get_stuck_pgs('inactive', timeout)
+ unclean = manager.get_stuck_pgs('unclean', timeout)
+ stale = manager.get_stuck_pgs('stale', timeout)
+ log.info('inactive %s / %d, unclean %s / %d, stale %s / %d',
+ len(inactive), num_inactive,
+ len(unclean), num_unclean,
+ len(stale), num_stale)
+ assert len(inactive) == num_inactive
+ assert len(unclean) == num_unclean
+ assert len(stale) == num_stale
+
+def task(ctx, config):
+ """
+ Test the dump_stuck command.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ assert config is None, \
+ 'dump_stuck requires no configuration'
+ assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \
+ 'dump_stuck requires exactly 2 osds'
+
+ timeout = 60
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_clean(timeout)
+
+ manager.raw_cluster_cmd('tell', 'mon.0', 'injectargs', '--',
+# '--mon-osd-report-timeout 90',
+ '--mon-pg-stuck-threshold 10')
+
+ # all active+clean
+ check_stuck(
+ manager,
+ num_inactive=0,
+ num_unclean=0,
+ num_stale=0,
+ )
+ num_pgs = manager.get_num_pgs()
+
+ manager.mark_out_osd(0)
+ time.sleep(timeout)
+ manager.flush_pg_stats([1])
+ manager.wait_for_recovery(timeout)
+
+ # all active+clean+remapped
+ check_stuck(
+ manager,
+ num_inactive=0,
+ num_unclean=0,
+ num_stale=0,
+ )
+
+ manager.mark_in_osd(0)
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_clean(timeout)
+
+ # all active+clean
+ check_stuck(
+ manager,
+ num_inactive=0,
+ num_unclean=0,
+ num_stale=0,
+ )
+
+ log.info('stopping first osd')
+ manager.kill_osd(0)
+ manager.mark_down_osd(0)
+ manager.wait_for_active(timeout)
+
+ log.info('waiting for all to be unclean')
+ starttime = time.time()
+ done = False
+ while not done:
+ try:
+ check_stuck(
+ manager,
+ num_inactive=0,
+ num_unclean=num_pgs,
+ num_stale=0,
+ )
+ done = True
+ except AssertionError:
+            # wait up to 15 minutes to become unclean
+ if time.time() - starttime > 900:
+ raise
+
+
+ log.info('stopping second osd')
+ manager.kill_osd(1)
+ manager.mark_down_osd(1)
+
+ log.info('waiting for all to be stale')
+ starttime = time.time()
+ done = False
+ while not done:
+ try:
+ check_stuck(
+ manager,
+ num_inactive=0,
+ num_unclean=num_pgs,
+ num_stale=num_pgs,
+ )
+ done = True
+ except AssertionError:
+ # wait up to 15 minutes to become stale
+ if time.time() - starttime > 900:
+ raise
+
+ log.info('reviving')
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'):
+ manager.revive_osd(id_)
+ manager.mark_in_osd(id_)
+ while True:
+ try:
+ manager.flush_pg_stats([0, 1])
+ break
+ except Exception:
+ log.exception('osds must not be started yet, waiting...')
+ time.sleep(1)
+ manager.wait_for_clean(timeout)
+
+ check_stuck(
+ manager,
+ num_inactive=0,
+ num_unclean=0,
+ num_stale=0,
+ )
diff --git a/qa/tasks/ec_lost_unfound.py b/qa/tasks/ec_lost_unfound.py
new file mode 100644
index 00000000..e12b6901
--- /dev/null
+++ b/qa/tasks/ec_lost_unfound.py
@@ -0,0 +1,158 @@
+"""
+Lost_unfound
+"""
+import logging
+import time
+from tasks import ceph_manager
+from tasks.util.rados import rados
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test handling of lost objects on an ec pool.
+
+    A pretty rigid cluster is brought up and tested by this task.
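+
+    A config sketch (both keys are optional; the values shown match the
+    defaults used below)::
+
+        tasks:
+        - ec_lost_unfound:
+            parallel_bench: true
+            erasure_code_profile:
+              k: '2'
+              m: '2'
+              crush-failure-domain: osd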
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'lost_unfound task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ manager.wait_for_clean()
+
+ profile = config.get('erasure_code_profile', {
+ 'k': '2',
+ 'm': '2',
+ 'crush-failure-domain': 'osd'
+ })
+ profile_name = profile.get('name', 'lost_unfound')
+ manager.create_erasure_code_profile(profile_name, profile)
+ pool = manager.create_pool_with_unique_name(
+ erasure_code_profile_name=profile_name,
+ min_size=2)
+
+ # something that is always there, readable and never empty
+ dummyfile = '/etc/group'
+
+ # kludge to make sure they get a map
+ rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile])
+
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_recovery()
+
+ # create old objects
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f])
+
+ # delay recovery, and make the pg log very long (to prevent backfill)
+ manager.raw_cluster_cmd(
+ 'tell', 'osd.1',
+ 'injectargs',
+ '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
+ )
+
+ manager.kill_osd(0)
+ manager.mark_down_osd(0)
+ manager.kill_osd(3)
+ manager.mark_down_osd(3)
+
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile])
+
+ # take out osd.1 and a necessary shard of those objects.
+ manager.kill_osd(1)
+ manager.mark_down_osd(1)
+ manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+ manager.revive_osd(0)
+ manager.wait_till_osd_is_up(0)
+ manager.revive_osd(3)
+ manager.wait_till_osd_is_up(3)
+
+ manager.flush_pg_stats([0, 2, 3])
+ manager.wait_till_active()
+ manager.flush_pg_stats([0, 2, 3])
+
+ # verify that there are unfound objects
+ unfound = manager.get_num_unfound_objects()
+ log.info("there are %d unfound objects" % unfound)
+ assert unfound
+
+ testdir = teuthology.get_testdir(ctx)
+ procs = []
+ if config.get('parallel_bench', True):
+ procs.append(mon.run(
+ args=[
+ "/bin/sh", "-c",
+ " ".join(['adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage',
+ 'rados',
+ '--no-log-to-stderr',
+ '--name', 'client.admin',
+ '-b', str(4<<10),
+ '-p' , pool,
+ '-t', '20',
+ 'bench', '240', 'write',
+ ]).format(tdir=testdir),
+ ],
+ logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
+ stdin=run.PIPE,
+ wait=False
+ ))
+ time.sleep(10)
+
+ # mark stuff lost
+ pgs = manager.get_pg_stats()
+ for pg in pgs:
+ if pg['stat_sum']['num_objects_unfound'] > 0:
+ # verify that i can list them direct from the osd
+ log.info('listing missing/lost in %s state %s', pg['pgid'],
+                     pg['state'])
+ m = manager.list_pg_unfound(pg['pgid'])
+ log.info('%s' % m)
+ assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
+
+ log.info("reverting unfound in %s", pg['pgid'])
+ manager.raw_cluster_cmd('pg', pg['pgid'],
+ 'mark_unfound_lost', 'delete')
+ else:
+ log.info("no unfound in %s", pg['pgid'])
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+ manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+ manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5')
+ manager.flush_pg_stats([0, 2, 3])
+ manager.wait_for_recovery()
+
+ if not config.get('parallel_bench', True):
+ time.sleep(20)
+
+ # verify result
+ for f in range(1, 10):
+ err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-'])
+ assert err
+ err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-'])
+ assert err
+ err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-'])
+ assert err
+
+ # see if osd.1 can cope
+ manager.revive_osd(1)
+ manager.wait_till_osd_is_up(1)
+ manager.wait_for_clean()
+ run.wait(procs)
diff --git a/qa/tasks/exec_on_cleanup.py b/qa/tasks/exec_on_cleanup.py
new file mode 100644
index 00000000..a7c7ee5d
--- /dev/null
+++ b/qa/tasks/exec_on_cleanup.py
@@ -0,0 +1,61 @@
+"""
+Execute custom commands during unwind/cleanup
+"""
+import logging
+import contextlib
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Execute commands on a given role
+
+ tasks:
+ - ceph:
+ - kclient: [client.a]
+ - exec:
+ client.a:
+ - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"
+ - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"
+ - interactive:
+
+    Execution stops and the task fails at the first command that does not succeed. This
+    means that if the first command fails, the second won't run at all.
+
+ To avoid confusion it is recommended to explicitly enclose the commands in
+ double quotes. For instance if the command is false (without double quotes) it will
+ be interpreted as a boolean by the YAML parser.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ try:
+ yield
+ finally:
+ log.info('Executing custom commands...')
+ assert isinstance(config, dict), "task exec got invalid config"
+
+ testdir = teuthology.get_testdir(ctx)
+
+ if 'all' in config and len(config) == 1:
+ a = config['all']
+ roles = teuthology.all_roles(ctx.cluster)
+ config = dict((id_, a) for id_ in roles)
+
+ for role, ls in config.items():
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ log.info('Running commands on role %s host %s', role, remote.name)
+ for c in ls:
+            c = c.replace('$TESTDIR', testdir)
+ remote.run(
+ args=[
+ 'sudo',
+ 'TESTDIR={tdir}'.format(tdir=testdir),
+ 'bash',
+ '-c',
+ c],
+ )
+
diff --git a/qa/tasks/filestore_idempotent.py b/qa/tasks/filestore_idempotent.py
new file mode 100644
index 00000000..319bef76
--- /dev/null
+++ b/qa/tasks/filestore_idempotent.py
@@ -0,0 +1,83 @@
+"""
+Filestore/filejournal handler
+"""
+import logging
+from teuthology.orchestra import run
+import random
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test filestore/filejournal handling of non-idempotent events.
+
+    Currently this is a kludge; we require that the ceph task precedes us just
+ so that we get the tarball installed to run the test binary.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ # just use the first client...
+ client = next(iter(clients))
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+
+ testdir = teuthology.get_testdir(ctx)
+
+ dir = '%s/ceph.data/test.%s' % (testdir, client)
+
+ seed = int(random.uniform(1,100))
+ start = 800 + random.randint(800,1200)
+ end = start + 50
+
+ try:
+ log.info('creating a working dir')
+ remote.run(args=['mkdir', dir])
+ remote.run(
+ args=[
+ 'cd', dir,
+ run.Raw('&&'),
+ 'wget','-q', '-Orun_seed_to.sh',
+ 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to.sh;hb=HEAD',
+ run.Raw('&&'),
+ 'wget','-q', '-Orun_seed_to_range.sh',
+ 'http://git.ceph.com/?p=ceph.git;a=blob_plain;f=src/test/objectstore/run_seed_to_range.sh;hb=HEAD',
+ run.Raw('&&'),
+ 'chmod', '+x', 'run_seed_to.sh', 'run_seed_to_range.sh',
+            ])
+
+ log.info('running a series of tests')
+ proc = remote.run(
+ args=[
+ 'cd', dir,
+ run.Raw('&&'),
+ './run_seed_to_range.sh', str(seed), str(start), str(end),
+ ],
+ wait=False,
+ check_status=False)
+ result = proc.wait()
+
+ if result != 0:
+ remote.run(
+ args=[
+ 'cp', '-a', dir, '{tdir}/archive/idempotent_failure'.format(tdir=testdir),
+ ])
+ raise Exception("./run_seed_to_range.sh errored out")
+
+ finally:
+ remote.run(args=[
+ 'rm', '-rf', '--', dir
+ ])
+
diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py
new file mode 100644
index 00000000..ca84dc7a
--- /dev/null
+++ b/qa/tasks/fs.py
@@ -0,0 +1,66 @@
+"""
+CephFS sub-tasks.
+"""
+
+import logging
+import re
+import six
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+def clients_evicted(ctx, config):
+ """
+ Check clients are evicted, unmount (cleanup) if so.
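+
+    A config sketch (the task name below assumes the usual module.function
+    convention for sub-tasks; when 'clients' is omitted, every client in
+    ctx.mounts is expected to have been evicted)::
+
+        - fs.clients_evicted:
+            clients:
+                client.0: True
+                client.1: False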
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'task only accepts a dict for configuration'
+
+ clients = config.get('clients')
+
+ if clients is None:
+ clients = {("client."+client_id): True for client_id in ctx.mounts}
+
+ log.info("clients is {}".format(str(clients)))
+
+ fs = Filesystem(ctx)
+ status = fs.status()
+
+ has_session = set()
+ mounts = {}
+ for client in clients:
+ client_id = re.match("^client.([0-9]+)$", client).groups(1)[0]
+ mounts[client] = ctx.mounts.get(client_id)
+
+ for rank in fs.get_ranks(status=status):
+ ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status)
+ for session in ls:
+ for client, evicted in six.viewitems(clients):
+ mount = mounts.get(client)
+ if mount is not None:
+ global_id = mount.get_global_id()
+ if session['id'] == global_id:
+ if evicted:
+ raise RuntimeError("client still has session: {}".format(str(session)))
+ else:
+ log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank']))
+ has_session.add(client)
+
+ no_session = set(clients) - has_session
+ should_assert = False
+ for client, evicted in six.viewitems(clients):
+ mount = mounts.get(client)
+ if mount is not None:
+ if evicted:
+ log.info("confirming client {} is blacklisted".format(client))
+ assert mount.is_blacklisted()
+ elif client in no_session:
+ log.info("client {} should not be evicted but has no session with an MDS".format(client))
+ mount.is_blacklisted() # for debugging
+ should_assert = True
+ if should_assert:
+ raise RuntimeError("some clients which should not be evicted have no session with an MDS?")
diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py
new file mode 100644
index 00000000..ce0d73f5
--- /dev/null
+++ b/qa/tasks/kclient.py
@@ -0,0 +1,130 @@
+"""
+Mount/unmount a ``kernel`` client.
+"""
+import contextlib
+import logging
+
+from teuthology.misc import deep_merge
+from teuthology.orchestra.run import CommandFailedError
+from teuthology import misc
+from teuthology.contextutil import MaxWhileTries
+from tasks.cephfs.kernel_mount import KernelMount
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Mount/unmount a ``kernel`` client.
+
+ The config is optional and defaults to mounting on all clients. If
+ a config is given, it is expected to be a list of clients to do
+ this operation on. This lets you e.g. set up one client with
+ ``ceph-fuse`` and another with ``kclient``.
+
+ Example that mounts all clients::
+
+ tasks:
+ - ceph:
+ - kclient:
+ - interactive:
+
+    Example that uses both ``kclient`` and ``ceph-fuse``::
+
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0]
+ - kclient: [client.1]
+ - interactive:
+
+
+ Pass a dictionary instead of lists to specify per-client config:
+
+ tasks:
+    - kclient:
+ client.0:
+ debug: true
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ log.info('Mounting kernel clients...')
+ assert config is None or isinstance(config, list) or isinstance(config, dict), \
+ "task kclient got invalid config"
+
+ if config is None:
+ config = ['client.{id}'.format(id=id_)
+ for id_ in misc.all_roles_of_type(ctx.cluster, 'client')]
+
+ if isinstance(config, list):
+ client_roles = config
+ config = dict([r, dict()] for r in client_roles)
+ elif isinstance(config, dict):
+ client_roles = filter(lambda x: 'client.' in x, config.keys())
+ else:
+ raise ValueError("Invalid config object: {0} ({1})".format(config, config.__class__))
+
+ # config has been converted to a dict by this point
+ overrides = ctx.config.get('overrides', {})
+ deep_merge(config, overrides.get('kclient', {}))
+
+ clients = list(misc.get_clients(ctx=ctx, roles=client_roles))
+
+ test_dir = misc.get_testdir(ctx)
+
+ mounts = {}
+ for id_, remote in clients:
+ client_config = config.get("client.%s" % id_)
+ if client_config is None:
+ client_config = {}
+
+ if config.get("disabled", False) or not client_config.get('mounted', True):
+ continue
+
+ kernel_mount = KernelMount(
+ ctx,
+ test_dir,
+ id_,
+ remote,
+ ctx.teuthology_config.get('ipmi_user', None),
+ ctx.teuthology_config.get('ipmi_password', None),
+ ctx.teuthology_config.get('ipmi_domain', None)
+ )
+
+ mounts[id_] = kernel_mount
+
+ if client_config.get('debug', False):
+ remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"])
+ remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"])
+
+ kernel_mount.mount()
+
+
+ def umount_all():
+ log.info('Unmounting kernel clients...')
+
+ forced = False
+ for mount in mounts.values():
+ if mount.is_mounted():
+ try:
+ mount.umount()
+ except (CommandFailedError, MaxWhileTries):
+ log.warning("Ordinary umount failed, forcing...")
+ forced = True
+ mount.umount_wait(force=True)
+
+ return forced
+
+ ctx.mounts = mounts
+ try:
+ yield mounts
+ except:
+ umount_all() # ignore forced retval, we are already in error handling
+ finally:
+
+ forced = umount_all()
+ if forced:
+ # The context managers within the kclient manager worked (i.e.
+ # the test workload passed) but for some reason we couldn't
+ # umount, so turn this into a test failure.
+ raise RuntimeError("Kernel mounts did not umount cleanly")
diff --git a/qa/tasks/keystone.py b/qa/tasks/keystone.py
new file mode 100644
index 00000000..4a22a885
--- /dev/null
+++ b/qa/tasks/keystone.py
@@ -0,0 +1,397 @@
+"""
+Deploy and configure Keystone for Teuthology
+"""
+import argparse
+import contextlib
+import logging
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.orchestra import run
+from teuthology.packaging import install_package
+from teuthology.packaging import remove_package
+from teuthology.exceptions import ConfigError
+
+log = logging.getLogger(__name__)
+
+
+def get_keystone_dir(ctx):
+ return '{tdir}/keystone'.format(tdir=teuthology.get_testdir(ctx))
+
+def run_in_keystone_dir(ctx, client, args, **kwargs):
+ return ctx.cluster.only(client).run(
+ args=[ 'cd', get_keystone_dir(ctx), run.Raw('&&'), ] + args,
+ **kwargs
+ )
+
+def get_toxvenv_dir(ctx):
+ return ctx.tox.venv_path
+
+def toxvenv_sh(ctx, remote, args, **kwargs):
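+    """Run a command on the remote inside the tox-created virtualenv by
+    sourcing its activate script first."""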
+ activate = get_toxvenv_dir(ctx) + '/bin/activate'
+ return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs)
+
+def run_in_keystone_venv(ctx, client, args):
+ run_in_keystone_dir(ctx, client,
+ [ 'source',
+ '.tox/venv/bin/activate',
+ run.Raw('&&')
+ ] + args)
+
+def get_keystone_venved_cmd(ctx, cmd, args):
+ kbindir = get_keystone_dir(ctx) + '/.tox/venv/bin/'
+ return [ kbindir + 'python', kbindir + cmd ] + args
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Download the Keystone from github.
+ Remove downloaded file upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Downloading keystone...')
+ keystonedir = get_keystone_dir(ctx)
+
+ for (client, cconf) in config.items():
+ ctx.cluster.only(client).run(
+ args=[
+ 'git', 'clone',
+ '-b', cconf.get('force-branch', 'master'),
+ 'https://github.com/openstack/keystone.git',
+ keystonedir,
+ ],
+ )
+
+ sha1 = cconf.get('sha1')
+ if sha1 is not None:
+ run_in_keystone_dir(ctx, client, [
+ 'git', 'reset', '--hard', sha1,
+ ],
+ )
+
+ # hax for http://tracker.ceph.com/issues/23659
+ run_in_keystone_dir(ctx, client, [
+ 'sed', '-i',
+ 's/pysaml2<4.0.3,>=2.4.0/pysaml2>=4.5.0/',
+ 'requirements.txt'
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Removing keystone...')
+ for client in config:
+ ctx.cluster.only(client).run(
+ args=[ 'rm', '-rf', keystonedir ],
+ )
+
+@contextlib.contextmanager
+def install_packages(ctx, config):
+ """
+ Download the packaged dependencies of Keystone.
+ Remove install packages upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Installing packages for Keystone...')
+
+ packages = {}
+ for (client, _) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ # use bindep to read which dependencies we need from keystone/bindep.txt
+ toxvenv_sh(ctx, remote, ['pip', 'install', 'bindep'])
+ packages[client] = toxvenv_sh(ctx, remote,
+ ['bindep', '--brief', '--file', '{}/bindep.txt'.format(get_keystone_dir(ctx))],
+ check_status=False).splitlines() # returns 1 on success?
+ # install python3 as bindep installs python34 which is not supported
+ # by keystone or tempest's tox based tests.
+ packages[client].append('python3')
+ for dep in packages[client]:
+ install_package(dep, remote)
+ try:
+ yield
+ finally:
+ log.info('Removing packaged dependencies of Keystone...')
+
+ for (client, _) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ for dep in packages[client]:
+ remove_package(dep, remote)
+
+@contextlib.contextmanager
+def setup_venv(ctx, config):
+ """
+ Setup the virtualenv for Keystone using tox.
+ """
+ assert isinstance(config, dict)
+ log.info('Setting up virtualenv for keystone...')
+ for (client, _) in config.items():
+ run_in_keystone_dir(ctx, client,
+ [ 'source',
+ '{tvdir}/bin/activate'.format(tvdir=get_toxvenv_dir(ctx)),
+ run.Raw('&&'),
+ 'tox', '-e', 'venv', '--notest'
+ ])
+
+ run_in_keystone_venv(ctx, client,
+ [ 'pip', 'install', 'python-openstackclient' ])
+ try:
+ yield
+ finally:
+ pass
+
+@contextlib.contextmanager
+def configure_instance(ctx, config):
+ assert isinstance(config, dict)
+ log.info('Configuring keystone...')
+
+ keyrepo_dir = '{kdir}/etc/fernet-keys'.format(kdir=get_keystone_dir(ctx))
+ for (client, _) in config.items():
+ # prepare the config file
+ run_in_keystone_dir(ctx, client,
+ [
+ 'cp', '-f',
+ 'etc/keystone.conf.sample',
+ 'etc/keystone.conf'
+ ])
+ run_in_keystone_dir(ctx, client,
+ [
+ 'sed',
+ '-e', 's/#admin_token =.*/admin_token = ADMIN/',
+ '-i', 'etc/keystone.conf'
+ ])
+ run_in_keystone_dir(ctx, client,
+ [
+ 'sed',
+ '-e', 's^#key_repository =.*^key_repository = {kr}^'.format(kr = keyrepo_dir),
+ '-i', 'etc/keystone.conf'
+ ])
+
+        # prepare key repository for Fernet token authenticator
+ run_in_keystone_dir(ctx, client, [ 'mkdir', '-p', keyrepo_dir ])
+ run_in_keystone_venv(ctx, client, [ 'keystone-manage', 'fernet_setup' ])
+
+ # sync database
+ run_in_keystone_venv(ctx, client, [ 'keystone-manage', 'db_sync' ])
+ yield
+
+@contextlib.contextmanager
+def run_keystone(ctx, config):
+ assert isinstance(config, dict)
+ log.info('Configuring keystone...')
+
+ for (client, _) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ cluster_name, _, client_id = teuthology.split_role(client)
+
+ # start the public endpoint
+ client_public_with_id = 'keystone.public' + '.' + client_id
+
+ public_host, public_port = ctx.keystone.public_endpoints[client]
+ run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-public',
+ [ '--host', public_host, '--port', str(public_port),
+ # Let's put the Keystone in background, wait for EOF
+ # and after receiving it, send SIGTERM to the daemon.
+ # This crazy hack is because Keystone, in contrast to
+ # our other daemons, doesn't quit on stdin.close().
+ # Teuthology relies on this behaviour.
+ run.Raw('& { read; kill %1; }')
+ ]
+ )
+ ctx.daemons.add_daemon(
+ remote, 'keystone', client_public_with_id,
+ cluster=cluster_name,
+ args=run_cmd,
+ logger=log.getChild(client),
+ stdin=run.PIPE,
+ cwd=get_keystone_dir(ctx),
+ wait=False,
+ check_status=False,
+ )
+
+ # start the admin endpoint
+ client_admin_with_id = 'keystone.admin' + '.' + client_id
+
+ admin_host, admin_port = ctx.keystone.admin_endpoints[client]
+ run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-admin',
+ [ '--host', admin_host, '--port', str(admin_port),
+ run.Raw('& { read; kill %1; }')
+ ]
+ )
+ ctx.daemons.add_daemon(
+ remote, 'keystone', client_admin_with_id,
+ cluster=cluster_name,
+ args=run_cmd,
+ logger=log.getChild(client),
+ stdin=run.PIPE,
+ cwd=get_keystone_dir(ctx),
+ wait=False,
+ check_status=False,
+ )
+
+ # sleep driven synchronization
+ run_in_keystone_venv(ctx, client, [ 'sleep', '15' ])
+ try:
+ yield
+ finally:
+ log.info('Stopping Keystone admin instance')
+ ctx.daemons.get_daemon('keystone', client_admin_with_id,
+ cluster_name).stop()
+
+ log.info('Stopping Keystone public instance')
+ ctx.daemons.get_daemon('keystone', client_public_with_id,
+ cluster_name).stop()
+
+
+def dict_to_args(special, items):
+ """
+ Transform
+ [(key1, val1), (special, val_special), (key3, val3) ]
+ into:
+ [ '--key1', 'val1', '--key3', 'val3', 'val_special' ]
+ """
+    args = []
+    special_val = None
+ for (k, v) in items:
+ if k == special:
+ special_val = v
+ else:
+ args.append('--{k}'.format(k=k))
+ args.append(v)
+ if special_val:
+ args.append(special_val)
+ return args
+
+def run_section_cmds(ctx, cclient, section_cmd, special,
+ section_config_list):
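+    """
+    Run 'openstack <section_cmd>' once for each entry in section_config_list,
+    passing the admin token/URL plus the entry's items as CLI options; the
+    value of the 'special' key is appended as a positional argument.
+    """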
+ admin_host, admin_port = ctx.keystone.admin_endpoints[cclient]
+
+ auth_section = [
+ ( 'os-token', 'ADMIN' ),
+ ( 'os-url', 'http://{host}:{port}/v2.0'.format(host=admin_host,
+ port=admin_port) ),
+ ]
+
+ for section_item in section_config_list:
+ run_in_keystone_venv(ctx, cclient,
+ [ 'openstack' ] + section_cmd.split() +
+ dict_to_args(special, auth_section + list(section_item.items())))
+
+def create_endpoint(ctx, cclient, service, url):
+ endpoint_section = {
+ 'service': service,
+ 'publicurl': url,
+ }
+ return run_section_cmds(ctx, cclient, 'endpoint create', 'service',
+ [ endpoint_section ])
+
+@contextlib.contextmanager
+def fill_keystone(ctx, config):
+ assert isinstance(config, dict)
+
+ for (cclient, cconfig) in config.items():
+ # configure tenants/projects
+ run_section_cmds(ctx, cclient, 'project create', 'name',
+ cconfig['tenants'])
+ run_section_cmds(ctx, cclient, 'user create', 'name',
+ cconfig['users'])
+ run_section_cmds(ctx, cclient, 'role create', 'name',
+ cconfig['roles'])
+ run_section_cmds(ctx, cclient, 'role add', 'name',
+ cconfig['role-mappings'])
+ run_section_cmds(ctx, cclient, 'service create', 'name',
+ cconfig['services'])
+
+ public_host, public_port = ctx.keystone.public_endpoints[cclient]
+ url = 'http://{host}:{port}/v2.0'.format(host=public_host,
+ port=public_port)
+ create_endpoint(ctx, cclient, 'keystone', url)
+ # for the deferred endpoint creation; currently it's used in rgw.py
+ ctx.keystone.create_endpoint = create_endpoint
+
+ # sleep driven synchronization -- just in case
+ run_in_keystone_venv(ctx, cclient, [ 'sleep', '3' ])
+ try:
+ yield
+ finally:
+ pass
+
+def assign_ports(ctx, config, initial_port):
+ """
+ Assign port numbers starting from @initial_port
+ """
+ port = initial_port
+ role_endpoints = {}
+ for remote, roles_for_host in ctx.cluster.remotes.items():
+ for role in roles_for_host:
+ if role in config:
+ role_endpoints[role] = (remote.name.split('@')[1], port)
+ port += 1
+
+ return role_endpoints
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Deploy and configure Keystone
+
+ Example of configuration:
+
+ - install:
+ - ceph:
+ - tox: [ client.0 ]
+ - keystone:
+ client.0:
+ force-branch: master
+ tenants:
+ - name: admin
+ description: Admin Tenant
+ users:
+ - name: admin
+ password: ADMIN
+ project: admin
+ roles: [ name: admin, name: Member ]
+ role-mappings:
+ - name: admin
+ user: admin
+ project: admin
+ services:
+ - name: keystone
+ type: identity
+ description: Keystone Identity Service
+ - name: swift
+ type: object-store
+ description: Swift Service
+ """
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task keystone only supports a list or dictionary for configuration"
+
+ if not ctx.tox:
+ raise ConfigError('keystone must run after the tox task')
+
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+
+ log.debug('Keystone config is %s', config)
+
+ ctx.keystone = argparse.Namespace()
+ ctx.keystone.public_endpoints = assign_ports(ctx, config, 5000)
+ ctx.keystone.admin_endpoints = assign_ports(ctx, config, 35357)
+
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: install_packages(ctx=ctx, config=config),
+ lambda: setup_venv(ctx=ctx, config=config),
+ lambda: configure_instance(ctx=ctx, config=config),
+ lambda: run_keystone(ctx=ctx, config=config),
+ lambda: fill_keystone(ctx=ctx, config=config),
+ ):
+ yield
diff --git a/qa/tasks/locktest.py b/qa/tasks/locktest.py
new file mode 100755
index 00000000..9de5ba40
--- /dev/null
+++ b/qa/tasks/locktest.py
@@ -0,0 +1,134 @@
+"""
+locktests
+"""
+import logging
+
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Run locktests, from the xfstests suite, on the given
+ clients. Whether the clients are ceph-fuse or kernel does not
+ matter, and the two clients can refer to the same mount.
+
+ The config is a list of two clients to run the locktest on. The
+ first client will be the host.
+
+ For example:
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0, client.1]
+ - locktest:
+ [client.0, client.1]
+
+ This task does not yield; there would be little point.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+
+ assert isinstance(config, list)
+ log.info('fetching and building locktests...')
+ (host,) = ctx.cluster.only(config[0]).remotes
+ (client,) = ctx.cluster.only(config[1]).remotes
+ ( _, _, host_id) = config[0].partition('.')
+ ( _, _, client_id) = config[1].partition('.')
+ testdir = teuthology.get_testdir(ctx)
+ hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id)
+ clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id)
+
+ try:
+ for client_name in config:
+ log.info('building on {client_}'.format(client_=client_name))
+ ctx.cluster.only(client_name).run(
+ args=[
+ # explicitly does not support multiple autotest tasks
+ # in a single run; the result archival would conflict
+ 'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'mkdir', '{tdir}/locktest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'wget',
+ '-nv',
+ 'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c',
+ '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+ '-o', '{tdir}/locktest/locktest'.format(tdir=testdir)
+ ],
+ logger=log.getChild('locktest_client.{id}'.format(id=client_name)),
+ )
+
+ log.info('built locktest on each client')
+
+ host.run(args=['sudo', 'touch',
+ '{mnt}/locktestfile'.format(mnt=hostmnt),
+ run.Raw('&&'),
+ 'sudo', 'chown', 'ubuntu.ubuntu',
+ '{mnt}/locktestfile'.format(mnt=hostmnt)
+ ]
+ )
+
+ log.info('starting on host')
+ hostproc = host.run(
+ args=[
+ '{tdir}/locktest/locktest'.format(tdir=testdir),
+ '-p', '6788',
+ '-d',
+ '{mnt}/locktestfile'.format(mnt=hostmnt),
+ ],
+ wait=False,
+ logger=log.getChild('locktest.host'),
+ )
+ log.info('starting on client')
+ (_,_,hostaddr) = host.name.partition('@')
+ clientproc = client.run(
+ args=[
+ '{tdir}/locktest/locktest'.format(tdir=testdir),
+ '-p', '6788',
+ '-d',
+ '-h', hostaddr,
+ '{mnt}/locktestfile'.format(mnt=clientmnt),
+ ],
+ logger=log.getChild('locktest.client'),
+ wait=False
+ )
+
+ hostresult = hostproc.wait()
+ clientresult = clientproc.wait()
+ if (hostresult != 0) or (clientresult != 0):
+ raise Exception("Did not pass locking test!")
+ log.info('finished locktest executable with results {r} and {s}'. \
+ format(r=hostresult, s=clientresult))
+
+ finally:
+ log.info('cleaning up host dir')
+ host.run(
+ args=[
+ 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'rmdir', '{tdir}/locktest'.format(tdir=testdir)
+ ],
+ logger=log.getChild('.{id}'.format(id=config[0])),
+ )
+ log.info('cleaning up client dir')
+ client.run(
+ args=[
+ 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'rmdir', '{tdir}/locktest'.format(tdir=testdir)
+ ],
+ logger=log.getChild('.{id}'.format(\
+ id=config[1])),
+ )
diff --git a/qa/tasks/logrotate.conf b/qa/tasks/logrotate.conf
new file mode 100644
index 00000000..b0cb8012
--- /dev/null
+++ b/qa/tasks/logrotate.conf
@@ -0,0 +1,13 @@
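+# Note: this file is a Python str.format() template (the doubled braces below
+# render as the literal braces logrotate expects). daemon_type and max_size
+# are substituted by the calling task, e.g. daemon_type=ceph-osd and
+# max_size=100M (illustrative values).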
+/var/log/ceph/*{daemon_type}*.log {{
+ rotate 100
+ size {max_size}
+ compress
+ sharedscripts
+ postrotate
+ killall {daemon_type} -1 || true
+ endscript
+ missingok
+ notifempty
+ su root root
+}}
+
diff --git a/qa/tasks/lost_unfound.py b/qa/tasks/lost_unfound.py
new file mode 100644
index 00000000..ab17a95d
--- /dev/null
+++ b/qa/tasks/lost_unfound.py
@@ -0,0 +1,176 @@
+"""
+Lost_unfound
+"""
+import logging
+import time
+from tasks import ceph_manager
+from tasks.util.rados import rados
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test handling of lost objects.
+
+ A pretty rigid cluster is brought up and tested by this task.
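+
+ For example (a minimal sketch; the optional parallel_bench key, default
+ true, controls whether a background rados bench client runs while the
+ unfound objects are handled)::
+
+ tasks:
+ - ceph:
+ - lost_unfound:
+ parallel_bench: false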
+ """
+ POOL = 'unfound_pool'
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'lost_unfound task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+
+ manager.wait_for_clean()
+
+ manager.create_pool(POOL)
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+
+ # take an osd out until the very end
+ manager.kill_osd(2)
+ manager.mark_down_osd(2)
+ manager.mark_out_osd(2)
+
+ # kludge to make sure they get a map
+ rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])
+
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_recovery()
+
+ # create old objects
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])
+
+ # delay recovery, and make the pg log very long (to prevent backfill)
+ manager.raw_cluster_cmd(
+ 'tell', 'osd.1',
+ 'injectargs',
+ '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
+ )
+
+ manager.kill_osd(0)
+ manager.mark_down_osd(0)
+
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
+
+ # bring osd.0 back up, let it peer, but don't replicate the new
+ # objects...
+ log.info('osd.0 command_args is %s' % 'foo')
+ log.info(ctx.daemons.get_daemon('osd', 0).command_args)
+ ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
+ '--osd-recovery-delay-start', '1000'
+ ])
+ manager.revive_osd(0)
+ manager.mark_in_osd(0)
+ manager.wait_till_osd_is_up(0)
+
+ manager.flush_pg_stats([1, 0])
+ manager.wait_till_active()
+
+ # take out osd.1 and the only copy of those objects.
+ manager.kill_osd(1)
+ manager.mark_down_osd(1)
+ manager.mark_out_osd(1)
+ manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+
+ # bring up osd.2 so that things would otherwise, in theory, recover fully
+ manager.revive_osd(2)
+ manager.mark_in_osd(2)
+ manager.wait_till_osd_is_up(2)
+
+ manager.flush_pg_stats([0, 2])
+ manager.wait_till_active()
+ manager.flush_pg_stats([0, 2])
+
+ # verify that there are unfound objects
+ unfound = manager.get_num_unfound_objects()
+ log.info("there are %d unfound objects" % unfound)
+ assert unfound
+
+ testdir = teuthology.get_testdir(ctx)
+ procs = []
+ if config.get('parallel_bench', True):
+ procs.append(mon.run(
+ args=[
+ "/bin/sh", "-c",
+ " ".join(['adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage',
+ 'rados',
+ '--no-log-to-stderr',
+ '--name', 'client.admin',
+ '-b', str(4<<10),
+ '-p' , POOL,
+ '-t', '20',
+ 'bench', '240', 'write',
+ ]).format(tdir=testdir),
+ ],
+ logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
+ stdin=run.PIPE,
+ wait=False
+ ))
+ time.sleep(10)
+
+ # mark stuff lost
+ pgs = manager.get_pg_stats()
+ for pg in pgs:
+ if pg['stat_sum']['num_objects_unfound'] > 0:
+ primary = 'osd.%d' % pg['acting'][0]
+
+ # verify that i can list them direct from the osd
+ log.info('listing missing/lost in %s state %s', pg['pgid'],
+ pg['state'])
+ m = manager.list_pg_unfound(pg['pgid'])
+ #log.info('%s' % m)
+ assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
+ num_unfound=0
+ for o in m['objects']:
+ if len(o['locations']) == 0:
+ num_unfound += 1
+ assert m['num_unfound'] == num_unfound
+
+ log.info("reverting unfound in %s on %s", pg['pgid'], primary)
+ manager.raw_cluster_cmd('pg', pg['pgid'],
+ 'mark_unfound_lost', 'revert')
+ else:
+ log.info("no unfound in %s", pg['pgid'])
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+ manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+ manager.flush_pg_stats([0, 2])
+ manager.wait_for_recovery()
+
+ # verify result
+ for f in range(1, 10):
+ err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
+ assert err
+ err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
+ assert err
+ err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
+ assert not err
+
+ # see if osd.1 can cope
+ manager.mark_in_osd(1)
+ manager.revive_osd(1)
+ manager.wait_till_osd_is_up(1)
+ manager.wait_for_clean()
+ run.wait(procs)
diff --git a/qa/tasks/manypools.py b/qa/tasks/manypools.py
new file mode 100644
index 00000000..7fe7e43e
--- /dev/null
+++ b/qa/tasks/manypools.py
@@ -0,0 +1,73 @@
+"""
+Force pg creation on all osds
+"""
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+import logging
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Create the specified number of pools and write 16 objects to them (thereby forcing
+ the PG creation on each OSD). This task creates pools from all the clients,
+ in parallel. It is easy to add other daemon types which have the appropriate
+ permissions, but I don't think anything else does.
+ The config is just the number of pools to create. I recommend setting
+ "mon create pg interval" to a very low value in your ceph config to speed
+ this up.
+
+ You probably want to do this to look at memory consumption, and
+ maybe to test how performance changes with the number of PGs. For example:
+
+ tasks:
+ - ceph:
+ config:
+ mon:
+ mon create pg interval: 1
+ - manypools: 3000
+ - radosbench:
+ clients: [client.0]
+ time: 360
+ """
+
+ log.info('creating {n} pools'.format(n=config))
+
+ poolnum = int(config)
+ creator_remotes = []
+ client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client')
+ log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles))
+ for role in client_roles:
+ log.info('role={role_}'.format(role_=role))
+ (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.keys()
+ creator_remotes.append((creator_remote, 'client.{id}'.format(id=role)))
+
+ remaining_pools = poolnum
+ poolprocs=dict()
+ while (remaining_pools > 0):
+ log.info('{n} pools remaining to create'.format(n=remaining_pools))
+ for remote, role_ in creator_remotes:
+ poolnum = remaining_pools
+ remaining_pools -= 1
+ if remaining_pools < 0:
+ continue
+ log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_))
+ proc = remote.run(
+ args=[
+ 'ceph',
+ '--name', role_,
+ 'osd', 'pool', 'create', 'pool{num}'.format(num=poolnum), '8',
+ run.Raw('&&'),
+ 'rados',
+ '--name', role_,
+ '--pool', 'pool{num}'.format(num=poolnum),
+ 'bench', '0', 'write', '-t', '16', '--block-size', '1'
+ ],
+ wait = False
+ )
+ log.info('waiting for pool and object creates')
+ poolprocs[remote] = proc
+
+ run.wait(poolprocs.values())
+
+ log.info('created all {n} pools and wrote 16 objects to each'.format(n=config))
diff --git a/qa/tasks/mds_creation_failure.py b/qa/tasks/mds_creation_failure.py
new file mode 100644
index 00000000..58314086
--- /dev/null
+++ b/qa/tasks/mds_creation_failure.py
@@ -0,0 +1,69 @@
+# FIXME: this file has many undefined vars which are accessed!
+# flake8: noqa
+import logging
+import contextlib
+import time
+from tasks import ceph_manager
+from teuthology import misc
+from teuthology.orchestra.run import CommandFailedError, Raw
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Go through filesystem creation with a synthetic failure in an MDS
+ in its 'up:creating' state, to exercise the retry behaviour.
+ """
+ # Grab handles to the teuthology objects of interest
+ mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds'))
+ if len(mdslist) != 1:
+ # Require exactly one MDS, the code path for creation failure when
+ # a standby is available is different
+ raise RuntimeError("This task requires exactly one MDS")
+
+ mds_id = mdslist[0]
+ (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.keys()
+ manager = ceph_manager.CephManager(
+ mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'),
+ )
+
+ # Stop MDS
+ self.fs.set_max_mds(0)
+ self.fs.mds_stop(mds_id)
+ self.fs.mds_fail(mds_id)
+
+ # Reset the filesystem so that next start will go into CREATING
+ manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it")
+ manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data")
+
+ # Start the MDS with mds_kill_create_at set, it will crash during creation
+ mds.restart_with_args(["--mds_kill_create_at=1"])
+ try:
+ mds.wait_for_exit()
+ except CommandFailedError as e:
+ if e.exitstatus == 1:
+ log.info("MDS creation killed as expected")
+ else:
+ log.error("Unexpected status code %s" % e.exitstatus)
+ raise
+
+ # Since I have intentionally caused a crash, I will clean up the resulting core
+ # file to avoid task.internal.coredump seeing it as a failure.
+ log.info("Removing core file from synthetic MDS failure")
+ mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))])
+
+ # It should have left the MDS map state still in CREATING
+ status = self.fs.status().get_mds(mds_id)
+ assert status['state'] == 'up:creating'
+
+ # Start the MDS again without the kill flag set, it should proceed with creation successfully
+ mds.restart()
+
+ # Wait for state ACTIVE
+ self.fs.wait_for_state("up:active", timeout=120, mds_id=mds_id)
+
+ # The system should be back up in a happy healthy state, go ahead and run any further tasks
+ # inside this context.
+ yield
diff --git a/qa/tasks/mds_pre_upgrade.py b/qa/tasks/mds_pre_upgrade.py
new file mode 100644
index 00000000..0856d483
--- /dev/null
+++ b/qa/tasks/mds_pre_upgrade.py
@@ -0,0 +1,43 @@
+"""
+Prepare MDS cluster for upgrade.
+"""
+
+import logging
+import time
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Prepare MDS cluster for upgrade.
+
+ This task reduces ranks to 1 and stops all standbys.
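+
+ For example (an illustrative ordering; the standbys are stopped here so that
+ a subsequent ceph.stop of the MDS daemons keeps rank 0 down as briefly as
+ possible)::
+
+ tasks:
+ - mds_pre_upgrade:
+ - ceph.stop: [mds.*]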
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'mds_pre_upgrade task only accepts a dict for configuration'
+
+ fs = Filesystem(ctx)
+ status = fs.getinfo()
+
+ fs.set_max_mds(1)
+ fs.reach_max_mds()
+
+ # Stop standbys now to minimize time rank 0 is down in subsequent:
+ # tasks:
+ # - ceph.stop: [mds.*]
+ rank0 = fs.get_rank(rank=0, status=status)
+ for daemon in ctx.daemons.iter_daemons_of_role('mds', fs.mon_manager.cluster):
+ if rank0['name'] != daemon.id_:
+ daemon.stop()
+
+ for i in range(1, 10):
+ time.sleep(5) # time for FSMap to update
+ status = fs.getinfo()
+ if len(list(status.get_standbys())) == 0:
+ break
+ assert(len(list(status.get_standbys())) == 0)
diff --git a/qa/tasks/mds_thrash.py b/qa/tasks/mds_thrash.py
new file mode 100644
index 00000000..ac543838
--- /dev/null
+++ b/qa/tasks/mds_thrash.py
@@ -0,0 +1,543 @@
+"""
+Thrash mds by simulating failures
+"""
+import logging
+import contextlib
+import itertools
+import random
+import signal
+import time
+
+from gevent import sleep
+from gevent.greenlet import Greenlet
+from gevent.event import Event
+from teuthology import misc as teuthology
+
+from tasks import ceph_manager
+from tasks.cephfs.filesystem import MDSCluster, Filesystem
+
+log = logging.getLogger(__name__)
+
+class DaemonWatchdog(Greenlet):
+ """
+ DaemonWatchdog::
+
+ Watch Ceph daemons for failures. If an extended failure is detected (i.e.
+ not intentional), then the watchdog will unmount file systems and send
+ SIGTERM to all daemons. The duration of an extended failure is configurable
+ with watchdog_daemon_timeout.
+
+ watchdog_daemon_timeout [default: 300]: number of seconds a daemon
+ is allowed to be failed before the watchdog will bark.
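+
+ For example (a sketch; the watchdog reads this key from the mds_thrash
+ task's config)::
+
+ tasks:
+ - ceph:
+ - mds_thrash:
+ watchdog_daemon_timeout: 600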
+ """
+
+ def __init__(self, ctx, manager, config, thrashers):
+ Greenlet.__init__(self)
+ self.ctx = ctx
+ self.config = config
+ self.e = None
+ self.logger = log.getChild('daemon_watchdog')
+ self.manager = manager
+ self.name = 'watchdog'
+ self.stopping = Event()
+ self.thrashers = thrashers
+
+ def _run(self):
+ try:
+ self.watch()
+ except Exception as e:
+ # See _run exception comment for MDSThrasher
+ self.e = e
+ self.logger.exception("exception:")
+ # allow successful completion so gevent doesn't see an exception...
+
+ def log(self, x):
+ """Write data to logger"""
+ self.logger.info(x)
+
+ def stop(self):
+ self.stopping.set()
+
+ def bark(self):
+ self.log("BARK! unmounting mounts and killing all daemons")
+ for mount in self.ctx.mounts.values():
+ try:
+ mount.umount_wait(force=True)
+ except:
+ self.logger.exception("ignoring exception:")
+ daemons = []
+ daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.manager.cluster)))
+ daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.manager.cluster)))
+ for daemon in daemons:
+ try:
+ daemon.signal(signal.SIGTERM)
+ except:
+ self.logger.exception("ignoring exception:")
+
+ def watch(self):
+ self.log("watchdog starting")
+ daemon_timeout = int(self.config.get('watchdog_daemon_timeout', 300))
+ daemon_failure_time = {}
+ while not self.stopping.is_set():
+ bark = False
+ now = time.time()
+
+ mons = self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.manager.cluster)
+ mdss = self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.manager.cluster)
+ clients = self.ctx.daemons.iter_daemons_of_role('client', cluster=self.manager.cluster)
+
+ #for daemon in mons:
+ # self.log("mon daemon {role}.{id}: running={r}".format(role=daemon.role, id=daemon.id_, r=daemon.running() and not daemon.proc.finished))
+ #for daemon in mdss:
+ # self.log("mds daemon {role}.{id}: running={r}".format(role=daemon.role, id=daemon.id_, r=daemon.running() and not daemon.proc.finished))
+
+ daemon_failures = []
+ daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mons))
+ daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mdss))
+ for daemon in daemon_failures:
+ name = daemon.role + '.' + daemon.id_
+ dt = daemon_failure_time.setdefault(name, (daemon, now))
+ assert dt[0] is daemon
+ delta = now-dt[1]
+ self.log("daemon {name} is failed for ~{t:.0f}s".format(name=name, t=delta))
+ if delta > daemon_timeout:
+ bark = True
+
+ # If a daemon is no longer failed, remove it from tracking:
+ for name in list(daemon_failure_time.keys()):
+ if name not in [d.role + '.' + d.id_ for d in daemon_failures]:
+ self.log("daemon {name} has been restored".format(name=name))
+ del daemon_failure_time[name]
+
+ for thrasher in self.thrashers:
+ if thrasher.e is not None:
+ self.log("thrasher on fs.{name} failed".format(name=thrasher.fs.name))
+ bark = True
+
+ if bark:
+ self.bark()
+ return
+
+ sleep(5)
+
+ self.log("watchdog finished")
+
+class MDSThrasher(Greenlet):
+ """
+ MDSThrasher::
+
+ The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).
+
+ The config is optional. Many of the config parameters are a maximum value
+ to use when selecting a random value from a range. To always use the maximum
+ value, set no_random to true. The config is a dict containing some or all of:
+
+ max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at
+ any given time.
+
+ max_thrash_delay: [default: 30] maximum number of seconds to delay before
+ thrashing again.
+
+ max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
+ the replay state before thrashing.
+
+ max_revive_delay: [default: 10] maximum number of seconds to delay before
+ bringing back a thrashed MDS.
+
+ randomize: [default: true] enables randomization; when false, the max delay values are always used
+
+ seed: [no default] seed the random number generator
+
+ thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
+ during replay. Value should be between 0.0 and 1.0.
+
+ thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds
+ cluster will be modified to a value [1, current) or (current, starting
+ max_mds]. Value should be between 0.0 and 1.0.
+
+ thrash_while_stopping: [default: false] thrash an MDS while there
+ are MDSs in up:stopping (because max_mds was changed and some
+ MDSs were deactivated).
+
+ thrash_weights: allows specific MDSs to be thrashed more/less frequently.
+ This option overrides anything specified by max_thrash. This option is a
+ dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b:
+ 0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not
+ specified will be automatically given a weight of 0.0 (not thrashed).
+ For a given MDS, by default the thrasher delays for up to
+ max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.
+ If a non-zero weight is specified for an MDS, for each iteration the
+ thrasher chooses whether to thrash during that iteration based on a
+ random value [0-1] not exceeding the weight of that MDS.
+
+ Examples::
+
+
+ The following example sets the likelihood that mds.a will be thrashed
+ to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the
+ likelihood that an MDS will be thrashed in replay to 40%.
+ Thrash weights do not have to sum to 1.
+
+ tasks:
+ - ceph:
+ - mds_thrash:
+ thrash_weights:
+ - mds.a: 0.8
+ - mds.b: 0.2
+ thrash_in_replay: 0.4
+ - ceph-fuse:
+ - workunit:
+ clients:
+ all: [suites/fsx.sh]
+
+ The following example disables randomization, and uses the max delay values:
+
+ tasks:
+ - ceph:
+ - mds_thrash:
+ max_thrash_delay: 10
+ max_revive_delay: 1
+ max_replay_thrash_delay: 4
+
+ """
+
+ def __init__(self, ctx, manager, config, fs, max_mds):
+ Greenlet.__init__(self)
+
+ self.config = config
+ self.ctx = ctx
+ self.e = None
+ self.logger = log.getChild('fs.[{f}]'.format(f = fs.name))
+ self.fs = fs
+ self.manager = manager
+ self.max_mds = max_mds
+ self.name = 'thrasher.fs.[{f}]'.format(f = fs.name)
+ self.stopping = Event()
+
+ self.randomize = bool(self.config.get('randomize', True))
+ self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05))
+ self.max_thrash = int(self.config.get('max_thrash', 1))
+ self.max_thrash_delay = float(self.config.get('thrash_delay', 120.0))
+ self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
+ assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
+ v=self.thrash_in_replay)
+ self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
+ self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
+
+ def _run(self):
+ try:
+ self.do_thrash()
+ except Exception as e:
+ # Log exceptions here so we get the full backtrace (gevent loses them).
+ # Also allow successful completion as gevent exception handling is a broken mess:
+ #
+ # 2017-02-03T14:34:01.259 CRITICAL:root: File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051)
+ # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error
+ # self.print_exception(context, type, value, tb)
+ # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception
+ # traceback.print_exception(type, value, tb, file=errstream)
+ # File "/usr/lib/python2.7/traceback.py", line 124, in print_exception
+ # _print(file, 'Traceback (most recent call last):')
+ # File "/usr/lib/python2.7/traceback.py", line 13, in _print
+ # file.write(str+terminator)
+ # 2017-02-03T14:34:01.261 CRITICAL:root:IOError
+ self.e = e
+ self.logger.exception("exception:")
+ # allow successful completion so gevent doesn't see an exception...
+
+ def log(self, x):
+ """Write data to logger assigned to this MDThrasher"""
+ self.logger.info(x)
+
+ def stop(self):
+ self.stopping.set()
+
+ def kill_mds(self, mds):
+ if self.config.get('powercycle'):
+ (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
+ remotes.keys())
+ self.log('kill_mds on mds.{m} doing powercycle of {s}'.
+ format(m=mds, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_off()
+ else:
+ self.ctx.daemons.get_daemon('mds', mds).stop()
+
+ @staticmethod
+ def _assert_ipmi(remote):
+ assert remote.console.has_ipmi_credentials, (
+ "powercycling requested but RemoteConsole is not "
+ "initialized. Check ipmi config.")
+
+ def revive_mds(self, mds):
+ """
+ Revive mds -- do an ipmi powercycle (if indicated by the config)
+ and then restart.
+ """
+ if self.config.get('powercycle'):
+ (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
+ remotes.keys())
+ self.log('revive_mds on mds.{m} doing powercycle of {s}'.
+ format(m=mds, s=remote.name))
+ self._assert_ipmi(remote)
+ remote.console.power_on()
+ self.manager.make_admin_daemon_dir(self.ctx, remote)
+ args = []
+ self.ctx.daemons.get_daemon('mds', mds).restart(*args)
+
+ def wait_for_stable(self, rank = None, gid = None):
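+ """
+ Poll the FSMap until the MDS cluster is stable. If rank and gid are
+ given, return once that rank is active under a different gid (or once
+ enough MDS are active that no standby could replace it); otherwise
+ return once the number of active MDS equals max_mds. Raises
+ RuntimeError after roughly five minutes.
+ """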
+ self.log('waiting for mds cluster to stabilize...')
+ for itercount in itertools.count():
+ status = self.fs.status()
+ max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
+ ranks = list(status.get_ranks(self.fs.id))
+ stopping = sum(1 for _ in ranks if "up:stopping" == _['state'])
+ actives = sum(1 for _ in ranks
+ if "up:active" == _['state'] and "laggy_since" not in _)
+
+ if not bool(self.config.get('thrash_while_stopping', False)) and stopping > 0:
+ if itercount % 5 == 0:
+ self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)')
+ else:
+ if rank is not None:
+ try:
+ info = status.get_rank(self.fs.id, rank)
+ if info['gid'] != gid and "up:active" == info['state']:
+ self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid))
+ return status
+ except:
+ pass # no rank present
+ if actives >= max_mds:
+ # no replacement can occur!
+ self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format(
+ actives=actives, max_mds=max_mds, rank=rank))
+ return status
+ else:
+ if actives == max_mds:
+ self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives))
+ return status, None
+ if itercount > 300/2: # 5 minutes
+ raise RuntimeError('timeout waiting for cluster to stabilize')
+ elif itercount % 5 == 0:
+ self.log('mds map: {status}'.format(status=status))
+ else:
+ self.log('no change')
+ sleep(2)
+
+ def do_thrash(self):
+ """
+ Perform the random thrashing action
+ """
+
+ self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name))
+ stats = {
+ "max_mds": 0,
+ "deactivate": 0,
+ "kill": 0,
+ }
+
+ while not self.stopping.is_set():
+ delay = self.max_thrash_delay
+ if self.randomize:
+ delay = random.randrange(0.0, self.max_thrash_delay)
+
+ if delay > 0.0:
+ self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
+ self.stopping.wait(delay)
+ if self.stopping.is_set():
+ continue
+
+ status = self.fs.status()
+
+ if random.random() <= self.thrash_max_mds:
+ max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
+ options = list(range(1, max_mds))+list(range(max_mds+1, self.max_mds+1))
+ if len(options) > 0:
+ sample = random.sample(options, 1)
+ new_max_mds = sample[0]
+ self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds))
+ self.fs.set_max_mds(new_max_mds)
+ stats['max_mds'] += 1
+ self.wait_for_stable()
+
+ count = 0
+ for info in status.get_ranks(self.fs.id):
+ name = info['name']
+ label = 'mds.' + name
+ rank = info['rank']
+ gid = info['gid']
+
+ # if thrash_weights isn't specified and we've reached max_thrash,
+ # we're done
+ count = count + 1
+ if 'thrash_weights' not in self.config and count > self.max_thrash:
+ break
+
+ weight = 1.0
+ if 'thrash_weights' in self.config:
+ weight = self.config['thrash_weights'].get(label, '0.0')
+ skip = random.randrange(0.0, 1.0)
+ if weight <= skip:
+ self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight))
+ continue
+
+ self.log('kill {label} (rank={rank})'.format(label=label, rank=rank))
+ self.kill_mds(name)
+ stats['kill'] += 1
+
+ # wait for mon to report killed mds as crashed
+ last_laggy_since = None
+ itercount = 0
+ while True:
+ status = self.fs.status()
+ info = status.get_mds(name)
+ if not info:
+ break
+ if 'laggy_since' in info:
+ last_laggy_since = info['laggy_since']
+ break
+ if any([(f == name) for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']]):
+ break
+ self.log(
+ 'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format(
+ label=label))
+ itercount = itercount + 1
+ if itercount > 10:
+ self.log('mds map: {status}'.format(status=status))
+ sleep(2)
+
+ if last_laggy_since:
+ self.log(
+ '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since))
+ else:
+ self.log('{label} down, removed from mdsmap'.format(label=label))
+
+ # wait for a standby mds to takeover and become active
+ status = self.wait_for_stable(rank, gid)
+
+ # wait for a while before restarting old active to become new
+ # standby
+ delay = self.max_revive_delay
+ if self.randomize:
+ delay = random.randrange(0.0, self.max_revive_delay)
+
+ self.log('waiting for {delay} secs before reviving {label}'.format(
+ delay=delay, label=label))
+ sleep(delay)
+
+ self.log('reviving {label}'.format(label=label))
+ self.revive_mds(name)
+
+ for itercount in itertools.count():
+ if itercount > 300/2: # 5 minutes
+ raise RuntimeError('timeout waiting for MDS to revive')
+ status = self.fs.status()
+ info = status.get_mds(name)
+ if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'):
+ self.log('{label} reported in {state} state'.format(label=label, state=info['state']))
+ break
+ self.log(
+ 'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label))
+ sleep(2)
+
+ for stat in stats:
+ self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat]))
+
+ # don't do replay thrashing right now
+# for info in status.get_replays(self.fs.id):
+# # this might race with replay -> active transition...
+# if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay:
+# delay = self.max_replay_thrash_delay
+# if self.randomize:
+# delay = random.randrange(0.0, self.max_replay_thrash_delay)
+# sleep(delay)
+# self.log('kill replaying mds.{id}'.format(id=self.to_kill))
+# self.kill_mds(self.to_kill)
+#
+# delay = self.max_revive_delay
+# if self.randomize:
+# delay = random.randrange(0.0, self.max_revive_delay)
+#
+# self.log('waiting for {delay} secs before reviving mds.{id}'.format(
+# delay=delay, id=self.to_kill))
+# sleep(delay)
+#
+# self.log('revive mds.{id}'.format(id=self.to_kill))
+# self.revive_mds(self.to_kill)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Stress test the mds by thrashing while another task/workunit
+ is running.
+
+ Please refer to MDSThrasher class for further information on the
+ available options.
+ """
+
+ mds_cluster = MDSCluster(ctx)
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'mds_thrash task only accepts a dict for configuration'
+ mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))
+ assert len(mdslist) > 1, \
+ 'mds_thrash task requires at least 2 metadata servers'
+
+ # choose random seed
+ if 'seed' in config:
+ seed = int(config['seed'])
+ else:
+ seed = int(time.time())
+ log.info('mds thrasher using random seed: {seed}'.format(seed=seed))
+ random.seed(seed)
+
+ (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys()
+ manager = ceph_manager.CephManager(
+ first, ctx=ctx, logger=log.getChild('ceph_manager'),
+ )
+
+ # make sure everyone is in active, standby, or standby-replay
+ log.info('Wait for all MDSs to reach steady state...')
+ status = mds_cluster.status()
+ while True:
+ steady = True
+ for info in status.get_all():
+ state = info['state']
+ if state not in ('up:active', 'up:standby', 'up:standby-replay'):
+ steady = False
+ break
+ if steady:
+ break
+ sleep(2)
+ status = mds_cluster.status()
+ log.info('Ready to start thrashing')
+
+ thrashers = []
+
+ watchdog = DaemonWatchdog(ctx, manager, config, thrashers)
+ watchdog.start()
+
+ manager.wait_for_clean()
+ assert manager.is_clean()
+ for fs in status.get_filesystems():
+ thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fs['id']), fs['mdsmap']['max_mds'])
+ thrasher.start()
+ thrashers.append(thrasher)
+
+ try:
+ log.debug('Yielding')
+ yield
+ finally:
+ log.info('joining mds_thrashers')
+ for thrasher in thrashers:
+ thrasher.stop()
+ if thrasher.e:
+ raise RuntimeError('error during thrashing')
+ thrasher.join()
+ log.info('done joining')
+
+ watchdog.stop()
+ watchdog.join()
diff --git a/qa/tasks/metadata.yaml b/qa/tasks/metadata.yaml
new file mode 100644
index 00000000..ccdc3b07
--- /dev/null
+++ b/qa/tasks/metadata.yaml
@@ -0,0 +1,2 @@
+instance-id: test
+local-hostname: test
diff --git a/qa/tasks/mgr/__init__.py b/qa/tasks/mgr/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/qa/tasks/mgr/__init__.py
diff --git a/qa/tasks/mgr/dashboard/__init__.py b/qa/tasks/mgr/dashboard/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/__init__.py
diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py
new file mode 100644
index 00000000..5c430a69
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/helper.py
@@ -0,0 +1,574 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=W0212,too-many-return-statements
+from __future__ import absolute_import
+
+import json
+import logging
+import random
+import string
+from collections import namedtuple
+import time
+
+import requests
+import six
+from teuthology.exceptions import CommandFailedError
+
+from tasks.mgr.mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class DashboardTestCase(MgrTestCase):
+ # Display full error diffs
+ maxDiff = None
+
+ # Increased x3 (20 -> 60)
+ TIMEOUT_HEALTH_CLEAR = 60
+
+ MGRS_REQUIRED = 2
+ MDSS_REQUIRED = 1
+ REQUIRE_FILESYSTEM = True
+ CLIENTS_REQUIRED = 1
+ CEPHFS = False
+
+ _session = None # type: requests.sessions.Session
+ _token = None
+ _resp = None # type: requests.models.Response
+ _loggedin = False
+ _base_uri = None
+
+ AUTO_AUTHENTICATE = True
+
+ AUTH_ROLES = ['administrator']
+
+ @classmethod
+ def create_user(cls, username, password, roles):
+ try:
+ cls._ceph_cmd(['dashboard', 'ac-user-show', username])
+ cls._ceph_cmd(['dashboard', 'ac-user-delete', username])
+ except CommandFailedError as ex:
+ if ex.exitstatus != 2:
+ raise ex
+
+ user_create_args = [
+ 'dashboard', 'ac-user-create', username
+ ]
+ cls._ceph_cmd_with_secret(user_create_args, password)
+
+ set_roles_args = ['dashboard', 'ac-user-set-roles', username]
+ for idx, role in enumerate(roles):
+ if isinstance(role, str):
+ set_roles_args.append(role)
+ else:
+ assert isinstance(role, dict)
+ rolename = 'test_role_{}'.format(idx)
+ try:
+ cls._ceph_cmd(['dashboard', 'ac-role-show', rolename])
+ cls._ceph_cmd(['dashboard', 'ac-role-delete', rolename])
+ except CommandFailedError as ex:
+ if ex.exitstatus != 2:
+ raise ex
+ cls._ceph_cmd(['dashboard', 'ac-role-create', rolename])
+ for mod, perms in role.items():
+ args = ['dashboard', 'ac-role-add-scope-perms', rolename, mod]
+ args.extend(perms)
+ cls._ceph_cmd(args)
+ set_roles_args.append(rolename)
+ cls._ceph_cmd(set_roles_args)
+
+ @classmethod
+ def login(cls, username, password, set_cookies=False):
+ if cls._loggedin:
+ cls.logout()
+ cls._post('/api/auth', {'username': username,
+ 'password': password}, set_cookies=set_cookies)
+ cls._assertEq(cls._resp.status_code, 201)
+ cls._token = cls.jsonBody()['token']
+ cls._loggedin = True
+
+ @classmethod
+ def logout(cls, set_cookies=False):
+ if cls._loggedin:
+ cls._post('/api/auth/logout', set_cookies=set_cookies)
+ cls._assertEq(cls._resp.status_code, 200)
+ cls._token = None
+ cls._loggedin = False
+
+ @classmethod
+ def delete_user(cls, username, roles=None):
+ if roles is None:
+ roles = []
+ cls._ceph_cmd(['dashboard', 'ac-user-delete', username])
+ for idx, role in enumerate(roles):
+ if isinstance(role, dict):
+ cls._ceph_cmd(['dashboard', 'ac-role-delete', 'test_role_{}'.format(idx)])
+
+ @classmethod
+ def RunAs(cls, username, password, roles):
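+ """
+ Decorator: run the wrapped test as a temporary user with the given
+ roles; the user is created and logged in before the test and logged
+ out and deleted afterwards.
+
+ Illustrative usage (mirrors the dashboard test modules)::
+
+ @DashboardTestCase.RunAs('test', 'test', ['block-manager'])
+ def test_access_permissions(self):
+ self._get("/api/cephfs/{}".format(self.fs.get_namespace_id()))
+ self.assertStatus(403)
+ """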
+ def wrapper(func):
+ def execute(self, *args, **kwargs):
+ self.create_user(username, password, roles)
+ self.login(username, password)
+ res = func(self, *args, **kwargs)
+ self.logout()
+ self.delete_user(username, roles)
+ return res
+ return execute
+ return wrapper
+
+ @classmethod
+ def set_jwt_token(cls, token):
+ cls._token = token
+
+ @classmethod
+ def setUpClass(cls):
+ super(DashboardTestCase, cls).setUpClass()
+ cls._assign_ports("dashboard", "ssl_server_port")
+ cls._load_module("dashboard")
+ cls._base_uri = cls._get_uri("dashboard").rstrip('/')
+
+ if cls.CEPHFS:
+ cls.mds_cluster.clear_firewall()
+
+ # To avoid any issues with e.g. unlink bugs, we destroy and recreate
+ # the filesystem rather than just doing a rm -rf of files
+ cls.mds_cluster.mds_stop()
+ cls.mds_cluster.mds_fail()
+ cls.mds_cluster.delete_all_filesystems()
+ cls.fs = None # is now invalid!
+
+ cls.fs = cls.mds_cluster.newfs(create=True)
+ cls.fs.mds_restart()
+
+ # In case some test messed with auth caps, reset them
+ # pylint: disable=not-an-iterable
+ client_mount_ids = [m.client_id for m in cls.mounts]
+ for client_id in client_mount_ids:
+ cls.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(client_id),
+ 'mds', 'allow',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}'.format(cls.fs.get_data_pool_name()))
+
+ # wait for mds restart to complete...
+ cls.fs.wait_for_daemons()
+
+ cls._token = None
+ cls._session = requests.Session()
+ cls._resp = None
+
+ cls.create_user('admin', 'admin', cls.AUTH_ROLES)
+ if cls.AUTO_AUTHENTICATE:
+ cls.login('admin', 'admin')
+
+ def setUp(self):
+ super(DashboardTestCase, self).setUp()
+ if not self._loggedin and self.AUTO_AUTHENTICATE:
+ self.login('admin', 'admin')
+ self.wait_for_health_clear(self.TIMEOUT_HEALTH_CLEAR)
+
+ @classmethod
+ def tearDownClass(cls):
+ super(DashboardTestCase, cls).tearDownClass()
+
+ # pylint: disable=inconsistent-return-statements, too-many-arguments, too-many-branches
+ @classmethod
+ def _request(cls, url, method, data=None, params=None, set_cookies=False):
+ url = "{}{}".format(cls._base_uri, url)
+ log.debug("Request %s to %s", method, url)
+ headers = {}
+ cookies = {}
+ if cls._token:
+ if set_cookies:
+ cookies['token'] = cls._token
+ else:
+ headers['Authorization'] = "Bearer {}".format(cls._token)
+
+ if set_cookies:
+ if method == 'GET':
+ cls._resp = cls._session.get(url, params=params, verify=False,
+ headers=headers, cookies=cookies)
+ elif method == 'POST':
+ cls._resp = cls._session.post(url, json=data, params=params,
+ verify=False, headers=headers, cookies=cookies)
+ elif method == 'DELETE':
+ cls._resp = cls._session.delete(url, json=data, params=params,
+ verify=False, headers=headers, cookies=cookies)
+ elif method == 'PUT':
+ cls._resp = cls._session.put(url, json=data, params=params,
+ verify=False, headers=headers, cookies=cookies)
+ else:
+ assert False
+ else:
+ if method == 'GET':
+ cls._resp = cls._session.get(url, params=params, verify=False,
+ headers=headers)
+ elif method == 'POST':
+ cls._resp = cls._session.post(url, json=data, params=params,
+ verify=False, headers=headers)
+ elif method == 'DELETE':
+ cls._resp = cls._session.delete(url, json=data, params=params,
+ verify=False, headers=headers)
+ elif method == 'PUT':
+ cls._resp = cls._session.put(url, json=data, params=params,
+ verify=False, headers=headers)
+ else:
+ assert False
+ try:
+ if not cls._resp.ok:
+ # Output response for easier debugging.
+ log.error("Request response: %s", cls._resp.text)
+ content_type = cls._resp.headers['content-type']
+ if content_type == 'application/json' and cls._resp.text and cls._resp.text != "":
+ return cls._resp.json()
+ return cls._resp.text
+ except ValueError as ex:
+ log.exception("Failed to decode response: %s", cls._resp.text)
+ raise ex
+
+ @classmethod
+ def _get(cls, url, params=None, set_cookies=False):
+ return cls._request(url, 'GET', params=params, set_cookies=set_cookies)
+
+ @classmethod
+ def _view_cache_get(cls, url, retries=5):
+ retry = True
+ while retry and retries > 0:
+ retry = False
+ res = cls._get(url)
+ if isinstance(res, dict):
+ res = [res]
+ for view in res:
+ assert 'value' in view
+ if not view['value']:
+ retry = True
+ retries -= 1
+ if retries == 0:
+ raise Exception("{} view cache exceeded number of retries={}"
+ .format(url, retries))
+ return res
+
+ @classmethod
+ def _post(cls, url, data=None, params=None, set_cookies=False):
+ cls._request(url, 'POST', data, params, set_cookies=set_cookies)
+
+ @classmethod
+ def _delete(cls, url, data=None, params=None, set_cookies=False):
+ cls._request(url, 'DELETE', data, params, set_cookies=set_cookies)
+
+ @classmethod
+ def _put(cls, url, data=None, params=None, set_cookies=False):
+ cls._request(url, 'PUT', data, params, set_cookies=set_cookies)
+
+ @classmethod
+ def _assertEq(cls, v1, v2):
+ if not v1 == v2:
+ raise Exception("assertion failed: {} != {}".format(v1, v2))
+
+ @classmethod
+ def _assertIn(cls, v1, v2):
+ if v1 not in v2:
+ raise Exception("assertion failed: {} not in {}".format(v1, v2))
+
+ @classmethod
+ def _assertIsInst(cls, v1, v2):
+ if not isinstance(v1, v2):
+ raise Exception("assertion failed: {} not instance of {}".format(v1, v2))
+
+ # pylint: disable=too-many-arguments
+ @classmethod
+ def _task_request(cls, method, url, data, timeout, set_cookies=False):
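+ """
+ Issue the request and, if the dashboard answers with 202 (task accepted),
+ poll /api/task about once per second (up to `timeout` attempts) until a
+ task with matching name and metadata shows up in finished_tasks, then map
+ the task outcome back onto cls._resp.status_code and return its ret_value
+ (or the recorded exception).
+ """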
+ res = cls._request(url, method, data, set_cookies=set_cookies)
+ cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404])
+
+ if cls._resp.status_code == 403:
+ return None
+
+ if cls._resp.status_code != 202:
+ log.debug("task finished immediately")
+ return res
+
+ cls._assertIn('name', res)
+ cls._assertIn('metadata', res)
+ task_name = res['name']
+ task_metadata = res['metadata']
+
+ retries = int(timeout)
+ res_task = None
+ while retries > 0 and not res_task:
+ retries -= 1
+ log.debug("task (%s, %s) is still executing", task_name, task_metadata)
+ time.sleep(1)
+ _res = cls._get('/api/task?name={}'.format(task_name))
+ cls._assertEq(cls._resp.status_code, 200)
+ executing_tasks = [task for task in _res['executing_tasks'] if
+ task['metadata'] == task_metadata]
+ finished_tasks = [task for task in _res['finished_tasks'] if
+ task['metadata'] == task_metadata]
+ if not executing_tasks and finished_tasks:
+ res_task = finished_tasks[0]
+
+ if retries <= 0:
+ raise Exception("Waiting for task ({}, {}) to finish timed out. {}"
+ .format(task_name, task_metadata, _res))
+
+ log.debug("task (%s, %s) finished", task_name, task_metadata)
+ if res_task['success']:
+ if method == 'POST':
+ cls._resp.status_code = 201
+ elif method == 'PUT':
+ cls._resp.status_code = 200
+ elif method == 'DELETE':
+ cls._resp.status_code = 204
+ return res_task['ret_value']
+ else:
+ if 'status' in res_task['exception']:
+ cls._resp.status_code = res_task['exception']['status']
+ else:
+ cls._resp.status_code = 500
+ return res_task['exception']
+
+ @classmethod
+ def _task_post(cls, url, data=None, timeout=60, set_cookies=False):
+ return cls._task_request('POST', url, data, timeout, set_cookies=set_cookies)
+
+ @classmethod
+ def _task_delete(cls, url, timeout=60, set_cookies=False):
+ return cls._task_request('DELETE', url, None, timeout, set_cookies=set_cookies)
+
+ @classmethod
+ def _task_put(cls, url, data=None, timeout=60, set_cookies=False):
+ return cls._task_request('PUT', url, data, timeout, set_cookies=set_cookies)
+
+ @classmethod
+ def cookies(cls):
+ return cls._resp.cookies
+
+ @classmethod
+ def jsonBody(cls):
+ return cls._resp.json()
+
+ @classmethod
+ def reset_session(cls):
+ cls._session = requests.Session()
+
+ def assertSubset(self, data, biggerData):
+ for key, value in data.items():
+ self.assertEqual(biggerData[key], value)
+
+ def assertJsonBody(self, data):
+ body = self._resp.json()
+ self.assertEqual(body, data)
+
+ def assertJsonSubset(self, data):
+ self.assertSubset(data, self._resp.json())
+
+ def assertSchema(self, data, schema):
+ try:
+ return _validate_json(data, schema)
+ except _ValError as e:
+ self.assertEqual(data, str(e))
+
+ def assertSchemaBody(self, schema):
+ self.assertSchema(self.jsonBody(), schema)
+
+ def assertBody(self, body):
+ self.assertEqual(self._resp.text, body)
+
+ def assertStatus(self, status):
+ if isinstance(status, list):
+ self.assertIn(self._resp.status_code, status)
+ else:
+ self.assertEqual(self._resp.status_code, status)
+
+ def assertHeaders(self, headers):
+ for name, value in headers.items():
+ self.assertIn(name, self._resp.headers)
+ self.assertEqual(self._resp.headers[name], value)
+
+ def assertError(self, code=None, component=None, detail=None):
+ body = self._resp.json()
+ if code:
+ self.assertEqual(body['code'], code)
+ if component:
+ self.assertEqual(body['component'], component)
+ if detail:
+ self.assertEqual(body['detail'], detail)
+
+ @classmethod
+ def _ceph_cmd(cls, cmd):
+ res = cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd)
+ log.debug("command result: %s", res)
+ return res
+
+ @classmethod
+ def _ceph_cmd_result(cls, cmd):
+ exitstatus = cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd)
+ log.debug("command exit status: %d", exitstatus)
+ return exitstatus
+
+ @classmethod
+ def _ceph_cmd_with_secret(cls, cmd, secret, return_exit_code=False):
+ cmd.append('-i')
+ cmd.append('{}'.format(cls._ceph_create_tmp_file(secret)))
+ if return_exit_code:
+ return cls._ceph_cmd_result(cmd)
+ return cls._ceph_cmd(cmd)
+
+ @classmethod
+ def _ceph_create_tmp_file(cls, content):
+ """Create a temporary file in the remote cluster"""
+ file_name = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(20))
+ file_path = '/tmp/{}'.format(file_name)
+ cls._cmd(['sh', '-c', 'echo -n {} > {}'.format(content, file_path)])
+ return file_path
+
+ def set_config_key(self, key, value):
+ self._ceph_cmd(['config-key', 'set', key, value])
+
+ def get_config_key(self, key):
+ return self._ceph_cmd(['config-key', 'get', key])
+
+ @classmethod
+ def _cmd(cls, args):
+ return cls.mgr_cluster.admin_remote.run(args=args)
+
+ @classmethod
+ def _rbd_cmd(cls, cmd):
+ args = ['rbd']
+ args.extend(cmd)
+ cls._cmd(args)
+
+ @classmethod
+ def _radosgw_admin_cmd(cls, cmd):
+ args = ['radosgw-admin']
+ args.extend(cmd)
+ cls._cmd(args)
+
+ @classmethod
+ def _rados_cmd(cls, cmd):
+ args = ['rados']
+ args.extend(cmd)
+ cls._cmd(args)
+
+ @classmethod
+ def mons(cls):
+ out = cls.ceph_cluster.mon_manager.raw_cluster_cmd('mon_status')
+ j = json.loads(out)
+ return [mon['name'] for mon in j['monmap']['mons']]
+
+ @classmethod
+ def find_object_in_list(cls, key, value, iterable):
+ """
+ Get the first occurrence of an object within a list with
+ the specified key/value.
+ :param key: The name of the key.
+ :param value: The value to search for.
+ :param iterable: The list to process.
+ :return: Returns the found object or None.
+ """
+ for obj in iterable:
+ if key in obj and obj[key] == value:
+ return obj
+ return None
+
+
+class JLeaf(namedtuple('JLeaf', ['typ', 'none'])):
+ def __new__(cls, typ, none=False):
+ if typ == str:
+ typ = six.string_types
+ return super(JLeaf, cls).__new__(cls, typ, none)
+
+
+JList = namedtuple('JList', ['elem_typ'])
+
+JTuple = namedtuple('JTuple', ['elem_typs'])
+
+JUnion = namedtuple('JUnion', ['elem_typs'])
+
+class JObj(namedtuple('JObj', ['sub_elems', 'allow_unknown', 'none', 'unknown_schema'])):
+ def __new__(cls, sub_elems, allow_unknown=False, none=False, unknown_schema=None):
+ """
+ :type sub_elems: dict[str, JAny | JLeaf | JList | JObj | type]
+ :type allow_unknown: bool
+ :type none: bool
+ :type unknown_schema: int, str, JAny | JLeaf | JList | JObj
+ :return:
+ """
+ return super(JObj, cls).__new__(cls, sub_elems, allow_unknown, none, unknown_schema)
+
+
+JAny = namedtuple('JAny', ['none'])
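+
+# Illustrative schema composition (a sketch -- the key names below are made-up
+# examples); such schemas are what assertSchema()/assertSchemaBody() consume:
+#
+# host_schema = JObj(sub_elems={
+# 'hostname': JLeaf(str),
+# 'services': JList(JObj(sub_elems={'type': str, 'id': str})),
+# }, allow_unknown=True)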
+
+
+class _ValError(Exception):
+ def __init__(self, msg, path):
+ path_str = ''.join('[{}]'.format(repr(p)) for p in path)
+ super(_ValError, self).__init__('In `input{}`: {}'.format(path_str, msg))
+
+
+# pylint: disable=dangerous-default-value,inconsistent-return-statements
+def _validate_json(val, schema, path=[]):
+ """
+ >>> d = {'a': 1, 'b': 'x', 'c': range(10)}
+ ... ds = JObj({'a': int, 'b': str, 'c': JList(int)})
+ ... _validate_json(d, ds)
+ True
+ >>> _validate_json({'num': 1}, JObj({'num': JUnion([int,float])}))
+ True
+ >>> _validate_json({'num': 'a'}, JObj({'num': JUnion([int,float])}))
+ False
+ """
+ if isinstance(schema, JAny):
+ if not schema.none and val is None:
+ raise _ValError('val is None', path)
+ return True
+ if isinstance(schema, JLeaf):
+ if schema.none and val is None:
+ return True
+ if not isinstance(val, schema.typ):
+ raise _ValError('val not of type {}'.format(schema.typ), path)
+ return True
+ if isinstance(schema, JList):
+ if not isinstance(val, list):
+ raise _ValError('val="{}" is not a list'.format(val), path)
+ return all(_validate_json(e, schema.elem_typ, path + [i]) for i, e in enumerate(val))
+ if isinstance(schema, JTuple):
+ return all(_validate_json(val[i], typ, path + [i])
+ for i, typ in enumerate(schema.elem_typs))
+ if isinstance(schema, JUnion):
+ for typ in schema.elem_typs:
+ try:
+ if _validate_json(val, typ, path):
+ return True
+ except _ValError:
+ pass
+ return False
+ if isinstance(schema, JObj):
+ if val is None and schema.none:
+ return True
+ elif val is None:
+ raise _ValError('val is None', path)
+ if not hasattr(val, 'keys'):
+ raise _ValError('val="{}" is not a dict'.format(val), path)
+ missing_keys = set(schema.sub_elems.keys()).difference(set(val.keys()))
+ if missing_keys:
+ raise _ValError('missing keys: {}'.format(missing_keys), path)
+ unknown_keys = set(val.keys()).difference(set(schema.sub_elems.keys()))
+ if not schema.allow_unknown and unknown_keys:
+ raise _ValError('unknown keys: {}'.format(unknown_keys), path)
+ result = all(
+ _validate_json(val[key], sub_schema, path + [key])
+ for key, sub_schema in schema.sub_elems.items()
+ )
+ if unknown_keys and schema.allow_unknown and schema.unknown_schema:
+ result += all(
+ _validate_json(val[key], schema.unknown_schema, path + [key])
+ for key in unknown_keys
+ )
+ return result
+ if schema in [str, int, float, bool, six.string_types]:
+ return _validate_json(val, JLeaf(schema), path)
+
+ assert False, str(path)
diff --git a/qa/tasks/mgr/dashboard/test_auth.py b/qa/tasks/mgr/dashboard/test_auth.py
new file mode 100644
index 00000000..df5485d4
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_auth.py
@@ -0,0 +1,240 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import time
+
+import jwt
+from teuthology.orchestra.run import \
+ CommandFailedError # pylint: disable=import-error
+
+from .helper import DashboardTestCase
+
+
+class AuthTest(DashboardTestCase):
+
+ AUTO_AUTHENTICATE = False
+
+ def setUp(self):
+ super(AuthTest, self).setUp()
+ self.reset_session()
+
+ def _validate_jwt_token(self, token, username, permissions):
+ payload = jwt.decode(token, options={'verify_signature': False})
+ self.assertIn('username', payload)
+ self.assertEqual(payload['username'], username)
+
+ for scope, perms in permissions.items():
+ self.assertIsNotNone(scope)
+ self.assertIn('read', perms)
+ self.assertIn('update', perms)
+ self.assertIn('create', perms)
+ self.assertIn('delete', perms)
+
+ def test_login_without_password(self):
+ with self.assertRaises(CommandFailedError):
+ self.create_user('admin2', '', ['administrator'])
+
+ def test_a_set_login_credentials(self):
+ # test with Authorization header
+ self.create_user('admin2', 'admin2', ['administrator'])
+ self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'})
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._validate_jwt_token(data['token'], "admin2", data['permissions'])
+ self.delete_user('admin2')
+
+ # test with Cookies set
+ self.create_user('admin2', 'admin2', ['administrator'])
+ self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True)
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._validate_jwt_token(data['token'], "admin2", data['permissions'])
+ self.delete_user('admin2')
+
+ def test_login_valid(self):
+ # test with Authorization header
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._validate_jwt_token(data['token'], "admin", data['permissions'])
+
+ # test with Cookies set
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._validate_jwt_token(data['token'], "admin", data['permissions'])
+
+ def test_login_invalid(self):
+ # test with Authorization header
+ self._post("/api/auth", {'username': 'admin', 'password': 'inval'})
+ self.assertStatus(400)
+ self.assertJsonBody({
+ "component": "auth",
+ "code": "invalid_credentials",
+ "detail": "Invalid credentials"
+ })
+
+ # test with Cookies set
+ self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True)
+ self.assertStatus(400)
+ self.assertJsonBody({
+ "component": "auth",
+ "code": "invalid_credentials",
+ "detail": "Invalid credentials"
+ })
+
+ def test_logout(self):
+ # test with Authorization header
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._validate_jwt_token(data['token'], "admin", data['permissions'])
+ self.set_jwt_token(data['token'])
+ self._post("/api/auth/logout")
+ self.assertStatus(200)
+ self.assertJsonBody({
+ "redirect_url": "#/login"
+ })
+ self._get("/api/host")
+ self.assertStatus(401)
+ self.set_jwt_token(None)
+
+ # test with Cookies set
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._validate_jwt_token(data['token'], "admin", data['permissions'])
+ self.set_jwt_token(data['token'])
+ self._post("/api/auth/logout", set_cookies=True)
+ self.assertStatus(200)
+ self.assertJsonBody({
+ "redirect_url": "#/login"
+ })
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(401)
+ self.set_jwt_token(None)
+
+ def test_token_ttl(self):
+ # test with Authorization header
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ self._get("/api/host")
+ self.assertStatus(200)
+ time.sleep(6)
+ self._get("/api/host")
+ self.assertStatus(401)
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
+ self.set_jwt_token(None)
+
+ # test with Cookies set
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(200)
+ time.sleep(6)
+ self._get("/api/host")
+ self.assertStatus(401)
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
+ self.set_jwt_token(None)
+
+ def test_remove_from_blacklist(self):
+ # test with Authorization header
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ # the following call adds the token to the blacklist
+ self._post("/api/auth/logout")
+ self.assertStatus(200)
+ self._get("/api/host")
+ self.assertStatus(401)
+ time.sleep(6)
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
+ self.set_jwt_token(None)
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'})
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ # the following call removes expired tokens from the blacklist
+ self._post("/api/auth/logout")
+ self.assertStatus(200)
+
+ # test with Cookies set
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5'])
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ # the following call adds the token to the blocklist
+ self._post("/api/auth/logout", set_cookies=True)
+ self.assertStatus(200)
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(401)
+ time.sleep(6)
+ self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800'])
+ self.set_jwt_token(None)
+ self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True)
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ # the following call removes expired tokens from the blocklist
+ self._post("/api/auth/logout", set_cookies=True)
+ self.assertStatus(200)
+
+ def test_unauthorized(self):
+ # test with Authorization header
+ self._get("/api/host")
+ self.assertStatus(401)
+
+ # test with Cookies set
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(401)
+
+ def test_invalidate_token_by_admin(self):
+ # test with Authorization header
+ self._get("/api/host")
+ self.assertStatus(401)
+ self.create_user('user', 'user', ['read-only'])
+ time.sleep(1)
+ self._post("/api/auth", {'username': 'user', 'password': 'user'})
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ self._get("/api/host")
+ self.assertStatus(200)
+ time.sleep(1)
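+ # changing the user's password must invalidate tokens issued before the change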
+ self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', 'user'], 'user2')
+ time.sleep(1)
+ self._get("/api/host")
+ self.assertStatus(401)
+ self.set_jwt_token(None)
+ self._post("/api/auth", {'username': 'user', 'password': 'user2'})
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ self._get("/api/host")
+ self.assertStatus(200)
+ self.delete_user("user")
+
+ # test with Cookies set
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(401)
+ self.create_user('user', 'user', ['read-only'])
+ time.sleep(1)
+ self._post("/api/auth", {'username': 'user', 'password': 'user'}, set_cookies=True)
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(200)
+ time.sleep(1)
+ self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', 'user'], 'user2')
+ time.sleep(1)
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(401)
+ self.set_jwt_token(None)
+ self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True)
+ self.assertStatus(201)
+ self.set_jwt_token(self.jsonBody()['token'])
+ self._get("/api/host", set_cookies=True)
+ self.assertStatus(200)
+ self.delete_user("user")
diff --git a/qa/tasks/mgr/dashboard/test_cephfs.py b/qa/tasks/mgr/dashboard/test_cephfs.py
new file mode 100644
index 00000000..24350cce
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_cephfs.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class CephfsTest(DashboardTestCase):
+ CEPHFS = True
+
+ AUTH_ROLES = ['cephfs-manager']
+
+ @DashboardTestCase.RunAs('test', 'test', ['block-manager'])
+ def test_access_permissions(self):
+ fs_id = self.fs.get_namespace_id()
+ self._get("/api/cephfs/{}/clients".format(fs_id))
+ self.assertStatus(403)
+ self._get("/api/cephfs/{}".format(fs_id))
+ self.assertStatus(403)
+ self._get("/api/cephfs/{}/mds_counters".format(fs_id))
+ self.assertStatus(403)
+
+ def test_cephfs_clients(self):
+ fs_id = self.fs.get_namespace_id()
+ data = self._get("/api/cephfs/{}/clients".format(fs_id))
+ self.assertStatus(200)
+
+ self.assertIn('status', data)
+ self.assertIn('data', data)
+
+ def test_cephfs_get(self):
+ fs_id = self.fs.get_namespace_id()
+ data = self._get("/api/cephfs/{}/".format(fs_id))
+ self.assertStatus(200)
+
+ self.assertIn('cephfs', data)
+ self.assertIn('standbys', data)
+ self.assertIn('versions', data)
+ self.assertIsNotNone(data['cephfs'])
+ self.assertIsNotNone(data['standbys'])
+ self.assertIsNotNone(data['versions'])
+
+ def test_cephfs_mds_counters(self):
+ fs_id = self.fs.get_namespace_id()
+ data = self._get("/api/cephfs/{}/mds_counters".format(fs_id))
+ self.assertStatus(200)
+
+ self.assertIsInstance(data, dict)
+ self.assertIsNotNone(data)
+
+ def test_cephfs_mds_counters_wrong(self):
+ self._get("/api/cephfs/baadbaad/mds_counters")
+ self.assertStatus(400)
+ self.assertJsonBody({
+ "component": 'cephfs',
+ "code": "invalid_cephfs_id",
+ "detail": "Invalid cephfs ID baadbaad"
+ })
+
+ def test_cephfs_list(self):
+ data = self._get("/api/cephfs/")
+ self.assertStatus(200)
+ self.assertIsInstance(data, list)
+
+ cephfs = data[0]
+ self.assertIn('id', cephfs)
+ self.assertIn('mdsmap', cephfs)
+ self.assertIsNotNone(cephfs['id'])
+ self.assertIsNotNone(cephfs['mdsmap'])
diff --git a/qa/tasks/mgr/dashboard/test_cluster_configuration.py b/qa/tasks/mgr/dashboard/test_cluster_configuration.py
new file mode 100644
index 00000000..798afe9c
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_cluster_configuration.py
@@ -0,0 +1,388 @@
+from __future__ import absolute_import
+
+import time
+
+from .helper import DashboardTestCase
+
+
+class ClusterConfigurationTest(DashboardTestCase):
+
+ def test_list(self):
+ data = self._get('/api/cluster_conf')
+ self.assertStatus(200)
+ self.assertIsInstance(data, list)
+ self.assertGreater(len(data), 1000)
+ for conf in data:
+ self._validate_single(conf)
+
+ def test_get(self):
+ data = self._get('/api/cluster_conf/admin_socket')
+ self.assertStatus(200)
+ self._validate_single(data)
+ self.assertIn('enum_values', data)
+
+ data = self._get('/api/cluster_conf/fantasy_name')
+ self.assertStatus(404)
+
+ def test_get_specific_db_config_option(self):
+ config_name = 'mon_allow_pool_delete'
+
+ orig_value = self._get_config_by_name(config_name)
+
+ self._ceph_cmd(['config', 'set', 'mon', config_name, 'true'])
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ [{'section': 'mon', 'value': 'true'}])
+ self.assertEqual(result, [{'section': 'mon', 'value': 'true'}])
+
+ self._ceph_cmd(['config', 'set', 'mon', config_name, 'false'])
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ [{'section': 'mon', 'value': 'false'}])
+ self.assertEqual(result, [{'section': 'mon', 'value': 'false'}])
+
+ # restore value
+ if orig_value:
+ self._ceph_cmd(['config', 'set', 'mon', config_name, orig_value[0]['value']])
+
+ def test_filter_config_options(self):
+ config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'osd_scrub_end_hour']
+ data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names)))
+ self.assertStatus(200)
+ self.assertIsInstance(data, list)
+ self.assertEqual(len(data), 3)
+ for conf in data:
+ self._validate_single(conf)
+ self.assertIn(conf['name'], config_names)
+
+ def test_filter_config_options_empty_names(self):
+ self._get('/api/cluster_conf/filter?names=')
+ self.assertStatus(404)
+ self.assertEqual(self._resp.json()['detail'], 'Config options `` not found')
+
+ def test_filter_config_options_unknown_name(self):
+ self._get('/api/cluster_conf/filter?names=abc')
+ self.assertStatus(404)
+ self.assertEqual(self._resp.json()['detail'], 'Config options `abc` not found')
+
+ def test_filter_config_options_contains_unknown_name(self):
+ config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'abc']
+ data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names)))
+ self.assertStatus(200)
+ self.assertIsInstance(data, list)
+ self.assertEqual(len(data), 2)
+ for conf in data:
+ self._validate_single(conf)
+ self.assertIn(conf['name'], config_names)
+
+ def test_create(self):
+ config_name = 'debug_ms'
+ orig_value = self._get_config_by_name(config_name)
+
+ # remove all existing settings for equal preconditions
+ self._clear_all_values_for_config_option(config_name)
+
+ expected_result = [{'section': 'mon', 'value': '0/3'}]
+
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': expected_result
+ })
+ self.assertStatus(201)
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ expected_result)
+ self.assertEqual(result, expected_result)
+
+ # reset original value
+ self._clear_all_values_for_config_option(config_name)
+ self._reset_original_values(config_name, orig_value)
+
+ def test_delete(self):
+ config_name = 'debug_ms'
+ orig_value = self._get_config_by_name(config_name)
+
+ # set a config option
+ expected_result = [{'section': 'mon', 'value': '0/3'}]
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': expected_result
+ })
+ self.assertStatus(201)
+ self._wait_for_expected_get_result(self._get_config_by_name, config_name, expected_result)
+
+ # delete it and check if it's deleted
+ self._delete('/api/cluster_conf/{}?section={}'.format(config_name, 'mon'))
+ self.assertStatus(204)
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name, None)
+ self.assertEqual(result, None)
+
+ # reset original value
+ self._clear_all_values_for_config_option(config_name)
+ self._reset_original_values(config_name, orig_value)
+
+ def test_create_cant_update_at_runtime(self):
+ config_name = 'public_bind_addr' # not updatable
+ config_value = [{'section': 'global', 'value': 'true'}]
+ orig_value = self._get_config_by_name(config_name)
+
+ # try to set config option and check if it fails
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': config_value
+ })
+ self.assertStatus(400)
+ self.assertError(code='config_option_not_updatable_at_runtime',
+ component='cluster_configuration',
+ detail='Config option {} is/are not updatable at runtime'.format(
+ config_name))
+
+ # check if config option value is still the original one
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ orig_value)
+ self.assertEqual(result, orig_value)
+
+ def test_create_two_values(self):
+ config_name = 'debug_ms'
+ orig_value = self._get_config_by_name(config_name)
+
+ # remove all existing settings for equal preconditions
+ self._clear_all_values_for_config_option(config_name)
+
+ expected_result = [{'section': 'mon', 'value': '0/3'},
+ {'section': 'osd', 'value': '0/5'}]
+
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': expected_result
+ })
+ self.assertStatus(201)
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ expected_result)
+ self.assertEqual(result, expected_result)
+
+ # reset original value
+ self._clear_all_values_for_config_option(config_name)
+ self._reset_original_values(config_name, orig_value)
+
+ def test_create_can_handle_none_values(self):
+ config_name = 'debug_ms'
+ orig_value = self._get_config_by_name(config_name)
+
+ # remove all existing settings for equal preconditions
+ self._clear_all_values_for_config_option(config_name)
+
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': [{'section': 'mon', 'value': '0/3'},
+ {'section': 'osd', 'value': None}]
+ })
+ self.assertStatus(201)
+
+ expected_result = [{'section': 'mon', 'value': '0/3'}]
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ expected_result)
+ self.assertEqual(result, expected_result)
+
+ # reset original value
+ self._clear_all_values_for_config_option(config_name)
+ self._reset_original_values(config_name, orig_value)
+
+ def test_create_can_handle_boolean_values(self):
+ config_name = 'mon_allow_pool_delete'
+ orig_value = self._get_config_by_name(config_name)
+
+ # remove all existing settings for equal preconditions
+ self._clear_all_values_for_config_option(config_name)
+
+ expected_result = [{'section': 'mon', 'value': 'true'}]
+
+ self._post('/api/cluster_conf', {
+ 'name': config_name,
+ 'value': [{'section': 'mon', 'value': True}]})
+ self.assertStatus(201)
+
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ expected_result)
+ self.assertEqual(result, expected_result)
+
+ # reset original value
+ self._clear_all_values_for_config_option(config_name)
+ self._reset_original_values(config_name, orig_value)
+
+ def test_bulk_set(self):
+ expected_result = {
+ 'osd_max_backfills': {'section': 'osd', 'value': '1'},
+ 'osd_recovery_max_active': {'section': 'osd', 'value': '3'},
+ 'osd_recovery_max_single_start': {'section': 'osd', 'value': '1'},
+ 'osd_recovery_sleep': {'section': 'osd', 'value': '2.000000'}
+ }
+ orig_values = dict()
+
+ for config_name in expected_result:
+ orig_values[config_name] = self._get_config_by_name(config_name)
+
+ # remove all existing settings for equal preconditions
+ self._clear_all_values_for_config_option(config_name)
+
+ self._put('/api/cluster_conf', {'options': expected_result})
+ self.assertStatus(200)
+
+ for config_name, value in expected_result.items():
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ [value])
+ self.assertEqual(result, [value])
+
+ # reset original value
+ self._clear_all_values_for_config_option(config_name)
+ self._reset_original_values(config_name, orig_values[config_name])
+
+ def test_bulk_set_cant_update_at_runtime(self):
+ config_options = {
+ 'public_bind_addr': {'section': 'global', 'value': '1.2.3.4:567'}, # not updatable
+ 'public_network': {'section': 'global', 'value': '10.0.0.0/8'} # not updatable
+ }
+ orig_values = dict()
+
+ for config_name in config_options:
+ orig_values[config_name] = self._get_config_by_name(config_name)
+
+ # try to set config options and see if it fails
+ self._put('/api/cluster_conf', {'options': config_options})
+ self.assertStatus(400)
+ self.assertError(code='config_option_not_updatable_at_runtime',
+ component='cluster_configuration',
+ detail='Config option {} is/are not updatable at runtime'.format(
+ ', '.join(config_options.keys())))
+
+ # check if config option values are still the original ones
+ for config_name, value in orig_values.items():
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ value)
+ self.assertEqual(result, value)
+
+ def test_bulk_set_cant_update_at_runtime_partial(self):
+ config_options = {
+ 'public_bind_addr': {'section': 'global', 'value': 'true'}, # not updatable
+ 'log_to_stderr': {'section': 'global', 'value': 'true'} # updatable
+ }
+ orig_values = dict()
+
+ for config_name in config_options:
+ orig_values[config_name] = self._get_config_by_name(config_name)
+
+ # try to set config options and see if it fails
+ self._put('/api/cluster_conf', {'options': config_options})
+ self.assertStatus(400)
+ self.assertError(code='config_option_not_updatable_at_runtime',
+ component='cluster_configuration',
+ detail='Config option {} is/are not updatable at runtime'.format(
+ 'public_bind_addr'))
+
+ # check if config option values are still the original ones
+ for config_name, value in orig_values.items():
+ result = self._wait_for_expected_get_result(self._get_config_by_name, config_name,
+ value)
+ self.assertEqual(result, value)
+
+ def test_check_existence(self):
+ """
+ This test case checks that all config options hard-coded into the dashboard actually
+ exist in the cluster.
+ If you add further hard-coded options to the dashboard, please add them to this list as
+ well.
+ """
+ hard_coded_options = [
+ 'osd_max_backfills', # osd-recv-speed
+ 'osd_recovery_max_active', # osd-recv-speed
+ 'osd_recovery_max_single_start', # osd-recv-speed
+ 'osd_recovery_sleep', # osd-recv-speed
+ 'osd_scrub_during_recovery', # osd-pg-scrub
+ 'osd_scrub_begin_hour', # osd-pg-scrub
+ 'osd_scrub_end_hour', # osd-pg-scrub
+ 'osd_scrub_begin_week_day', # osd-pg-scrub
+ 'osd_scrub_end_week_day', # osd-pg-scrub
+ 'osd_scrub_min_interval', # osd-pg-scrub
+ 'osd_scrub_max_interval', # osd-pg-scrub
+ 'osd_deep_scrub_interval', # osd-pg-scrub
+ 'osd_scrub_auto_repair', # osd-pg-scrub
+ 'osd_max_scrubs', # osd-pg-scrub
+ 'osd_scrub_priority', # osd-pg-scrub
+ 'osd_scrub_sleep', # osd-pg-scrub
+ 'osd_scrub_auto_repair_num_errors', # osd-pg-scrub
+ 'osd_debug_deep_scrub_sleep', # osd-pg-scrub
+ 'osd_deep_scrub_keys', # osd-pg-scrub
+ 'osd_deep_scrub_large_omap_object_key_threshold', # osd-pg-scrub
+ 'osd_deep_scrub_large_omap_object_value_sum_threshold', # osd-pg-scrub
+ 'osd_deep_scrub_randomize_ratio', # osd-pg-scrub
+ 'osd_deep_scrub_stride', # osd-pg-scrub
+ 'osd_deep_scrub_update_digest_min_age', # osd-pg-scrub
+ 'osd_op_queue_mclock_scrub_lim', # osd-pg-scrub
+ 'osd_op_queue_mclock_scrub_res', # osd-pg-scrub
+ 'osd_op_queue_mclock_scrub_wgt', # osd-pg-scrub
+ 'osd_requested_scrub_priority', # osd-pg-scrub
+ 'osd_scrub_backoff_ratio', # osd-pg-scrub
+ 'osd_scrub_chunk_max', # osd-pg-scrub
+ 'osd_scrub_chunk_min', # osd-pg-scrub
+ 'osd_scrub_cost', # osd-pg-scrub
+ 'osd_scrub_interval_randomize_ratio', # osd-pg-scrub
+ 'osd_scrub_invalid_stats', # osd-pg-scrub
+ 'osd_scrub_load_threshold', # osd-pg-scrub
+ 'osd_scrub_max_preemptions', # osd-pg-scrub
+ 'mon_allow_pool_delete' # pool-list
+ ]
+
+ for config_option in hard_coded_options:
+ self._get('/api/cluster_conf/{}'.format(config_option))
+ self.assertStatus(200)
+
+ def _validate_single(self, data):
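+ # every config option returned by the API is expected to expose these fields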
+ self.assertIn('name', data)
+ self.assertIn('daemon_default', data)
+ self.assertIn('long_desc', data)
+ self.assertIn('level', data)
+ self.assertIn('default', data)
+ self.assertIn('see_also', data)
+ self.assertIn('tags', data)
+ self.assertIn('min', data)
+ self.assertIn('max', data)
+ self.assertIn('services', data)
+ self.assertIn('type', data)
+ self.assertIn('desc', data)
+ self.assertIn(data['type'], ['str', 'bool', 'float', 'int', 'size', 'uint', 'addr', 'uuid',
+ 'secs'])
+
+ if 'value' in data:
+ self.assertIn('source', data)
+ self.assertIsInstance(data['value'], list)
+
+ for entry in data['value']:
+ self.assertIsInstance(entry, dict)
+ self.assertIn('section', entry)
+ self.assertIn('value', entry)
+
+ def _wait_for_expected_get_result(self, get_func, get_params, expected_result, max_attempts=30,
+ sleep_time=1):
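+ # poll get_func until it returns expected_result; gives up and returns None after max_attempts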
+ attempts = 0
+ while attempts < max_attempts:
+ get_result = get_func(get_params)
+ if get_result == expected_result:
+ self.assertStatus(200)
+ return get_result
+
+ time.sleep(sleep_time)
+ attempts += 1
+
+ def _get_config_by_name(self, conf_name):
+ data = self._get('/api/cluster_conf/{}'.format(conf_name))
+ if 'value' in data:
+ return data['value']
+ return None
+
+ def _clear_all_values_for_config_option(self, config_name):
+ values = self._get_config_by_name(config_name)
+ if values:
+ for value in values:
+ self._ceph_cmd(['config', 'rm', value['section'], config_name])
+
+ def _reset_original_values(self, config_name, orig_values):
+ if orig_values:
+ for value in orig_values:
+ self._ceph_cmd(['config', 'set', value['section'], config_name, value['value']])
diff --git a/qa/tasks/mgr/dashboard/test_erasure_code_profile.py b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py
new file mode 100644
index 00000000..9fcce30f
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import six
+
+from .helper import DashboardTestCase, JObj, JList
+
+
+class ECPTest(DashboardTestCase):
+
+ AUTH_ROLES = ['pool-manager']
+
+ @DashboardTestCase.RunAs('test', 'test', ['rgw-manager'])
+ def test_read_access_permissions(self):
+ self._get('/api/erasure_code_profile')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', ['read-only'])
+ def test_write_access_permissions(self):
+ self._get('/api/erasure_code_profile')
+ self.assertStatus(200)
+ data = {'name': 'ecp32', 'k': 3, 'm': 2}
+ self._post('/api/erasure_code_profile', data)
+ self.assertStatus(403)
+ self._delete('/api/erasure_code_profile/default')
+ self.assertStatus(403)
+
+ @classmethod
+ def tearDownClass(cls):
+ super(ECPTest, cls).tearDownClass()
+ cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecp32'])
+ cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'lrc'])
+
+ def test_list(self):
+ data = self._get('/api/erasure_code_profile')
+ self.assertStatus(200)
+
+ default = [p for p in data if p['name'] == 'default']
+ if default:
+ default_ecp = {
+ 'k': 2,
+ 'technique': 'reed_sol_van',
+ 'm': 1,
+ 'name': 'default',
+ 'plugin': 'jerasure'
+ }
+ if 'crush-failure-domain' in default[0]:
+ default_ecp['crush-failure-domain'] = default[0]['crush-failure-domain']
+ self.assertSubset(default_ecp, default[0])
+ get_data = self._get('/api/erasure_code_profile/default')
+ self.assertEqual(get_data, default[0])
+
+
+ def test_create(self):
+ data = {'name': 'ecp32', 'k': 3, 'm': 2}
+ self._post('/api/erasure_code_profile', data)
+ self.assertStatus(201)
+
+ self._get('/api/erasure_code_profile/ecp32')
+ self.assertJsonSubset({
+ 'crush-device-class': '',
+ 'crush-failure-domain': 'osd',
+ 'crush-root': 'default',
+ 'jerasure-per-chunk-alignment': 'false',
+ 'k': 3,
+ 'm': 2,
+ 'name': 'ecp32',
+ 'plugin': 'jerasure',
+ 'technique': 'reed_sol_van',
+ })
+
+ self.assertStatus(200)
+
+ self._delete('/api/erasure_code_profile/ecp32')
+ self.assertStatus(204)
+
+ def test_create_plugin(self):
+ data = {'name': 'lrc', 'k': '2', 'm': '2', 'l': '2', 'plugin': 'lrc'}
+ self._post('/api/erasure_code_profile', data)
+ self.assertJsonBody(None)
+ self.assertStatus(201)
+
+ self._get('/api/erasure_code_profile/lrc')
+ self.assertJsonBody({
+ 'crush-device-class': '',
+ 'crush-failure-domain': 'host',
+ 'crush-root': 'default',
+ 'k': 2,
+ 'l': '2',
+ 'm': 2,
+ 'name': 'lrc',
+ 'plugin': 'lrc'
+ })
+
+ self.assertStatus(200)
+
+ self._delete('/api/erasure_code_profile/lrc')
+ self.assertStatus(204)
+
+ def test_ecp_info(self):
+ self._get('/api/erasure_code_profile/_info')
+ self.assertSchemaBody(JObj({
+ 'names': JList(six.string_types),
+ 'failure_domains': JList(six.string_types),
+ 'plugins': JList(six.string_types),
+ 'devices': JList(six.string_types),
+ 'directory': six.string_types,
+ }))
+
diff --git a/qa/tasks/mgr/dashboard/test_ganesha.py b/qa/tasks/mgr/dashboard/test_ganesha.py
new file mode 100644
index 00000000..b90bb4af
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_ganesha.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=too-many-public-methods
+
+from __future__ import absolute_import
+
+
+from .helper import DashboardTestCase
+
+
+class GaneshaTest(DashboardTestCase):
+ CEPHFS = True
+ AUTH_ROLES = ['pool-manager', 'ganesha-manager']
+
+ @classmethod
+ def create_pool(cls, name, pg_num, pool_type, application='rbd'):
+ data = {
+ 'pool': name,
+ 'pg_num': pg_num,
+ 'pool_type': pool_type,
+ 'application_metadata': [application]
+ }
+ if pool_type == 'erasure':
+ data['flags'] = ['ec_overwrites']
+ cls._task_post("/api/pool", data)
+
+ @classmethod
+ def setUpClass(cls):
+ super(GaneshaTest, cls).setUpClass()
+ cls.create_pool('ganesha', 2**2, 'replicated')
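+ # one RADOS config object per simulated Ganesha daemon, in two namespaces (one per cluster)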
+ cls._rados_cmd(['-p', 'ganesha', '-N', 'ganesha1', 'create', 'conf-node1'])
+ cls._rados_cmd(['-p', 'ganesha', '-N', 'ganesha1', 'create', 'conf-node2'])
+ cls._rados_cmd(['-p', 'ganesha', '-N', 'ganesha1', 'create', 'conf-node3'])
+ cls._rados_cmd(['-p', 'ganesha', '-N', 'ganesha2', 'create', 'conf-node1'])
+ cls._rados_cmd(['-p', 'ganesha', '-N', 'ganesha2', 'create', 'conf-node2'])
+ cls._rados_cmd(['-p', 'ganesha', '-N', 'ganesha2', 'create', 'conf-node3'])
+ cls._ceph_cmd(['dashboard', 'set-ganesha-clusters-rados-pool-namespace', 'cluster1:ganesha/ganesha1,cluster2:ganesha/ganesha2'])
+
+ # RGW setup
+ cls._radosgw_admin_cmd([
+ 'user', 'create', '--uid', 'admin', '--display-name', 'admin',
+ '--system', '--access-key', 'admin', '--secret', 'admin'
+ ])
+ cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin')
+ cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin')
+
+ @classmethod
+ def tearDownClass(cls):
+ super(GaneshaTest, cls).tearDownClass()
+ cls._radosgw_admin_cmd(['user', 'rm', '--uid', 'admin', '--purge-data'])
+ cls._ceph_cmd(['osd', 'pool', 'delete', 'ganesha', 'ganesha', '--yes-i-really-really-mean-it'])
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['create', 'update', 'delete']}])
+ def test_read_access_permissions(self):
+ self._get('/api/nfs-ganesha/export')
+ self.assertStatus(403)
+
+ def test_list_daemons(self):
+ daemons = self._get("/api/nfs-ganesha/daemon")
+ self.assertEqual(len(daemons), 6)
+ daemons = [(d['daemon_id'], d['cluster_id']) for d in daemons]
+ self.assertIn(('node1', 'cluster1'), daemons)
+ self.assertIn(('node2', 'cluster1'), daemons)
+ self.assertIn(('node3', 'cluster1'), daemons)
+ self.assertIn(('node1', 'cluster2'), daemons)
+ self.assertIn(('node2', 'cluster2'), daemons)
+ self.assertIn(('node3', 'cluster2'), daemons)
+
+ @classmethod
+ def create_export(cls, path, cluster_id, daemons, fsal, sec_label_xattr=None):
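+ # note: the daemons argument is currently ignored; exports are always created on node1 and node3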
+ if fsal == 'CEPH':
+ fsal = {"name": "CEPH", "user_id": "admin", "fs_name": None, "sec_label_xattr": sec_label_xattr}
+ pseudo = "/cephfs{}".format(path)
+ else:
+ fsal = {"name": "RGW", "rgw_user_id": "admin"}
+ pseudo = "/rgw/{}".format(path if path[0] != '/' else "")
+ ex_json = {
+ "path": path,
+ "fsal": fsal,
+ "cluster_id": cluster_id,
+ "daemons": ["node1", "node3"],
+ "pseudo": pseudo,
+ "tag": None,
+ "access_type": "RW",
+ "squash": "no_root_squash",
+ "security_label": sec_label_xattr is not None,
+ "protocols": [4],
+ "transports": ["TCP"],
+ "clients": [{
+ "addresses":["10.0.0.0/8"],
+ "access_type": "RO",
+ "squash": "root"
+ }]
+ }
+ return cls._task_post('/api/nfs-ganesha/export', ex_json)
+
+ def tearDown(self):
+ super(GaneshaTest, self).tearDown()
+ exports = self._get("/api/nfs-ganesha/export")
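+ # nothing to clean up if the export list could not be retrieved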
+ if self._resp.status_code != 200:
+ return
+ self.assertIsInstance(exports, list)
+ for exp in exports:
+ self._task_delete("/api/nfs-ganesha/export/{}/{}"
+ .format(exp['cluster_id'], exp['export_id']))
+
+ def test_create_export(self):
+ exports = self._get("/api/nfs-ganesha/export")
+ self.assertEqual(len(exports), 0)
+
+ data = self.create_export("/foo", 'cluster1', ['node1', 'node2'], 'CEPH', "security.selinux")
+
+ exports = self._get("/api/nfs-ganesha/export")
+ self.assertEqual(len(exports), 1)
+ self.assertDictEqual(exports[0], data)
+ return data
+
+ def test_update_export(self):
+ export = self.test_create_export()
+ export['access_type'] = 'RO'
+ export['daemons'] = ['node1', 'node3']
+ export['security_label'] = True
+ data = self._task_put('/api/nfs-ganesha/export/{}/{}'
+ .format(export['cluster_id'], export['export_id']),
+ export)
+ exports = self._get("/api/nfs-ganesha/export")
+ self.assertEqual(len(exports), 1)
+ self.assertDictEqual(exports[0], data)
+ self.assertEqual(exports[0]['daemons'], ['node1', 'node3'])
+ self.assertEqual(exports[0]['security_label'], True)
+
+ def test_delete_export(self):
+ export = self.test_create_export()
+ self._task_delete("/api/nfs-ganesha/export/{}/{}"
+ .format(export['cluster_id'], export['export_id']))
+ self.assertStatus(204)
+
+ def test_get_export(self):
+ exports = self._get("/api/nfs-ganesha/export")
+ self.assertEqual(len(exports), 0)
+
+ data1 = self.create_export("/foo", 'cluster2', ['node1', 'node2'], 'CEPH')
+ data2 = self.create_export("mybucket", 'cluster2', ['node2', 'node3'], 'RGW')
+
+ export1 = self._get("/api/nfs-ganesha/export/cluster2/1")
+ self.assertDictEqual(export1, data1)
+
+ export2 = self._get("/api/nfs-ganesha/export/cluster2/2")
+ self.assertDictEqual(export2, data2)
+
+ def test_invalid_status(self):
+ self._ceph_cmd(['dashboard', 'set-ganesha-clusters-rados-pool-namespace', ''])
+
+ data = self._get('/api/nfs-ganesha/status')
+ self.assertStatus(200)
+ self.assertIn('available', data)
+ self.assertIn('message', data)
+ self.assertFalse(data['available'])
+ self.assertIn('Ganesha config location is not configured. Please set the GANESHA_RADOS_POOL_NAMESPACE setting.',
+ data['message'])
+
+ self._ceph_cmd(['dashboard', 'set-ganesha-clusters-rados-pool-namespace', 'cluster1:ganesha/ganesha1,cluster2:ganesha/ganesha2'])
+
+ def test_valid_status(self):
+ data = self._get('/api/nfs-ganesha/status')
+ self.assertStatus(200)
+ self.assertIn('available', data)
+ self.assertIn('message', data)
+ self.assertTrue(data['available'])
diff --git a/qa/tasks/mgr/dashboard/test_health.py b/qa/tasks/mgr/dashboard/test_health.py
new file mode 100644
index 00000000..a9334edc
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_health.py
@@ -0,0 +1,305 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase, JAny, JLeaf, JList, JObj
+
+
+class HealthTest(DashboardTestCase):
+ CEPHFS = True
+
+ __pg_info_schema = JObj({
+ 'object_stats': JObj({
+ 'num_objects': int,
+ 'num_object_copies': int,
+ 'num_objects_degraded': int,
+ 'num_objects_misplaced': int,
+ 'num_objects_unfound': int
+ }),
+ 'pgs_per_osd': float,
+ 'statuses': JObj({}, allow_unknown=True, unknown_schema=int)
+ })
+
+ __mdsmap_schema = JObj({
+ 'session_autoclose': int,
+ 'balancer': str,
+ 'up': JObj({}, allow_unknown=True),
+ 'last_failure_osd_epoch': int,
+ 'in': JList(int),
+ 'last_failure': int,
+ 'max_file_size': int,
+ 'explicitly_allowed_features': int,
+ 'damaged': JList(int),
+ 'tableserver': int,
+ 'failed': JList(int),
+ 'metadata_pool': int,
+ 'epoch': int,
+ 'stopped': JList(int),
+ 'max_mds': int,
+ 'compat': JObj({
+ 'compat': JObj({}, allow_unknown=True),
+ 'ro_compat': JObj({}, allow_unknown=True),
+ 'incompat': JObj({}, allow_unknown=True)
+ }),
+ 'min_compat_client': str,
+ 'data_pools': JList(int),
+ 'info': JObj({}, allow_unknown=True),
+ 'fs_name': str,
+ 'created': str,
+ 'standby_count_wanted': int,
+ 'enabled': bool,
+ 'modified': str,
+ 'session_timeout': int,
+ 'flags': int,
+ 'ever_allowed_features': int,
+ 'root': int
+ })
+
+ def test_minimal_health(self):
+ data = self._get('/api/health/minimal')
+ self.assertStatus(200)
+ schema = JObj({
+ 'client_perf': JObj({
+ 'read_bytes_sec': int,
+ 'read_op_per_sec': int,
+ 'recovering_bytes_per_sec': int,
+ 'write_bytes_sec': int,
+ 'write_op_per_sec': int
+ }),
+ 'df': JObj({
+ 'stats': JObj({
+ 'total_avail_bytes': int,
+ 'total_bytes': int,
+ 'total_used_raw_bytes': int,
+ })
+ }),
+ 'fs_map': JObj({
+ 'filesystems': JList(
+ JObj({
+ 'mdsmap': self.__mdsmap_schema
+ }),
+ ),
+ 'standbys': JList(JObj({}, allow_unknown=True)),
+ }),
+ 'health': JObj({
+ 'checks': JList(str),
+ 'status': str,
+ }),
+ 'hosts': int,
+ 'iscsi_daemons': int,
+ 'mgr_map': JObj({
+ 'active_name': str,
+ 'standbys': JList(JLeaf(dict))
+ }),
+ 'mon_status': JObj({
+ 'monmap': JObj({
+ 'mons': JList(JLeaf(dict)),
+ }),
+ 'quorum': JList(int)
+ }),
+ 'osd_map': JObj({
+ 'osds': JList(
+ JObj({
+ 'in': int,
+ 'up': int,
+ })),
+ }),
+ 'pg_info': self.__pg_info_schema,
+ 'pools': JList(JLeaf(dict)),
+ 'rgw': int,
+ 'scrub_status': str
+ })
+ self.assertSchema(data, schema)
+
+ def test_full_health(self):
+ data = self._get('/api/health/full')
+ self.assertStatus(200)
+ module_info_schema = JObj({
+ 'can_run': bool,
+ 'error_string': str,
+ 'name': str,
+ 'module_options': JObj(
+ {},
+ allow_unknown=True,
+ unknown_schema=JObj({
+ 'name': str,
+ 'type': str,
+ 'level': str,
+ 'flags': int,
+ 'default_value': str,
+ 'min': str,
+ 'max': str,
+ 'enum_allowed': JList(str),
+ 'see_also': JList(str),
+ 'desc': str,
+ 'long_desc': str,
+ 'tags': JList(str),
+ })),
+ })
+ schema = JObj({
+ 'client_perf': JObj({
+ 'read_bytes_sec': int,
+ 'read_op_per_sec': int,
+ 'recovering_bytes_per_sec': int,
+ 'write_bytes_sec': int,
+ 'write_op_per_sec': int
+ }),
+ 'df': JObj({
+ 'pools': JList(JObj({
+ 'stats': JObj({
+ 'stored': int,
+ 'objects': int,
+ 'kb_used': int,
+ 'bytes_used': int,
+ 'percent_used': float,
+ 'max_avail': int,
+ 'quota_objects': int,
+ 'quota_bytes': int,
+ 'dirty': int,
+ 'rd': int,
+ 'rd_bytes': int,
+ 'wr': int,
+ 'wr_bytes': int,
+ 'compress_bytes_used': int,
+ 'compress_under_bytes': int,
+ 'stored_raw': int,
+ 'avail_raw': int
+ }),
+ 'name': str,
+ 'id': int
+ })),
+ 'stats': JObj({
+ 'total_avail_bytes': int,
+ 'total_bytes': int,
+ 'total_used_bytes': int,
+ 'total_used_raw_bytes': int,
+ 'total_used_raw_ratio': float,
+ 'num_osds': int,
+ 'num_per_pool_osds': int
+ })
+ }),
+ 'fs_map': JObj({
+ 'compat': JObj({
+ 'compat': JObj({}, allow_unknown=True, unknown_schema=str),
+ 'incompat': JObj(
+ {}, allow_unknown=True, unknown_schema=str),
+ 'ro_compat': JObj(
+ {}, allow_unknown=True, unknown_schema=str)
+ }),
+ 'default_fscid': int,
+ 'epoch': int,
+ 'feature_flags': JObj(
+ {}, allow_unknown=True, unknown_schema=bool),
+ 'filesystems': JList(
+ JObj({
+ 'id': int,
+ 'mdsmap': self.__mdsmap_schema
+ }),
+ ),
+ 'standbys': JList(JObj({}, allow_unknown=True)),
+ }),
+ 'health': JObj({
+ 'checks': JList(str),
+ 'status': str,
+ }),
+ 'hosts': int,
+ 'iscsi_daemons': int,
+ 'mgr_map': JObj({
+ 'active_addr': str,
+ 'active_addrs': JObj({
+ 'addrvec': JList(JObj({
+ 'addr': str,
+ 'nonce': int,
+ 'type': str
+ }))
+ }),
+ 'active_change': str, # timestamp
+ 'active_gid': int,
+ 'active_name': str,
+ 'always_on_modules': JObj(
+ {},
+ allow_unknown=True, unknown_schema=JList(str)
+ ),
+ 'available': bool,
+ 'available_modules': JList(module_info_schema),
+ 'epoch': int,
+ 'modules': JList(str),
+ 'services': JObj(
+ {'dashboard': str}, # This module should always be present
+ allow_unknown=True, unknown_schema=str
+ ),
+ 'standbys': JList(JObj({
+ 'available_modules': JList(module_info_schema),
+ 'gid': int,
+ 'name': str
+ }))
+ }),
+ 'mon_status': JObj({
+ 'election_epoch': int,
+ 'extra_probe_peers': JList(JAny(none=True)),
+ 'feature_map': JObj(
+ {}, allow_unknown=True, unknown_schema=JList(JObj({
+ 'features': str,
+ 'num': int,
+ 'release': str
+ }))
+ ),
+ 'features': JObj({
+ 'quorum_con': str,
+ 'quorum_mon': JList(str),
+ 'required_con': str,
+ 'required_mon': JList(str)
+ }),
+ 'monmap': JObj({
+ # TODO: expand on monmap schema
+ 'mons': JList(JLeaf(dict)),
+ }, allow_unknown=True),
+ 'name': str,
+ 'outside_quorum': JList(int),
+ 'quorum': JList(int),
+ 'quorum_age': int,
+ 'rank': int,
+ 'state': str,
+ # TODO: What type should be expected here?
+ 'sync_provider': JList(JAny(none=True))
+ }),
+ 'osd_map': JObj({
+ # TODO: define schema for crush map and osd_metadata, among
+ # others
+ 'osds': JList(
+ JObj({
+ 'in': int,
+ 'up': int,
+ }, allow_unknown=True)),
+ }, allow_unknown=True),
+ 'pg_info': self.__pg_info_schema,
+ 'pools': JList(JLeaf(dict)),
+ 'rgw': int,
+ 'scrub_status': str
+ })
+ self.assertSchema(data, schema)
+
+ cluster_pools = self.ceph_cluster.mon_manager.list_pools()
+ self.assertEqual(len(cluster_pools), len(data['pools']))
+ for pool in data['pools']:
+ self.assertIn(pool['pool_name'], cluster_pools)
+
+ @DashboardTestCase.RunAs('test', 'test', ['pool-manager'])
+ def test_health_permissions(self):
+ data = self._get('/api/health/full')
+ self.assertStatus(200)
+
+ schema = JObj({
+ 'client_perf': JObj({}, allow_unknown=True),
+ 'df': JObj({}, allow_unknown=True),
+ 'health': JObj({
+ 'checks': JList(str),
+ 'status': str
+ }),
+ 'pools': JList(JLeaf(dict)),
+ })
+ self.assertSchema(data, schema)
+
+ cluster_pools = self.ceph_cluster.mon_manager.list_pools()
+ self.assertEqual(len(cluster_pools), len(data['pools']))
+ for pool in data['pools']:
+ self.assertIn(pool['pool_name'], cluster_pools)
diff --git a/qa/tasks/mgr/dashboard/test_host.py b/qa/tasks/mgr/dashboard/test_host.py
new file mode 100644
index 00000000..f9acbeff
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_host.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class HostControllerTest(DashboardTestCase):
+
+ AUTH_ROLES = ['read-only']
+
+ @DashboardTestCase.RunAs('test', 'test', ['block-manager'])
+ def test_access_permissions(self):
+ self._get('/api/host')
+ self.assertStatus(403)
+
+ def test_host_list(self):
+ data = self._get('/api/host')
+ self.assertStatus(200)
+
+ for server in data:
+ self.assertIn('services', server)
+ self.assertIn('hostname', server)
+ self.assertIn('ceph_version', server)
+ self.assertIsNotNone(server['hostname'])
+ self.assertIsNotNone(server['ceph_version'])
+ self.assertGreaterEqual(len(server['services']), 1)
+ for service in server['services']:
+ self.assertIn('type', service)
+ self.assertIn('id', service)
+ self.assertIsNotNone(service['type'])
+ self.assertIsNotNone(service['id'])
diff --git a/qa/tasks/mgr/dashboard/test_logs.py b/qa/tasks/mgr/dashboard/test_logs.py
new file mode 100644
index 00000000..17d5d830
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_logs.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase, JList, JObj
+
+
+class LogsTest(DashboardTestCase):
+ CEPHFS = True
+
+ def test_logs(self):
+ data = self._get("/api/logs/all")
+ self.assertStatus(200)
+ log_entry_schema = JList(JObj({
+ 'addrs': JObj({
+ 'addrvec': JList(JObj({
+ 'addr': str,
+ 'nonce': int,
+ 'type': str
+ }))
+ }),
+ 'channel': str,
+ 'message': str,
+ 'name': str,
+ 'priority': str,
+ 'rank': str,
+ 'seq': int,
+ 'stamp': str
+ }))
+ schema = JObj({
+ 'audit_log': log_entry_schema,
+ 'clog': log_entry_schema
+ })
+ self.assertSchema(data, schema)
+
+ @DashboardTestCase.RunAs('test', 'test', ['pool-manager'])
+ def test_log_perms(self):
+ self._get("/api/logs/all")
+ self.assertStatus(403)
diff --git a/qa/tasks/mgr/dashboard/test_mgr_module.py b/qa/tasks/mgr/dashboard/test_mgr_module.py
new file mode 100644
index 00000000..080b8b64
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_mgr_module.py
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import logging
+import requests
+
+from .helper import DashboardTestCase, JAny, JObj, JList, JLeaf
+
+logger = logging.getLogger(__name__)
+
+
+class MgrModuleTestCase(DashboardTestCase):
+ MGRS_REQUIRED = 1
+
+ def wait_until_rest_api_accessible(self):
+ """
+ Wait until the REST API is accessible.
+ """
+
+ def _check_connection():
+ try:
+ # Try reaching an API endpoint successfully.
+ self._get('/api/mgr/module')
+ if self._resp.status_code == 200:
+ return True
+ except requests.ConnectionError:
+ pass
+ return False
+
+ self.wait_until_true(_check_connection, timeout=30)
+
+
+class MgrModuleTest(MgrModuleTestCase):
+ def test_list_disabled_module(self):
+ self._ceph_cmd(['mgr', 'module', 'disable', 'iostat'])
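+ # toggling a module can make the mgr respawn, so wait for the REST API to become reachable again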
+ self.wait_until_rest_api_accessible()
+ data = self._get('/api/mgr/module')
+ self.assertStatus(200)
+ self.assertSchema(
+ data,
+ JList(
+ JObj(sub_elems={
+ 'name': JLeaf(str),
+ 'enabled': JLeaf(bool),
+ 'always_on': JLeaf(bool),
+ 'options': JObj(
+ {},
+ allow_unknown=True,
+ unknown_schema=JObj({
+ 'name': str,
+ 'type': str,
+ 'level': str,
+ 'flags': int,
+ 'default_value': JAny(none=False),
+ 'min': JAny(none=False),
+ 'max': JAny(none=False),
+ 'enum_allowed': JList(str),
+ 'see_also': JList(str),
+ 'desc': str,
+ 'long_desc': str,
+ 'tags': JList(str)
+ }))
+ })))
+ module_info = self.find_object_in_list('name', 'iostat', data)
+ self.assertIsNotNone(module_info)
+ self.assertFalse(module_info['enabled'])
+
+ def test_list_enabled_module(self):
+ self._ceph_cmd(['mgr', 'module', 'enable', 'iostat'])
+ self.wait_until_rest_api_accessible()
+ data = self._get('/api/mgr/module')
+ self.assertStatus(200)
+ self.assertSchema(
+ data,
+ JList(
+ JObj(sub_elems={
+ 'name': JLeaf(str),
+ 'enabled': JLeaf(bool),
+ 'always_on': JLeaf(bool),
+ 'options': JObj(
+ {},
+ allow_unknown=True,
+ unknown_schema=JObj({
+ 'name': str,
+ 'type': str,
+ 'level': str,
+ 'flags': int,
+ 'default_value': JAny(none=False),
+ 'min': JAny(none=False),
+ 'max': JAny(none=False),
+ 'enum_allowed': JList(str),
+ 'see_also': JList(str),
+ 'desc': str,
+ 'long_desc': str,
+ 'tags': JList(str)
+ }))
+ })))
+ module_info = self.find_object_in_list('name', 'iostat', data)
+ self.assertIsNotNone(module_info)
+ self.assertTrue(module_info['enabled'])
+
+
+class MgrModuleTelemetryTest(MgrModuleTestCase):
+ def test_get(self):
+ data = self._get('/api/mgr/module/telemetry')
+ self.assertStatus(200)
+ self.assertSchema(
+ data,
+ JObj(
+ allow_unknown=True,
+ sub_elems={
+ 'channel_basic': bool,
+ 'channel_ident': bool,
+ 'channel_crash': bool,
+ 'channel_device': bool,
+ 'contact': str,
+ 'description': str,
+ 'enabled': bool,
+ 'interval': int,
+ 'last_opt_revision': int,
+ 'leaderboard': bool,
+ 'organization': str,
+ 'proxy': str,
+ 'url': str
+ }))
+
+ def test_put(self):
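+ # seed the telemetry options with known values via config keys, then update them through the API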
+ self.set_config_key('config/mgr/mgr/telemetry/contact', '')
+ self.set_config_key('config/mgr/mgr/telemetry/description', '')
+ self.set_config_key('config/mgr/mgr/telemetry/enabled', 'True')
+ self.set_config_key('config/mgr/mgr/telemetry/interval', '72')
+ self.set_config_key('config/mgr/mgr/telemetry/leaderboard', 'False')
+ self.set_config_key('config/mgr/mgr/telemetry/organization', '')
+ self.set_config_key('config/mgr/mgr/telemetry/proxy', '')
+ self.set_config_key('config/mgr/mgr/telemetry/url', '')
+ self._put(
+ '/api/mgr/module/telemetry',
+ data={
+ 'config': {
+ 'contact': 'tux@suse.com',
+ 'description': 'test',
+ 'enabled': False,
+ 'interval': 4711,
+ 'leaderboard': True,
+ 'organization': 'SUSE Linux',
+ 'proxy': 'foo',
+ 'url': 'https://foo.bar/report'
+ }
+ })
+ self.assertStatus(200)
+ data = self._get('/api/mgr/module/telemetry')
+ self.assertStatus(200)
+ self.assertEqual(data['contact'], 'tux@suse.com')
+ self.assertEqual(data['description'], 'test')
+ self.assertFalse(data['enabled'])
+ self.assertEqual(data['interval'], 4711)
+ self.assertTrue(data['leaderboard'])
+ self.assertEqual(data['organization'], 'SUSE Linux')
+ self.assertEqual(data['proxy'], 'foo')
+ self.assertEqual(data['url'], 'https://foo.bar/report')
diff --git a/qa/tasks/mgr/dashboard/test_monitor.py b/qa/tasks/mgr/dashboard/test_monitor.py
new file mode 100644
index 00000000..0cf7e25a
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_monitor.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class MonitorTest(DashboardTestCase):
+ AUTH_ROLES = ['cluster-manager']
+
+ @DashboardTestCase.RunAs('test', 'test', ['block-manager'])
+ def test_access_permissions(self):
+ self._get('/api/monitor')
+ self.assertStatus(403)
+
+
+ def test_monitor_default(self):
+ data = self._get("/api/monitor")
+ self.assertStatus(200)
+
+ self.assertIn('mon_status', data)
+ self.assertIn('in_quorum', data)
+ self.assertIn('out_quorum', data)
+ self.assertIsNotNone(data['mon_status'])
+ self.assertIsNotNone(data['in_quorum'])
+ self.assertIsNotNone(data['out_quorum'])
diff --git a/qa/tasks/mgr/dashboard/test_osd.py b/qa/tasks/mgr/dashboard/test_osd.py
new file mode 100644
index 00000000..c6c7c5aa
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_osd.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+import json
+
+from .helper import DashboardTestCase, JObj, JAny, JList, JLeaf, JTuple
+
+
+class OsdTest(DashboardTestCase):
+
+ AUTH_ROLES = ['cluster-manager']
+
+ def tearDown(self):
+ self._post('/api/osd/0/mark_in')
+
+ @DashboardTestCase.RunAs('test', 'test', ['block-manager'])
+ def test_access_permissions(self):
+ self._get('/api/osd')
+ self.assertStatus(403)
+ self._get('/api/osd/0')
+ self.assertStatus(403)
+
+ def assert_in_and_not_none(self, data, properties):
+ self.assertSchema(data, JObj({p: JAny(none=False) for p in properties}, allow_unknown=True))
+
+ def test_list(self):
+ data = self._get('/api/osd')
+ self.assertStatus(200)
+
+ self.assertGreaterEqual(len(data), 1)
+ data = data[0]
+ self.assert_in_and_not_none(data, ['host', 'tree', 'state', 'stats', 'stats_history'])
+ self.assert_in_and_not_none(data['host'], ['name'])
+ self.assert_in_and_not_none(data['tree'], ['id'])
+ self.assert_in_and_not_none(data['stats'], ['numpg', 'stat_bytes_used', 'stat_bytes',
+ 'op_r', 'op_w'])
+ self.assert_in_and_not_none(data['stats_history'], ['op_out_bytes', 'op_in_bytes'])
+ self.assertSchema(data['stats_history']['op_out_bytes'],
+ JList(JTuple([JLeaf(int), JLeaf(float)])))
+
+ def test_details(self):
+ data = self._get('/api/osd/0')
+ self.assertStatus(200)
+ self.assert_in_and_not_none(data, ['osd_metadata', 'histogram'])
+ self.assert_in_and_not_none(data['histogram'], ['osd'])
+ self.assert_in_and_not_none(data['histogram']['osd'], ['op_w_latency_in_bytes_histogram',
+ 'op_r_latency_out_bytes_histogram'])
+
+ def test_scrub(self):
+ self._post('/api/osd/0/scrub?deep=False')
+ self.assertStatus(200)
+
+ self._post('/api/osd/0/scrub?deep=True')
+ self.assertStatus(200)
+
+ def test_mark_out_and_in(self):
+ self._post('/api/osd/0/mark_out')
+ self.assertStatus(200)
+
+ self._post('/api/osd/0/mark_in')
+ self.assertStatus(200)
+
+ def test_mark_down(self):
+ self._post('/api/osd/0/mark_down')
+ self.assertStatus(200)
+
+ def test_reweight(self):
+ self._post('/api/osd/0/reweight', {'weight': 0.4})
+ self.assertStatus(200)
+
+ def get_reweight_value():
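+ # the new weight may not be visible immediately, so poll the OSD map until it matches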
+ self._get('/api/osd/0')
+ response = self.jsonBody()
+ if 'osd_map' in response and 'weight' in response['osd_map']:
+ return round(response['osd_map']['weight'], 1)
+ self.wait_until_equal(get_reweight_value, 0.4, 10)
+ self.assertStatus(200)
+
+ # Undo
+ self._post('/api/osd/0/reweight', {'weight': 1})
+
+ def test_create_lost_destroy_remove(self):
+ # Create
+ self._post('/api/osd', {
+ 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f',
+ 'svc_id': 5
+ })
+ self.assertStatus(201)
+ # Lost
+ self._post('/api/osd/5/mark_lost')
+ self.assertStatus(200)
+ # Destroy
+ self._post('/api/osd/5/destroy')
+ self.assertStatus(200)
+ # Purge
+ self._post('/api/osd/5/purge')
+ self.assertStatus(200)
+
+ def test_safe_to_destroy(self):
+ osd_dump = json.loads(self._ceph_cmd(['osd', 'dump', '-f', 'json']))
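+ # pick an OSD id that does not exist, so it is trivially safe to destroy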
+ unused_osd_id = max(map(lambda e: e['osd'], osd_dump['osds'])) + 10
+ self._get('/api/osd/{}/safe_to_destroy'.format(unused_osd_id))
+ self.assertStatus(200)
+ self.assertJsonBody({
+ 'is_safe_to_destroy': True,
+ 'active': [],
+ 'missing_stats': [],
+ 'safe_to_destroy': [unused_osd_id],
+ 'stored_pgs': [],
+ })
+
+ def get_destroy_status():
+ self._get('/api/osd/0/safe_to_destroy')
+ if 'is_safe_to_destroy' in self.jsonBody():
+ return self.jsonBody()['is_safe_to_destroy']
+ return None
+ self.wait_until_equal(get_destroy_status, False, 10)
+ self.assertStatus(200)
+
+
+class OsdFlagsTest(DashboardTestCase):
+ def __init__(self, *args, **kwargs):
+ super(OsdFlagsTest, self).__init__(*args, **kwargs)
+ self._initial_flags = sorted( # These flags cannot be unset
+ ['sortbitwise', 'recovery_deletes', 'purged_snapdirs',
+ 'pglog_hardlimit'])
+
+ @classmethod
+ def _get_cluster_osd_flags(cls):
+ return sorted(
+ json.loads(cls._ceph_cmd(['osd', 'dump',
+ '--format=json']))['flags_set'])
+
+ @classmethod
+ def _put_flags(cls, flags):
+ cls._put('/api/osd/flags', data={'flags': flags})
+ return sorted(cls._resp.json())
+
+ def test_list_osd_flags(self):
+ flags = self._get('/api/osd/flags')
+ self.assertStatus(200)
+ self.assertEqual(len(flags), 4)
+ self.assertEqual(sorted(flags), self._initial_flags)
+
+ def test_add_osd_flag(self):
+ flags = self._put_flags([
+ 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout',
+ 'pause', 'pglog_hardlimit'
+ ])
+ self.assertEqual(flags, sorted([
+ 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout',
+ 'pause', 'pglog_hardlimit'
+ ]))
+
+ # Restore flags
+ self._put_flags(self._initial_flags)
diff --git a/qa/tasks/mgr/dashboard/test_perf_counters.py b/qa/tasks/mgr/dashboard/test_perf_counters.py
new file mode 100644
index 00000000..c01368bc
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_perf_counters.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase, JObj
+
+
+class PerfCountersControllerTest(DashboardTestCase):
+
+ def test_perf_counters_list(self):
+ data = self._get('/api/perf_counters')
+ self.assertStatus(200)
+
+ self.assertIsInstance(data, dict)
+ for mon in self.mons():
+ self.assertIn('mon.{}'.format(mon), data)
+
+ osds = self.ceph_cluster.mon_manager.get_osd_dump()
+ for osd in osds:
+ self.assertIn('osd.{}'.format(osd['osd']), data)
+
+ def _validate_perf(self, srv_id, srv_type, data, allow_empty):
+ self.assertIsInstance(data, dict)
+ self.assertEqual(srv_type, data['service']['type'])
+ self.assertEqual(str(srv_id), data['service']['id'])
+ self.assertIsInstance(data['counters'], list)
+ if not allow_empty:
+ self.assertGreater(len(data['counters']), 0)
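+ # validating the first counter entry is enough to check the response structure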
+ for counter in data['counters'][0:1]:
+ self.assertIsInstance(counter, dict)
+ self.assertIn('description', counter)
+ self.assertIn('name', counter)
+ self.assertIn('unit', counter)
+ self.assertIn('value', counter)
+
+ def test_perf_counters_mon_get(self):
+ mon = self.mons()[0]
+ data = self._get('/api/perf_counters/mon/{}'.format(mon))
+ self.assertStatus(200)
+ self._validate_perf(mon, 'mon', data, allow_empty=False)
+
+ def test_perf_counters_mgr_get(self):
+ mgr = list(self.mgr_cluster.mgr_ids)[0]
+ data = self._get('/api/perf_counters/mgr/{}'.format(mgr))
+ self.assertStatus(200)
+ self._validate_perf(mgr, 'mgr', data, allow_empty=False)
+
+ def test_perf_counters_mds_get(self):
+ for mds in self.mds_cluster.mds_ids:
+ data = self._get('/api/perf_counters/mds/{}'.format(mds))
+ self.assertStatus(200)
+ self._validate_perf(mds, 'mds', data, allow_empty=True)
+
+ def test_perf_counters_osd_get(self):
+ for osd in self.ceph_cluster.mon_manager.get_osd_dump():
+ osd = osd['osd']
+ data = self._get('/api/perf_counters/osd/{}'.format(osd))
+ self.assertStatus(200)
+ self._validate_perf(osd, 'osd', data, allow_empty=False)
+
+ def test_perf_counters_not_found(self):
+ osds = self.ceph_cluster.mon_manager.get_osd_dump()
+ unused_id = int(list(map(lambda o: o['osd'], osds)).pop()) + 1
+
+ self._get('/api/perf_counters/osd/{}'.format(unused_id))
+ self.assertStatus(404)
+ schema = JObj(sub_elems={
+ 'status': str,
+ 'detail': str,
+ }, allow_unknown=True)
+ self.assertEqual(self._resp.json()['detail'], "'osd.{}' not found".format(unused_id))
+ self.assertSchemaBody(schema)
diff --git a/qa/tasks/mgr/dashboard/test_pool.py b/qa/tasks/mgr/dashboard/test_pool.py
new file mode 100644
index 00000000..ae81f779
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_pool.py
@@ -0,0 +1,364 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import logging
+import six
+import time
+from contextlib import contextmanager
+
+from .helper import DashboardTestCase, JAny, JList, JObj, JUnion
+
+log = logging.getLogger(__name__)
+
+
+class PoolTest(DashboardTestCase):
+ AUTH_ROLES = ['pool-manager']
+
+ pool_schema = JObj(sub_elems={
+ 'pool_name': str,
+ 'type': str,
+ 'application_metadata': JList(str),
+ 'flags': int,
+ 'flags_names': str,
+ }, allow_unknown=True)
+
+ pool_list_stat_schema = JObj(sub_elems={
+ 'latest': JUnion([int, float]),
+ 'rate': float,
+ 'rates': JList(JAny(none=False)),
+ })
+
+ pool_list_stats_schema = JObj(sub_elems={
+ 'avail_raw': pool_list_stat_schema,
+ 'bytes_used': pool_list_stat_schema,
+ 'max_avail': pool_list_stat_schema,
+ 'percent_used': pool_list_stat_schema,
+ 'rd_bytes': pool_list_stat_schema,
+ 'wr_bytes': pool_list_stat_schema,
+ 'rd': pool_list_stat_schema,
+ 'wr': pool_list_stat_schema,
+ }, allow_unknown=True)
+
+ pool_rbd_conf_schema = JList(JObj(sub_elems={
+ 'name': str,
+ 'value': str,
+ 'source': int
+ }))
+
+ @contextmanager
+ def __yield_pool(self, name=None, data=None, deletion_name=None):
+ """
+ Create a pool from either just a name or a full pool description.
+ Creation is validated immediately and the pool is deleted (and the deletion validated)
+ once the caller is done with it.
+
+ :param name: Name of the pool
+ :param data: Full description of the pool
+ :param deletion_name: Only needed if the pool was renamed while in use
+ :return: The description that was used to create the pool (yielded to the ``with`` block)
+ """
+ data = self._create_pool(name, data)
+ yield data
+ self._delete_pool(deletion_name or data['pool'])
+
+ def _create_pool(self, name, data):
+ data = data or {
+ 'pool': name,
+ 'pg_num': '4',
+ 'pool_type': 'replicated',
+ 'compression_algorithm': 'snappy',
+ 'compression_mode': 'passive',
+ 'compression_max_blob_size': '131072',
+ 'compression_required_ratio': '0.875',
+ 'application_metadata': ['rbd'],
+ 'configuration': {
+ 'rbd_qos_bps_limit': 1024000,
+ 'rbd_qos_iops_limit': 5000,
+ }
+ }
+ self._task_post('/api/pool/', data)
+ self.assertStatus(201)
+ self._validate_pool_properties(data, self._get_pool(data['pool']))
+ return data
+
+ def _delete_pool(self, name):
+ self._task_delete('/api/pool/' + name)
+ self.assertStatus(204)
+
+ def _validate_pool_properties(self, data, pool, timeout=DashboardTestCase.TIMEOUT_HEALTH_CLEAR):
+ for prop, value in data.items():
+ if prop == 'pool_type':
+ self.assertEqual(pool['type'], value)
+ elif prop == 'size':
+ self.assertEqual(pool[prop], int(value),
+ '{}: {} != {}'.format(prop, pool[prop], value))
+ elif prop == 'pg_num':
+ self._check_pg_num(value, pool)
+ elif prop == 'application_metadata':
+ self.assertIsInstance(pool[prop], list)
+ self.assertEqual(value, pool[prop])
+ elif prop == 'pool':
+ self.assertEqual(pool['pool_name'], value)
+ elif prop.startswith('compression'):
+ if value is not None:
+ if prop.endswith('size'):
+ value = int(value)
+ elif prop.endswith('ratio'):
+ value = float(value)
+ self.assertEqual(pool['options'][prop], value)
+ else:
+ self.assertEqual(pool['options'], {})
+ elif prop == 'configuration':
+ # configuration cannot really be checked here for two reasons:
+ # 1. The default value cannot be given to this method, which becomes relevant
+ # when resetting a value, because it's not always zero.
+ # 2. The expected `source` cannot be given to this method, and it cannot
+ # reliably be determined (see 1)
+ pass
+ else:
+ self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value))
+
+ self.wait_until_equal(self._get_health_status, 'HEALTH_OK', timeout)
+
+ def _get_health_status(self):
+ return self._get('/api/health/minimal')['health']['status']
+
+ def _get_pool(self, pool_name):
+ pool = self._get("/api/pool/" + pool_name)
+ self.assertStatus(200)
+ self.assertSchemaBody(self.pool_schema)
+ return pool
+
+ def _check_pg_num(self, value, pool):
+ """
+ If the two properties do not have the same value, the cluster goes into a warning state,
+ which only happens while pg_num is being updated on an existing pool. The test that
+ triggers this is currently commented out because our QA systems cannot handle the change.
+ Feel free to test it locally.
+ """
+ pgp_prop = 'pg_placement_num'
+ t = 0
+ while (int(value) != pool[pgp_prop] or self._get_health_status() != 'HEALTH_OK') \
+ and t < 180:
+ time.sleep(2)
+ t += 2
+ pool = self._get_pool(pool['pool_name'])
+ for p in ['pg_num', pgp_prop]: # Should have the same values
+ self.assertEqual(pool[p], int(value), '{}: {} != {}'.format(p, pool[p], value))
+
+ @DashboardTestCase.RunAs('test', 'test', [{'pool': ['create', 'update', 'delete']}])
+ def test_read_access_permissions(self):
+ self._get('/api/pool')
+ self.assertStatus(403)
+ self._get('/api/pool/bla')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'pool': ['read', 'update', 'delete']}])
+ def test_create_access_permissions(self):
+ self._task_post('/api/pool/', {})
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'pool': ['read', 'create', 'update']}])
+ def test_delete_access_permissions(self):
+ self._delete('/api/pool/ddd')
+ self.assertStatus(403)
+
+ def test_pool_list(self):
+ data = self._get("/api/pool")
+ self.assertStatus(200)
+
+ cluster_pools = self.ceph_cluster.mon_manager.list_pools()
+ self.assertEqual(len(cluster_pools), len(data))
+ self.assertSchemaBody(JList(self.pool_schema))
+ for pool in data:
+ self.assertNotIn('pg_status', pool)
+ self.assertNotIn('stats', pool)
+ self.assertIn(pool['pool_name'], cluster_pools)
+
+ def test_pool_list_attrs(self):
+ data = self._get("/api/pool?attrs=type,flags")
+ self.assertStatus(200)
+
+ cluster_pools = self.ceph_cluster.mon_manager.list_pools()
+ self.assertEqual(len(cluster_pools), len(data))
+ for pool in data:
+ self.assertIn('pool_name', pool)
+ self.assertIn('type', pool)
+ self.assertIn('flags', pool)
+ self.assertNotIn('flags_names', pool)
+ self.assertNotIn('pg_status', pool)
+ self.assertNotIn('stats', pool)
+ self.assertIn(pool['pool_name'], cluster_pools)
+
+ def test_pool_list_stats(self):
+ data = self._get("/api/pool?stats=true")
+ self.assertStatus(200)
+
+ cluster_pools = self.ceph_cluster.mon_manager.list_pools()
+ self.assertEqual(len(cluster_pools), len(data))
+ self.assertSchemaBody(JList(self.pool_schema))
+ for pool in data:
+ self.assertIn('pool_name', pool)
+ self.assertIn('type', pool)
+ self.assertIn('application_metadata', pool)
+ self.assertIn('flags', pool)
+ self.assertIn('pg_status', pool)
+ self.assertSchema(pool['stats'], self.pool_list_stats_schema)
+ self.assertIn('flags_names', pool)
+ self.assertIn(pool['pool_name'], cluster_pools)
+
+ def test_pool_get(self):
+ cluster_pools = self.ceph_cluster.mon_manager.list_pools()
+ pool = self._get("/api/pool/{}?stats=true&attrs=type,flags,stats"
+ .format(cluster_pools[0]))
+ self.assertEqual(pool['pool_name'], cluster_pools[0])
+ self.assertIn('type', pool)
+ self.assertIn('flags', pool)
+ self.assertNotIn('pg_status', pool)
+ self.assertSchema(pool['stats'], self.pool_list_stats_schema)
+ self.assertNotIn('flags_names', pool)
+ self.assertSchema(pool['configuration'], self.pool_rbd_conf_schema)
+
+ def test_pool_create_with_two_applications(self):
+ self.__yield_pool(None, {
+ 'pool': 'dashboard_pool1',
+ 'pg_num': '8',
+ 'pool_type': 'replicated',
+ 'application_metadata': ['rbd', 'sth'],
+ })
+
+ def test_pool_create_with_ecp_and_rule(self):
+ self._ceph_cmd(['osd', 'crush', 'rule', 'create-erasure', 'ecrule'])
+ self._ceph_cmd(
+ ['osd', 'erasure-code-profile', 'set', 'ecprofile', 'crush-failure-domain=osd'])
+ self.__yield_pool(None, {
+ 'pool': 'dashboard_pool2',
+ 'pg_num': '8',
+ 'pool_type': 'erasure',
+ 'application_metadata': ['rbd'],
+ 'erasure_code_profile': 'ecprofile',
+ 'crush_rule': 'ecrule',
+ })
+ self._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile'])
+
+ def test_pool_create_with_compression(self):
+ pool = {
+ 'pool': 'dashboard_pool3',
+ 'pg_num': '8',
+ 'pool_type': 'replicated',
+ 'compression_algorithm': 'zstd',
+ 'compression_mode': 'aggressive',
+ 'compression_max_blob_size': '10000000',
+ 'compression_required_ratio': '0.8',
+ 'configuration': {
+ 'rbd_qos_bps_limit': 2048,
+ 'rbd_qos_iops_limit': None,
+ },
+ }
+ with self.__yield_pool(None, pool):
+ expected_configuration = [{
+ 'name': 'rbd_qos_bps_limit',
+ 'source': 1,
+ 'value': '2048',
+ }, {
+ 'name': 'rbd_qos_iops_limit',
+ 'source': 0,
+ 'value': '0',
+ }]
+ new_pool = self._get_pool(pool['pool'])
+ for conf in expected_configuration:
+ self.assertIn(conf, new_pool['configuration'])
+
+ def test_pool_update_metadata(self):
+ pool_name = 'pool_update_metadata'
+ with self.__yield_pool(pool_name):
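+            # Toggle the application metadata back and forth between the two
+            # sets to exercise both adding and removing applications in place.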
+ props = {'application_metadata': ['rbd', 'sth']}
+ self._task_put('/api/pool/{}'.format(pool_name), props)
+ self._validate_pool_properties(props, self._get_pool(pool_name),
+ self.TIMEOUT_HEALTH_CLEAR * 2)
+
+ properties = {'application_metadata': ['rgw']}
+ self._task_put('/api/pool/' + pool_name, properties)
+ self._validate_pool_properties(properties, self._get_pool(pool_name),
+ self.TIMEOUT_HEALTH_CLEAR * 2)
+
+ properties = {'application_metadata': ['rbd', 'sth']}
+ self._task_put('/api/pool/' + pool_name, properties)
+ self._validate_pool_properties(properties, self._get_pool(pool_name),
+ self.TIMEOUT_HEALTH_CLEAR * 2)
+
+ properties = {'application_metadata': ['rgw']}
+ self._task_put('/api/pool/' + pool_name, properties)
+ self._validate_pool_properties(properties, self._get_pool(pool_name),
+ self.TIMEOUT_HEALTH_CLEAR * 2)
+
+ def test_pool_update_configuration(self):
+ pool_name = 'pool_update_configuration'
+ with self.__yield_pool(pool_name):
+ configuration = {
+ 'rbd_qos_bps_limit': 1024,
+ 'rbd_qos_iops_limit': None,
+ }
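+            # Options set through the pool API report source 1 (pool level);
+            # options passed as None are unset and fall back to the default,
+            # reported with source 0 and value '0'.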
+ expected_configuration = [{
+ 'name': 'rbd_qos_bps_limit',
+ 'source': 1,
+ 'value': '1024',
+ }, {
+ 'name': 'rbd_qos_iops_limit',
+ 'source': 0,
+ 'value': '0',
+ }]
+ self._task_put('/api/pool/' + pool_name, {'configuration': configuration})
+ time.sleep(5)
+ pool_config = self._get_pool(pool_name)['configuration']
+ for conf in expected_configuration:
+ self.assertIn(conf, pool_config)
+
+ def test_pool_update_compression(self):
+ pool_name = 'pool_update_compression'
+ with self.__yield_pool(pool_name):
+ properties = {
+ 'compression_algorithm': 'zstd',
+ 'compression_mode': 'aggressive',
+ 'compression_max_blob_size': '10000000',
+ 'compression_required_ratio': '0.8',
+ }
+ self._task_put('/api/pool/' + pool_name, properties)
+ time.sleep(5)
+ self._validate_pool_properties(properties, self._get_pool(pool_name))
+
+ def test_pool_update_unset_compression(self):
+ pool_name = 'pool_update_unset_compression'
+ with self.__yield_pool(pool_name):
+ self._task_put('/api/pool/' + pool_name, {'compression_mode': 'unset'})
+ time.sleep(5)
+ self._validate_pool_properties({
+ 'compression_algorithm': None,
+ 'compression_mode': None,
+ 'compression_max_blob_size': None,
+ 'compression_required_ratio': None,
+ }, self._get_pool(pool_name))
+
+ def test_pool_create_fail(self):
+ data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'}
+ self._task_post('/api/pool/', data)
+ self.assertStatus(400)
+ self.assertJsonBody({
+ 'component': 'pool',
+ 'code': "2",
+ 'detail': "[errno -2] specified rule dnf doesn't exist"
+ })
+
+ def test_pool_info(self):
+ self._get("/api/pool/_info")
+ self.assertSchemaBody(JObj({
+ 'pool_names': JList(six.string_types),
+ 'compression_algorithms': JList(six.string_types),
+ 'compression_modes': JList(six.string_types),
+ 'is_all_bluestore': bool,
+ 'bluestore_compression_algorithm': six.string_types,
+ 'osd_count': int,
+ 'crush_rules_replicated': JList(JObj({}, allow_unknown=True)),
+ 'crush_rules_erasure': JList(JObj({}, allow_unknown=True)),
+ 'pg_autoscale_default_mode': six.string_types,
+ 'pg_autoscale_modes': JList(six.string_types),
+ }))
diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py
new file mode 100644
index 00000000..68af93d9
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_rbd.py
@@ -0,0 +1,797 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=too-many-public-methods
+
+from __future__ import absolute_import
+
+import time
+
+from .helper import DashboardTestCase, JObj, JLeaf, JList
+
+
+class RbdTest(DashboardTestCase):
+ AUTH_ROLES = ['pool-manager', 'block-manager']
+
+ @classmethod
+ def create_pool(cls, name, pg_num, pool_type, application='rbd'):
+ data = {
+ 'pool': name,
+ 'pg_num': pg_num,
+ 'pool_type': pool_type,
+ 'application_metadata': [application]
+ }
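+        # RBD images on an erasure-coded pool need the ec_overwrites flag,
+        # because RBD requires partial-object overwrites that EC pools
+        # disallow by default.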
+ if pool_type == 'erasure':
+ data['flags'] = ['ec_overwrites']
+ cls._task_post("/api/pool", data)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['create', 'update', 'delete']}])
+ def test_read_access_permissions(self):
+ self._get('/api/block/image')
+ self.assertStatus(403)
+ self._get('/api/block/image/pool/image')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'update', 'delete']}])
+ def test_create_access_permissions(self):
+ self.create_image('pool', 'name', 0)
+ self.assertStatus(403)
+ self.create_snapshot('pool', 'image', 'snapshot')
+ self.assertStatus(403)
+ self.copy_image('src_pool', 'src_image', 'dest_pool', 'dest_image')
+ self.assertStatus(403)
+ self.clone_image('parent_pool', 'parent_image', 'parent_snap', 'pool', 'name')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'delete']}])
+ def test_update_access_permissions(self):
+ self.edit_image('pool', 'image')
+ self.assertStatus(403)
+ self.update_snapshot('pool', 'image', 'snapshot', None, None)
+ self.assertStatus(403)
+ self._task_post('/api/block/image/rbd/rollback_img/snap/snap1/rollback')
+ self.assertStatus(403)
+ self.flatten_image('pool', 'image')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'update']}])
+ def test_delete_access_permissions(self):
+ self.remove_image('pool', 'image')
+ self.assertStatus(403)
+ self.remove_snapshot('pool', 'image', 'snapshot')
+ self.assertStatus(403)
+
+ @classmethod
+ def create_image(cls, pool, name, size, **kwargs):
+ data = {'name': name, 'pool_name': pool, 'size': size}
+ data.update(kwargs)
+ return cls._task_post('/api/block/image', data)
+
+ @classmethod
+ def clone_image(cls, parent_pool, parent_image, parent_snap, pool, name,
+ **kwargs):
+ # pylint: disable=too-many-arguments
+ data = {'child_image_name': name, 'child_pool_name': pool}
+ data.update(kwargs)
+ return cls._task_post('/api/block/image/{}/{}/snap/{}/clone'
+ .format(parent_pool, parent_image, parent_snap),
+ data)
+
+ @classmethod
+ def copy_image(cls, src_pool, src_image, dest_pool, dest_image, **kwargs):
+ # pylint: disable=too-many-arguments
+ data = {'dest_image_name': dest_image, 'dest_pool_name': dest_pool}
+ data.update(kwargs)
+ return cls._task_post('/api/block/image/{}/{}/copy'
+ .format(src_pool, src_image), data)
+
+ @classmethod
+ def remove_image(cls, pool, image):
+ return cls._task_delete('/api/block/image/{}/{}'.format(pool, image))
+
+ # pylint: disable=too-many-arguments
+ @classmethod
+ def edit_image(cls, pool, image, name=None, size=None, features=None, **kwargs):
+ kwargs.update({'name': name, 'size': size, 'features': features})
+ return cls._task_put('/api/block/image/{}/{}'.format(pool, image), kwargs)
+
+ @classmethod
+ def flatten_image(cls, pool, image):
+ return cls._task_post('/api/block/image/{}/{}/flatten'.format(pool, image))
+
+ @classmethod
+ def create_snapshot(cls, pool, image, snapshot):
+ return cls._task_post('/api/block/image/{}/{}/snap'.format(pool, image),
+ {'snapshot_name': snapshot})
+
+ @classmethod
+ def remove_snapshot(cls, pool, image, snapshot):
+ return cls._task_delete('/api/block/image/{}/{}/snap/{}'.format(pool, image, snapshot))
+
+ @classmethod
+ def update_snapshot(cls, pool, image, snapshot, new_name, is_protected):
+ return cls._task_put('/api/block/image/{}/{}/snap/{}'.format(pool, image, snapshot),
+ {'new_snap_name': new_name, 'is_protected': is_protected})
+
+ @classmethod
+ def setUpClass(cls):
+ super(RbdTest, cls).setUpClass()
+ cls.create_pool('rbd', 2**3, 'replicated')
+ cls.create_pool('rbd_iscsi', 2**3, 'replicated')
+
+ cls.create_image('rbd', 'img1', 2**30)
+ cls.create_image('rbd', 'img2', 2*2**30)
+ cls.create_image('rbd_iscsi', 'img1', 2**30)
+ cls.create_image('rbd_iscsi', 'img2', 2*2**30)
+
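+        # Record whether all OSDs run BlueStore; tests that rely on EC
+        # overwrites (e.g. images with a separate data pool) are skipped
+        # on clusters that still contain FileStore OSDs.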
+ osd_metadata = cls.ceph_cluster.mon_manager.get_osd_metadata()
+ cls.bluestore_support = True
+ for osd in osd_metadata:
+ if osd['osd_objectstore'] != 'bluestore':
+ cls.bluestore_support = False
+ break
+
+ @classmethod
+ def tearDownClass(cls):
+ super(RbdTest, cls).tearDownClass()
+ cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it'])
+ cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_iscsi', 'rbd_iscsi',
+ '--yes-i-really-really-mean-it'])
+ cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_data', 'rbd_data',
+ '--yes-i-really-really-mean-it'])
+
+ @classmethod
+ def create_image_in_trash(cls, pool, name, delay=0):
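+        # Create an image, remember its id and move it to the trash; `delay`
+        # is the trash deferment time in seconds (0 lets the image expire
+        # immediately, a large value keeps it protected from purging).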
+ cls.create_image(pool, name, 10240)
+ img = cls._get('/api/block/image/{}/{}'.format(pool, name))
+
+ cls._task_post("/api/block/image/{}/{}/move_trash".format(pool, name),
+ {'delay': delay})
+
+ return img['id']
+
+ @classmethod
+ def remove_trash(cls, pool, image_id, image_name, force=False):
+        return cls._task_delete('/api/block/image/trash/{}/{}/?image_name={}&force={}'
+                                .format(pool, image_id, image_name, force))
+
+ @classmethod
+ def get_trash(cls, pool, image_id):
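+        # Return the trash entry for `image_id` in `pool`, or None if the
+        # image is not (or no longer) in the trash.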
+ trash = cls._get('/api/block/image/trash/?pool_name={}'.format(pool))
+ if isinstance(trash, list):
+            for pool_entry in trash:
+                for image in pool_entry['value']:
+ if image['id'] == image_id:
+ return image
+
+ return None
+
+ def _validate_image(self, img, **kwargs):
+ """
+ Example of an RBD image json:
+
+ {
+ "size": 1073741824,
+ "obj_size": 4194304,
+ "num_objs": 256,
+ "order": 22,
+ "block_name_prefix": "rbd_data.10ae2ae8944a",
+ "name": "img1",
+ "pool_name": "rbd",
+ "features": 61,
+ "features_name": ["deep-flatten", "exclusive-lock", "fast-diff", "layering",
+ "object-map"]
+ }
+ """
+ schema = JObj(sub_elems={
+ 'size': JLeaf(int),
+ 'obj_size': JLeaf(int),
+ 'num_objs': JLeaf(int),
+ 'order': JLeaf(int),
+ 'block_name_prefix': JLeaf(str),
+ 'name': JLeaf(str),
+ 'id': JLeaf(str),
+ 'unique_id': JLeaf(str),
+ 'image_format': JLeaf(int),
+ 'pool_name': JLeaf(str),
+ 'features': JLeaf(int),
+ 'features_name': JList(JLeaf(str)),
+ 'stripe_count': JLeaf(int, none=True),
+ 'stripe_unit': JLeaf(int, none=True),
+ 'parent': JObj(sub_elems={'pool_name': JLeaf(str),
+ 'image_name': JLeaf(str),
+ 'snap_name': JLeaf(str)}, none=True),
+ 'data_pool': JLeaf(str, none=True),
+ 'snapshots': JList(JLeaf(dict)),
+ 'timestamp': JLeaf(str, none=True),
+ 'disk_usage': JLeaf(int, none=True),
+ 'total_disk_usage': JLeaf(int, none=True),
+ 'configuration': JList(JObj(sub_elems={
+ 'name': JLeaf(str),
+ 'source': JLeaf(int),
+ 'value': JLeaf(str),
+ })),
+ })
+ self.assertSchema(img, schema)
+
+ for k, v in kwargs.items():
+ if isinstance(v, list):
+ self.assertSetEqual(set(img[k]), set(v))
+ else:
+ self.assertEqual(img[k], v)
+
+ def _validate_snapshot(self, snap, **kwargs):
+ self.assertIn('id', snap)
+ self.assertIn('name', snap)
+ self.assertIn('is_protected', snap)
+ self.assertIn('timestamp', snap)
+ self.assertIn('size', snap)
+ self.assertIn('children', snap)
+
+ for k, v in kwargs.items():
+ if isinstance(v, list):
+ self.assertSetEqual(set(snap[k]), set(v))
+ else:
+ self.assertEqual(snap[k], v)
+
+ def _validate_snapshot_list(self, snap_list, snap_name=None, **kwargs):
+ found = False
+ for snap in snap_list:
+ self.assertIn('name', snap)
+ if snap_name and snap['name'] == snap_name:
+ found = True
+ self._validate_snapshot(snap, **kwargs)
+ break
+ if snap_name and not found:
+ self.fail("Snapshot {} not found".format(snap_name))
+
+ def test_list(self):
+ data = self._view_cache_get('/api/block/image')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 2)
+
+ for pool_view in data:
+ self.assertEqual(pool_view['status'], 0)
+ self.assertIsNotNone(pool_view['value'])
+ self.assertIn('pool_name', pool_view)
+ self.assertIn(pool_view['pool_name'], ['rbd', 'rbd_iscsi'])
+ image_list = pool_view['value']
+ self.assertEqual(len(image_list), 2)
+
+ for img in image_list:
+ self.assertIn('name', img)
+ self.assertIn('pool_name', img)
+ self.assertIn(img['pool_name'], ['rbd', 'rbd_iscsi'])
+ if img['name'] == 'img1':
+ self._validate_image(img, size=1073741824,
+ num_objs=256, obj_size=4194304,
+ features_name=['deep-flatten',
+ 'exclusive-lock',
+ 'fast-diff',
+ 'layering',
+ 'object-map'])
+ elif img['name'] == 'img2':
+ self._validate_image(img, size=2147483648,
+ num_objs=512, obj_size=4194304,
+ features_name=['deep-flatten',
+ 'exclusive-lock',
+ 'fast-diff',
+ 'layering',
+ 'object-map'])
+ else:
+                    self.fail("Unexpected image '{}' in result list".format(img['name']))
+
+ def test_create(self):
+ rbd_name = 'test_rbd'
+ self.create_image('rbd', rbd_name, 10240)
+ self.assertStatus(201)
+
+ img = self._get('/api/block/image/rbd/test_rbd')
+ self.assertStatus(200)
+
+ self._validate_image(img, name=rbd_name, size=10240,
+ num_objs=1, obj_size=4194304,
+ features_name=['deep-flatten',
+ 'exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+
+ self.remove_image('rbd', rbd_name)
+
+ def test_create_with_configuration(self):
+ pool = 'rbd'
+ image_name = 'image_with_config'
+ size = 10240
+ configuration = {
+ 'rbd_qos_bps_limit': 10240,
+ 'rbd_qos_bps_burst': 10240 * 2,
+ }
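+        # Options set on the image itself are reported with source 2; compare
+        # with source 0 (default) and 1 (pool) in the pool API tests.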
+ expected = [{
+ 'name': 'rbd_qos_bps_limit',
+ 'source': 2,
+ 'value': str(10240),
+ }, {
+ 'name': 'rbd_qos_bps_burst',
+ 'source': 2,
+ 'value': str(10240 * 2),
+ }]
+
+ self.create_image(pool, image_name, size, configuration=configuration)
+ self.assertStatus(201)
+ img = self._get('/api/block/image/rbd/{}'.format(image_name))
+ self.assertStatus(200)
+ for conf in expected:
+ self.assertIn(conf, img['configuration'])
+
+ self.remove_image(pool, image_name)
+
+ def test_create_rbd_in_data_pool(self):
+ if not self.bluestore_support:
+ self.skipTest('requires bluestore cluster')
+
+ self.create_pool('data_pool', 2**4, 'erasure')
+
+ rbd_name = 'test_rbd_in_data_pool'
+ self.create_image('rbd', rbd_name, 10240, data_pool='data_pool')
+ self.assertStatus(201)
+
+ img = self._get('/api/block/image/rbd/test_rbd_in_data_pool')
+ self.assertStatus(200)
+
+ self._validate_image(img, name=rbd_name, size=10240,
+ num_objs=1, obj_size=4194304,
+ data_pool='data_pool',
+ features_name=['data-pool', 'deep-flatten',
+ 'exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+
+ self.remove_image('rbd', rbd_name)
+ self.assertStatus(204)
+ self._ceph_cmd(['osd', 'pool', 'delete', 'data_pool', 'data_pool',
+ '--yes-i-really-really-mean-it'])
+
+ def test_create_rbd_twice(self):
+        self.create_image('rbd', 'test_rbd_twice', 10240)
+
+ res = self.create_image('rbd', 'test_rbd_twice', 10240)
+ self.assertStatus(400)
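+        # errno 17 is EEXIST: creating an image with an existing name fails.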
+ self.assertEqual(res, {"code": '17', 'status': 400, "component": "rbd",
+ "detail": "[errno 17] error creating image",
+ 'task': {'name': 'rbd/create',
+ 'metadata': {'pool_name': 'rbd',
+ 'image_name': 'test_rbd_twice'}}})
+ self.remove_image('rbd', 'test_rbd_twice')
+ self.assertStatus(204)
+
+ def test_snapshots_and_clone_info(self):
+ self.create_snapshot('rbd', 'img1', 'snap1')
+ self.create_snapshot('rbd', 'img1', 'snap2')
+ self._rbd_cmd(['snap', 'protect', 'rbd/img1@snap1'])
+ self._rbd_cmd(['clone', 'rbd/img1@snap1', 'rbd_iscsi/img1_clone'])
+
+ img = self._get('/api/block/image/rbd/img1')
+ self.assertStatus(200)
+ self._validate_image(img, name='img1', size=1073741824,
+ num_objs=256, obj_size=4194304, parent=None,
+ features_name=['deep-flatten', 'exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+ for snap in img['snapshots']:
+ if snap['name'] == 'snap1':
+ self._validate_snapshot(snap, is_protected=True)
+ self.assertEqual(len(snap['children']), 1)
+ self.assertDictEqual(snap['children'][0],
+ {'pool_name': 'rbd_iscsi',
+ 'image_name': 'img1_clone'})
+ elif snap['name'] == 'snap2':
+ self._validate_snapshot(snap, is_protected=False)
+
+ img = self._get('/api/block/image/rbd_iscsi/img1_clone')
+ self.assertStatus(200)
+ self._validate_image(img, name='img1_clone', size=1073741824,
+ num_objs=256, obj_size=4194304,
+ parent={'pool_name': 'rbd', 'image_name': 'img1',
+ 'snap_name': 'snap1'},
+ features_name=['deep-flatten', 'exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+ self.remove_image('rbd_iscsi', 'img1_clone')
+ self.assertStatus(204)
+
+ def test_disk_usage(self):
+ self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '50M', 'rbd/img2'])
+ self.create_snapshot('rbd', 'img2', 'snap1')
+ self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '20M', 'rbd/img2'])
+ self.create_snapshot('rbd', 'img2', 'snap2')
+ self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '10M', 'rbd/img2'])
+ self.create_snapshot('rbd', 'img2', 'snap3')
+ self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', 'rbd/img2'])
+ img = self._get('/api/block/image/rbd/img2')
+ self.assertStatus(200)
+ self._validate_image(img, name='img2', size=2147483648,
+ total_disk_usage=268435456, disk_usage=67108864)
+
+ def test_delete_non_existent_image(self):
+ res = self.remove_image('rbd', 'i_dont_exist')
+ self.assertStatus(400)
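+        # errno 2 is ENOENT: the image to be removed does not exist.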
+ self.assertEqual(res, {u'code': u'2', "status": 400, "component": "rbd",
+ "detail": "[errno 2] error removing image",
+ 'task': {'name': 'rbd/delete',
+ 'metadata': {'pool_name': 'rbd',
+ 'image_name': 'i_dont_exist'}}})
+
+ def test_image_delete(self):
+ self.create_image('rbd', 'delete_me', 2**30)
+ self.assertStatus(201)
+ self.create_snapshot('rbd', 'delete_me', 'snap1')
+ self.assertStatus(201)
+ self.create_snapshot('rbd', 'delete_me', 'snap2')
+ self.assertStatus(201)
+
+ img = self._get('/api/block/image/rbd/delete_me')
+ self.assertStatus(200)
+ self._validate_image(img, name='delete_me', size=2**30)
+ self.assertEqual(len(img['snapshots']), 2)
+
+ self.remove_snapshot('rbd', 'delete_me', 'snap1')
+ self.assertStatus(204)
+ self.remove_snapshot('rbd', 'delete_me', 'snap2')
+ self.assertStatus(204)
+
+ img = self._get('/api/block/image/rbd/delete_me')
+ self.assertStatus(200)
+ self._validate_image(img, name='delete_me', size=2**30)
+ self.assertEqual(len(img['snapshots']), 0)
+
+ self.remove_image('rbd', 'delete_me')
+ self.assertStatus(204)
+
+ def test_image_rename(self):
+ self.create_image('rbd', 'edit_img', 2**30)
+ self.assertStatus(201)
+ self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(200)
+ self.edit_image('rbd', 'edit_img', 'new_edit_img')
+ self.assertStatus(200)
+ self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(404)
+ self._get('/api/block/image/rbd/new_edit_img')
+ self.assertStatus(200)
+ self.remove_image('rbd', 'new_edit_img')
+ self.assertStatus(204)
+
+ def test_image_resize(self):
+ self.create_image('rbd', 'edit_img', 2**30)
+ self.assertStatus(201)
+ img = self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(200)
+ self._validate_image(img, size=2**30)
+ self.edit_image('rbd', 'edit_img', size=2*2**30)
+ self.assertStatus(200)
+ img = self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(200)
+ self._validate_image(img, size=2*2**30)
+ self.remove_image('rbd', 'edit_img')
+ self.assertStatus(204)
+
+ def test_image_change_features(self):
+ self.create_image('rbd', 'edit_img', 2**30, features=["layering"])
+ self.assertStatus(201)
+ img = self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(200)
+ self._validate_image(img, features_name=["layering"])
+ self.edit_image('rbd', 'edit_img',
+ features=["fast-diff", "object-map", "exclusive-lock"])
+ self.assertStatus(200)
+ img = self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(200)
+ self._validate_image(img, features_name=['exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+ self.edit_image('rbd', 'edit_img',
+ features=["journaling", "exclusive-lock"])
+ self.assertStatus(200)
+ img = self._get('/api/block/image/rbd/edit_img')
+ self.assertStatus(200)
+ self._validate_image(img, features_name=['exclusive-lock',
+ 'journaling', 'layering'])
+ self.remove_image('rbd', 'edit_img')
+ self.assertStatus(204)
+
+ def test_image_change_config(self):
+ pool = 'rbd'
+ image = 'image_with_config'
+ initial_conf = {
+ 'rbd_qos_bps_limit': 10240,
+ 'rbd_qos_write_iops_limit': None
+ }
+ initial_expect = [{
+ 'name': 'rbd_qos_bps_limit',
+ 'source': 2,
+ 'value': '10240',
+ }, {
+ 'name': 'rbd_qos_write_iops_limit',
+ 'source': 0,
+ 'value': '0',
+ }]
+ new_conf = {
+ 'rbd_qos_bps_limit': 0,
+ 'rbd_qos_bps_burst': 20480,
+ 'rbd_qos_write_iops_limit': None
+ }
+ new_expect = [{
+ 'name': 'rbd_qos_bps_limit',
+ 'source': 2,
+ 'value': '0',
+ }, {
+ 'name': 'rbd_qos_bps_burst',
+ 'source': 2,
+ 'value': '20480',
+ }, {
+ 'name': 'rbd_qos_write_iops_limit',
+ 'source': 0,
+ 'value': '0',
+ }]
+
+ self.create_image(pool, image, 2**30, configuration=initial_conf)
+ self.assertStatus(201)
+ img = self._get('/api/block/image/{}/{}'.format(pool, image))
+ self.assertStatus(200)
+ for conf in initial_expect:
+ self.assertIn(conf, img['configuration'])
+
+ self.edit_image(pool, image, configuration=new_conf)
+ img = self._get('/api/block/image/{}/{}'.format(pool, image))
+ self.assertStatus(200)
+ for conf in new_expect:
+ self.assertIn(conf, img['configuration'])
+
+ self.remove_image(pool, image)
+ self.assertStatus(204)
+
+ def test_update_snapshot(self):
+ self.create_snapshot('rbd', 'img1', 'snap5')
+ self.assertStatus(201)
+ img = self._get('/api/block/image/rbd/img1')
+ self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False)
+
+ self.update_snapshot('rbd', 'img1', 'snap5', 'snap6', None)
+ self.assertStatus(200)
+ img = self._get('/api/block/image/rbd/img1')
+ self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=False)
+
+ self.update_snapshot('rbd', 'img1', 'snap6', None, True)
+ self.assertStatus(200)
+ img = self._get('/api/block/image/rbd/img1')
+ self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=True)
+
+ self.update_snapshot('rbd', 'img1', 'snap6', 'snap5', False)
+ self.assertStatus(200)
+ img = self._get('/api/block/image/rbd/img1')
+ self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False)
+
+ self.remove_snapshot('rbd', 'img1', 'snap5')
+ self.assertStatus(204)
+
+ def test_snapshot_rollback(self):
+ self.create_image('rbd', 'rollback_img', 2**30,
+ features=["layering", "exclusive-lock", "fast-diff",
+ "object-map"])
+ self.assertStatus(201)
+ self.create_snapshot('rbd', 'rollback_img', 'snap1')
+ self.assertStatus(201)
+
+ img = self._get('/api/block/image/rbd/rollback_img')
+ self.assertStatus(200)
+ self.assertEqual(img['disk_usage'], 0)
+
+ self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M',
+ 'rbd/rollback_img'])
+
+ img = self._get('/api/block/image/rbd/rollback_img')
+ self.assertStatus(200)
+ self.assertGreater(img['disk_usage'], 0)
+
+ self._task_post('/api/block/image/rbd/rollback_img/snap/snap1/rollback')
+ self.assertStatus([201, 200])
+
+ img = self._get('/api/block/image/rbd/rollback_img')
+ self.assertStatus(200)
+ self.assertEqual(img['disk_usage'], 0)
+
+ self.remove_snapshot('rbd', 'rollback_img', 'snap1')
+ self.assertStatus(204)
+ self.remove_image('rbd', 'rollback_img')
+ self.assertStatus(204)
+
+ def test_clone(self):
+ self.create_image('rbd', 'cimg', 2**30, features=["layering"])
+ self.assertStatus(201)
+ self.create_snapshot('rbd', 'cimg', 'snap1')
+ self.assertStatus(201)
+ self.update_snapshot('rbd', 'cimg', 'snap1', None, True)
+ self.assertStatus(200)
+ self.clone_image('rbd', 'cimg', 'snap1', 'rbd', 'cimg-clone',
+ features=["layering", "exclusive-lock", "fast-diff",
+ "object-map"])
+ self.assertStatus([200, 201])
+
+ img = self._get('/api/block/image/rbd/cimg-clone')
+ self.assertStatus(200)
+ self._validate_image(img, features_name=['exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'],
+ parent={'pool_name': 'rbd', 'image_name': 'cimg',
+ 'snap_name': 'snap1'})
+
+ res = self.remove_image('rbd', 'cimg')
+ self.assertStatus(400)
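+        # errno 39 is ENOTEMPTY: the image cannot be removed while it still
+        # has snapshots.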
+ self.assertIn('code', res)
+ self.assertEqual(res['code'], '39')
+
+ self.remove_image('rbd', 'cimg-clone')
+ self.assertStatus(204)
+ self.update_snapshot('rbd', 'cimg', 'snap1', None, False)
+ self.assertStatus(200)
+ self.remove_snapshot('rbd', 'cimg', 'snap1')
+ self.assertStatus(204)
+ self.remove_image('rbd', 'cimg')
+ self.assertStatus(204)
+
+ def test_copy(self):
+ self.create_image('rbd', 'coimg', 2**30,
+ features=["layering", "exclusive-lock", "fast-diff",
+ "object-map"])
+ self.assertStatus(201)
+
+ self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M',
+ 'rbd/coimg'])
+
+ self.copy_image('rbd', 'coimg', 'rbd_iscsi', 'coimg-copy',
+ features=["layering", "fast-diff", "exclusive-lock",
+ "object-map"])
+ self.assertStatus([200, 201])
+
+ img = self._get('/api/block/image/rbd/coimg')
+ self.assertStatus(200)
+ self._validate_image(img, features_name=['layering', 'exclusive-lock',
+ 'fast-diff', 'object-map'])
+
+ img_copy = self._get('/api/block/image/rbd_iscsi/coimg-copy')
+ self._validate_image(img_copy, features_name=['exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'],
+ disk_usage=img['disk_usage'])
+
+ self.remove_image('rbd', 'coimg')
+ self.assertStatus(204)
+ self.remove_image('rbd_iscsi', 'coimg-copy')
+ self.assertStatus(204)
+
+ def test_flatten(self):
+ self.create_snapshot('rbd', 'img1', 'snapf')
+ self.update_snapshot('rbd', 'img1', 'snapf', None, True)
+ self.clone_image('rbd', 'img1', 'snapf', 'rbd_iscsi', 'img1_snapf_clone')
+
+ img = self._get('/api/block/image/rbd_iscsi/img1_snapf_clone')
+ self.assertStatus(200)
+ self.assertIsNotNone(img['parent'])
+
+ self.flatten_image('rbd_iscsi', 'img1_snapf_clone')
+ self.assertStatus([200, 201])
+
+ img = self._get('/api/block/image/rbd_iscsi/img1_snapf_clone')
+ self.assertStatus(200)
+ self.assertIsNone(img['parent'])
+
+ self.update_snapshot('rbd', 'img1', 'snapf', None, False)
+ self.remove_snapshot('rbd', 'img1', 'snapf')
+ self.assertStatus(204)
+
+ self.remove_image('rbd_iscsi', 'img1_snapf_clone')
+ self.assertStatus(204)
+
+ def test_default_features(self):
+ default_features = self._get('/api/block/image/default_features')
+ self.assertEqual(default_features, ['deep-flatten', 'exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+
+ def test_image_with_special_name(self):
+ rbd_name = 'test/rbd'
+ rbd_name_encoded = 'test%2Frbd'
+
+ self.create_image('rbd', rbd_name, 10240)
+ self.assertStatus(201)
+
+ img = self._get("/api/block/image/rbd/" + rbd_name_encoded)
+ self.assertStatus(200)
+
+ self._validate_image(img, name=rbd_name, size=10240,
+ num_objs=1, obj_size=4194304,
+ features_name=['deep-flatten',
+ 'exclusive-lock',
+ 'fast-diff', 'layering',
+ 'object-map'])
+
+ self.remove_image('rbd', rbd_name_encoded)
+
+ def test_move_image_to_trash(self):
+ id = self.create_image_in_trash('rbd', 'test_rbd')
+ self.assertStatus(200)
+
+ self._get('/api/block/image/rbd/test_rbd')
+ self.assertStatus(404)
+
+ time.sleep(1)
+
+ image = self.get_trash('rbd', id)
+ self.assertIsNotNone(image)
+
+ self.remove_trash('rbd', id, 'test_rbd')
+
+ def test_list_trash(self):
+ id = self.create_image_in_trash('rbd', 'test_rbd', 0)
+ data = self._get('/api/block/image/trash/?pool_name={}'.format('rbd'))
+ self.assertStatus(200)
+ self.assertIsInstance(data, list)
+ self.assertIsNotNone(data)
+
+ self.remove_trash('rbd', id, 'test_rbd')
+ self.assertStatus(204)
+
+ def test_restore_trash(self):
+ id = self.create_image_in_trash('rbd', 'test_rbd')
+
+        self._task_post('/api/block/image/trash/{}/{}/restore'.format('rbd', id),
+                        {'new_image_name': 'test_rbd'})
+
+ self._get('/api/block/image/rbd/test_rbd')
+ self.assertStatus(200)
+
+ image = self.get_trash('rbd', id)
+ self.assertIsNone(image)
+
+ self.remove_image('rbd', 'test_rbd')
+
+ def test_remove_expired_trash(self):
+ id = self.create_image_in_trash('rbd', 'test_rbd', 0)
+ self.remove_trash('rbd', id, 'test_rbd', False)
+ self.assertStatus(204)
+
+ image = self.get_trash('rbd', id)
+ self.assertIsNone(image)
+
+ def test_remove_not_expired_trash(self):
+ id = self.create_image_in_trash('rbd', 'test_rbd', 9999)
+ self.remove_trash('rbd', id, 'test_rbd', False)
+ self.assertStatus(400)
+
+ time.sleep(1)
+
+ image = self.get_trash('rbd', id)
+ self.assertIsNotNone(image)
+
+ self.remove_trash('rbd', id, 'test_rbd', True)
+
+ def test_remove_not_expired_trash_with_force(self):
+ id = self.create_image_in_trash('rbd', 'test_rbd', 9999)
+ self.remove_trash('rbd', id, 'test_rbd', True)
+ self.assertStatus(204)
+
+ image = self.get_trash('rbd', id)
+ self.assertIsNone(image)
+
+ def test_purge_trash(self):
+ id_expired = self.create_image_in_trash('rbd', 'test_rbd_expired', 0)
+ id_not_expired = self.create_image_in_trash('rbd', 'test_rbd', 9999)
+
+ time.sleep(1)
+
+ self._task_post('/api/block/image/trash/purge?pool_name={}'.format('rbd'))
+ self.assertStatus([200, 201])
+
+ time.sleep(1)
+
+ trash_not_expired = self.get_trash('rbd', id_not_expired)
+ self.assertIsNotNone(trash_not_expired)
+
+        self.wait_until_equal(lambda: self.get_trash('rbd', id_expired), None, 60)
diff --git a/qa/tasks/mgr/dashboard/test_rbd_mirroring.py b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py
new file mode 100644
index 00000000..8480cb87
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=too-many-public-methods
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class RbdMirroringTest(DashboardTestCase):
+ AUTH_ROLES = ['pool-manager', 'block-manager']
+
+ @classmethod
+ def create_pool(cls, name, application='rbd'):
+ data = {
+ 'pool': name,
+ 'pg_num': 2**3,
+ 'pool_type': 'replicated',
+ 'application_metadata': [application]
+ }
+ cls._task_post("/api/pool", data)
+
+ @classmethod
+ def get_pool(cls, pool):
+ data = cls._get('/api/block/mirroring/pool/{}'.format(pool))
+ if isinstance(data, dict):
+ return data
+ return {}
+
+ @classmethod
+ def update_pool(cls, pool, mirror_mode):
+ data = {'mirror_mode': mirror_mode}
+ return cls._task_put('/api/block/mirroring/pool/{}'.format(pool),
+ data)
+
+ @classmethod
+ def list_peers(cls, pool):
+ data = cls._get('/api/block/mirroring/pool/{}/peer'.format(pool))
+ if isinstance(data, list):
+ return data
+ return []
+
+ @classmethod
+ def get_peer(cls, pool, peer_uuid):
+ data = cls._get('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid))
+ if isinstance(data, dict):
+ return data
+ return {}
+
+ @classmethod
+ def create_peer(cls, pool, cluster_name, client_id, **kwargs):
+ data = {'cluster_name': cluster_name, 'client_id': client_id}
+ data.update(kwargs)
+ return cls._task_post('/api/block/mirroring/pool/{}/peer'.format(pool),
+ data)
+
+ @classmethod
+ def update_peer(cls, pool, peer_uuid, **kwargs):
+ return cls._task_put('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid),
+ kwargs)
+
+ @classmethod
+ def delete_peer(cls, pool, peer_uuid):
+ return cls._task_delete('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid))
+
+ @classmethod
+ def setUpClass(cls):
+ super(RbdMirroringTest, cls).setUpClass()
+ cls.create_pool('rbd')
+
+ @classmethod
+ def tearDownClass(cls):
+ super(RbdMirroringTest, cls).tearDownClass()
+ cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it'])
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['create', 'update', 'delete']}])
+ def test_read_access_permissions(self):
+ self.get_pool('rbd')
+ self.assertStatus(403)
+ self.list_peers('rbd')
+ self.assertStatus(403)
+ self.get_peer('rbd', '123')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'update', 'delete']}])
+ def test_create_access_permissions(self):
+ self.create_peer('rbd', 'remote', 'id')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'delete']}])
+ def test_update_access_permissions(self):
+ self.update_peer('rbd', '123')
+ self.assertStatus(403)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'update']}])
+ def test_delete_access_permissions(self):
+ self.delete_peer('rbd', '123')
+ self.assertStatus(403)
+
+ def test_mirror_mode(self):
+ self.update_pool('rbd', 'disabled')
+ mode = self.get_pool('rbd').get('mirror_mode')
+ self.assertEqual(mode, 'disabled')
+
+ self.update_pool('rbd', 'image')
+ mode = self.get_pool('rbd').get('mirror_mode')
+ self.assertEqual(mode, 'image')
+
+ self.update_pool('rbd', 'pool')
+ mode = self.get_pool('rbd').get('mirror_mode')
+ self.assertEqual(mode, 'pool')
+
+ self.update_pool('rbd', 'disabled')
+ mode = self.get_pool('rbd').get('mirror_mode')
+ self.assertEqual(mode, 'disabled')
+
+ def test_set_invalid_mirror_mode(self):
+ self.update_pool('rbd', 'invalid')
+ self.assertStatus(400)
+
+ def test_set_same_mirror_mode(self):
+ self.update_pool('rbd', 'disabled')
+ self.update_pool('rbd', 'disabled')
+ self.assertStatus(200)
+
+ def test_peer(self):
+ self.update_pool('rbd', 'image')
+ self.assertStatus(200)
+
+ peers = self.list_peers('rbd')
+ self.assertStatus(200)
+ self.assertEqual([], peers)
+
+ uuid = self.create_peer('rbd', 'remote', 'admin')['uuid']
+ self.assertStatus(201)
+
+ peers = self.list_peers('rbd')
+ self.assertStatus(200)
+ self.assertEqual([uuid], peers)
+
+ expected_peer = {
+ 'uuid': uuid,
+ 'cluster_name': 'remote',
+ 'client_id': 'admin',
+ 'mon_host': '',
+ 'key': ''
+ }
+ peer = self.get_peer('rbd', uuid)
+ self.assertEqual(expected_peer, peer)
+
+ self.update_peer('rbd', uuid, mon_host='1.2.3.4')
+ self.assertStatus(200)
+
+ expected_peer['mon_host'] = '1.2.3.4'
+ peer = self.get_peer('rbd', uuid)
+ self.assertEqual(expected_peer, peer)
+
+ self.delete_peer('rbd', uuid)
+ self.assertStatus(204)
+
+ self.update_pool('rbd', 'disabled')
+ self.assertStatus(200)
+
+ def test_disable_mirror_with_peers(self):
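+        # Disabling mirroring is rejected (400) while a peer is still
+        # configured and succeeds once the peer has been deleted.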
+ self.update_pool('rbd', 'image')
+ self.assertStatus(200)
+
+ uuid = self.create_peer('rbd', 'remote', 'admin')['uuid']
+ self.assertStatus(201)
+
+ self.update_pool('rbd', 'disabled')
+ self.assertStatus(400)
+
+ self.delete_peer('rbd', uuid)
+ self.assertStatus(204)
+
+ self.update_pool('rbd', 'disabled')
+ self.assertStatus(200)
diff --git a/qa/tasks/mgr/dashboard/test_requests.py b/qa/tasks/mgr/dashboard/test_requests.py
new file mode 100644
index 00000000..cd917dae
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_requests.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class RequestsTest(DashboardTestCase):
+ def test_gzip(self):
+ self._get('/api/summary')
+ self.assertHeaders({
+ 'Content-Encoding': 'gzip',
+ 'Content-Type': 'application/json',
+ })
+
+ def test_force_no_gzip(self):
+ self._get('/api/summary', params=dict(
+ headers={'Accept-Encoding': 'identity'}
+ ))
+ self.assertNotIn('Content-Encoding', self._resp.headers)
+ self.assertHeaders({
+ 'Content-Type': 'application/json',
+ })
+
+ def test_server(self):
+ self._get('/api/summary')
+ self.assertHeaders({
+ 'server': 'Ceph-Dashboard',
+ 'Content-Security-Policy': "frame-ancestors 'self';",
+ 'X-Content-Type-Options': 'nosniff',
+ 'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload'
+ })
diff --git a/qa/tasks/mgr/dashboard/test_rgw.py b/qa/tasks/mgr/dashboard/test_rgw.py
new file mode 100644
index 00000000..9e781142
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_rgw.py
@@ -0,0 +1,710 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import logging
+import six
+from six.moves.urllib import parse
+
+from .helper import DashboardTestCase, JObj, JList, JLeaf
+
+logger = logging.getLogger(__name__)
+
+
+class RgwTestCase(DashboardTestCase):
+
+ maxDiff = None
+ create_test_user = False
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ super(RgwTestCase, cls).setUpClass()
+ # Create the administrator account.
+ cls._radosgw_admin_cmd([
+ 'user', 'create', '--uid', 'admin', '--display-name', 'admin',
+ '--system', '--access-key', 'admin', '--secret', 'admin'
+ ])
+ # Update the dashboard configuration.
+ cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin')
+ cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin')
+        # Optionally create a test user.
+ if cls.create_test_user:
+ cls._radosgw_admin_cmd([
+ 'user', 'create', '--uid', 'teuth-test-user', '--display-name',
+ 'teuth-test-user'
+ ])
+ cls._radosgw_admin_cmd([
+ 'caps', 'add', '--uid', 'teuth-test-user', '--caps',
+ 'metadata=write'
+ ])
+ cls._radosgw_admin_cmd([
+ 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser',
+ 'teuth-test-subuser', '--access', 'full', '--key-type', 's3',
+ '--access-key', 'xyz123'
+ ])
+ cls._radosgw_admin_cmd([
+ 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser',
+ 'teuth-test-subuser2', '--access', 'full', '--key-type',
+ 'swift'
+ ])
+
+ @classmethod
+ def tearDownClass(cls):
+ if cls.create_test_user:
+ cls._radosgw_admin_cmd(['user', 'rm', '--uid=teuth-test-user'])
+ super(RgwTestCase, cls).tearDownClass()
+
+ def setUp(self):
+ super(RgwTestCase, self).setUp()
+
+ def get_rgw_user(self, uid):
+ return self._get('/api/rgw/user/{}'.format(uid))
+
+
+class RgwApiCredentialsTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ def setUp(self):
+ super(RgwApiCredentialsTest, self).setUp()
+ # Restart the Dashboard module to ensure that the connection to the
+ # RGW Admin Ops API is re-established with the new credentials.
+ self.logout()
+ self._ceph_cmd(['mgr', 'module', 'disable', 'dashboard'])
+ self._ceph_cmd(['mgr', 'module', 'enable', 'dashboard', '--force'])
+ # Set the default credentials.
+ self._ceph_cmd(['dashboard', 'set-rgw-api-user-id', ''])
+ self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin')
+ self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin')
+ super(RgwApiCredentialsTest, self).setUp()
+
+ def test_no_access_secret_key(self):
+ self._ceph_cmd(['dashboard', 'reset-rgw-api-secret-key'])
+ self._ceph_cmd(['dashboard', 'reset-rgw-api-access-key'])
+ resp = self._get('/api/rgw/user')
+ self.assertStatus(500)
+ self.assertIn('detail', resp)
+ self.assertIn('component', resp)
+ self.assertIn('No RGW credentials found', resp['detail'])
+ self.assertEqual(resp['component'], 'rgw')
+
+ def test_success(self):
+ data = self._get('/api/rgw/status')
+ self.assertStatus(200)
+ self.assertIn('available', data)
+ self.assertIn('message', data)
+ self.assertTrue(data['available'])
+
+ def test_invalid_user_id(self):
+ self._ceph_cmd(['dashboard', 'set-rgw-api-user-id', 'xyz'])
+ data = self._get('/api/rgw/status')
+ self.assertStatus(200)
+ self.assertIn('available', data)
+ self.assertIn('message', data)
+ self.assertFalse(data['available'])
+ self.assertIn('The user "xyz" is unknown to the Object Gateway.',
+ data['message'])
+
+
+class RgwBucketTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ cls.create_test_user = True
+ super(RgwBucketTest, cls).setUpClass()
+ # Create tenanted users.
+ cls._radosgw_admin_cmd([
+ 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user',
+ '--display-name', 'tenanted teuth-test-user'
+ ])
+ cls._radosgw_admin_cmd([
+ 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user2',
+ '--display-name', 'tenanted teuth-test-user 2'
+ ])
+
+ @classmethod
+ def tearDownClass(cls):
+ cls._radosgw_admin_cmd(
+ ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user'])
+ cls._radosgw_admin_cmd(
+ ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user2'])
+ super(RgwBucketTest, cls).tearDownClass()
+
+ def test_all(self):
+ # Create a new bucket.
+ self._post(
+ '/api/rgw/bucket',
+ params={
+ 'bucket': 'teuth-test-bucket',
+ 'uid': 'admin'
+ })
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self.assertSchema(data, JObj(sub_elems={
+ 'bucket_info': JObj(sub_elems={
+ 'bucket': JObj(allow_unknown=True, sub_elems={
+ 'name': JLeaf(str),
+ 'bucket_id': JLeaf(str),
+ 'tenant': JLeaf(str)
+ }),
+ 'quota': JObj(sub_elems={}, allow_unknown=True),
+ 'creation_time': JLeaf(str)
+ }, allow_unknown=True)
+ }, allow_unknown=True))
+ data = data['bucket_info']['bucket']
+ self.assertEqual(data['name'], 'teuth-test-bucket')
+ self.assertEqual(data['tenant'], '')
+
+ # List all buckets.
+ data = self._get('/api/rgw/bucket')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 1)
+ self.assertIn('teuth-test-bucket', data)
+
+ # List all buckets with stats.
+ data = self._get('/api/rgw/bucket?stats=true')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 1)
+ self.assertSchema(data[0], JObj(sub_elems={
+ 'bid': JLeaf(str),
+ 'bucket': JLeaf(str),
+ 'bucket_quota': JObj(sub_elems={}, allow_unknown=True),
+ 'id': JLeaf(str),
+ 'owner': JLeaf(str),
+ 'usage': JObj(sub_elems={}, allow_unknown=True),
+ 'tenant': JLeaf(str),
+ }, allow_unknown=True))
+
+ # Get the bucket.
+ data = self._get('/api/rgw/bucket/teuth-test-bucket')
+ self.assertStatus(200)
+ self.assertSchema(data, JObj(sub_elems={
+ 'id': JLeaf(str),
+ 'bid': JLeaf(str),
+ 'tenant': JLeaf(str),
+ 'bucket': JLeaf(str),
+ 'bucket_quota': JObj(sub_elems={}, allow_unknown=True),
+ 'owner': JLeaf(str),
+ 'usage': JObj(sub_elems={}, allow_unknown=True),
+ }, allow_unknown=True))
+ self.assertEqual(data['bucket'], 'teuth-test-bucket')
+ self.assertEqual(data['owner'], 'admin')
+
+ # Update the bucket.
+ self._put(
+ '/api/rgw/bucket/teuth-test-bucket',
+ params={
+ 'bucket_id': data['id'],
+ 'uid': 'teuth-test-user'
+ })
+ self.assertStatus(200)
+ data = self._get('/api/rgw/bucket/teuth-test-bucket')
+ self.assertStatus(200)
+ self.assertSchema(data, JObj(sub_elems={
+ 'owner': JLeaf(str),
+ 'bid': JLeaf(str),
+ 'tenant': JLeaf(str)
+ }, allow_unknown=True))
+ self.assertEqual(data['owner'], 'teuth-test-user')
+
+ # Delete the bucket.
+ self._delete('/api/rgw/bucket/teuth-test-bucket')
+ self.assertStatus(204)
+ data = self._get('/api/rgw/bucket')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 0)
+
+ def test_create_get_update_delete_w_tenant(self):
+ # Create a new bucket. The tenant of the user is used when
+ # the bucket is created.
+ self._post(
+ '/api/rgw/bucket',
+ params={
+ 'bucket': 'teuth-test-bucket',
+ 'uid': 'testx$teuth-test-user'
+ })
+ self.assertStatus(201)
+        # The result cannot be validated because the RGW Admin Ops API
+        # does not return a result object when a tenanted bucket is
+        # created.
+ data = self.jsonBody()
+ self.assertIsNone(data)
+
+ # List all buckets.
+ data = self._get('/api/rgw/bucket')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 1)
+ self.assertIn('testx/teuth-test-bucket', data)
+
+ # Get the bucket.
+ data = self._get('/api/rgw/bucket/{}'.format(
+ parse.quote_plus('testx/teuth-test-bucket')))
+ self.assertStatus(200)
+ self.assertSchema(data, JObj(sub_elems={
+ 'owner': JLeaf(str),
+ 'bucket': JLeaf(str),
+ 'tenant': JLeaf(str),
+ 'bid': JLeaf(str)
+ }, allow_unknown=True))
+ self.assertEqual(data['owner'], 'testx$teuth-test-user')
+ self.assertEqual(data['bucket'], 'teuth-test-bucket')
+ self.assertEqual(data['tenant'], 'testx')
+ self.assertEqual(data['bid'], 'testx/teuth-test-bucket')
+
+ # Update bucket: different user from same tenant.
+ self._put(
+ '/api/rgw/bucket/{}'.format(
+ parse.quote_plus('testx/teuth-test-bucket')),
+ params={
+ 'bucket_id': data['id'],
+ 'uid': 'testx$teuth-test-user2'
+ })
+ self.assertStatus(200)
+ data = self._get('/api/rgw/bucket/{}'.format(
+ parse.quote_plus('testx/teuth-test-bucket')))
+ self.assertStatus(200)
+ self.assertIn('owner', data)
+ self.assertEqual(data['owner'], 'testx$teuth-test-user2')
+
+ # Update bucket: different user from empty tenant.
+ self._put(
+ '/api/rgw/bucket/{}'.format(
+ parse.quote_plus('testx/teuth-test-bucket')),
+ params={
+ 'bucket_id': data['id'],
+ 'uid': 'admin'
+ })
+ self.assertStatus(200)
+ data = self._get('/api/rgw/bucket/{}'.format(
+ parse.quote_plus('testx/teuth-test-bucket')))
+ self.assertStatus(200)
+ self.assertIn('owner', data)
+ self.assertEqual(data['owner'], 'admin')
+
+ # Delete the bucket.
+ self._delete('/api/rgw/bucket/{}'.format(
+ parse.quote_plus('testx/teuth-test-bucket')))
+ self.assertStatus(204)
+ data = self._get('/api/rgw/bucket')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 0)
+
+
+class RgwDaemonTest(DashboardTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @DashboardTestCase.RunAs('test', 'test', [{
+ 'rgw': ['create', 'update', 'delete']
+ }])
+ def test_read_access_permissions(self):
+ self._get('/api/rgw/daemon')
+ self.assertStatus(403)
+ self._get('/api/rgw/daemon/id')
+ self.assertStatus(403)
+
+ def test_list(self):
+ data = self._get('/api/rgw/daemon')
+ self.assertStatus(200)
+ self.assertEqual(len(data), 1)
+ data = data[0]
+ self.assertIn('id', data)
+ self.assertIn('version', data)
+ self.assertIn('server_hostname', data)
+
+ def test_get(self):
+ data = self._get('/api/rgw/daemon')
+ self.assertStatus(200)
+
+ data = self._get('/api/rgw/daemon/{}'.format(data[0]['id']))
+ self.assertStatus(200)
+ self.assertIn('rgw_metadata', data)
+ self.assertIn('rgw_id', data)
+ self.assertIn('rgw_status', data)
+ self.assertTrue(data['rgw_metadata'])
+
+ def test_status(self):
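+        # Provision a system user for the RGW Admin Ops API and check that
+        # the dashboard then reports the gateway as available.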
+ self._radosgw_admin_cmd([
+ 'user', 'create', '--uid=admin', '--display-name=admin',
+ '--system', '--access-key=admin', '--secret=admin'
+ ])
+ self._ceph_cmd(['dashboard', 'set-rgw-api-user-id', 'admin'])
+ self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin')
+ self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin')
+
+ data = self._get('/api/rgw/status')
+ self.assertStatus(200)
+ self.assertIn('available', data)
+ self.assertIn('message', data)
+ self.assertTrue(data['available'])
+
+
+class RgwUserTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ super(RgwUserTest, cls).setUpClass()
+
+ def _assert_user_data(self, data):
+ self.assertSchema(data, JObj(sub_elems={
+ 'caps': JList(JObj(sub_elems={}, allow_unknown=True)),
+ 'display_name': JLeaf(str),
+ 'email': JLeaf(str),
+ 'keys': JList(JObj(sub_elems={}, allow_unknown=True)),
+ 'max_buckets': JLeaf(int),
+ 'subusers': JList(JLeaf(str)),
+ 'suspended': JLeaf(int),
+ 'swift_keys': JList(JObj(sub_elems={}, allow_unknown=True)),
+ 'tenant': JLeaf(str),
+ 'user_id': JLeaf(str),
+ 'uid': JLeaf(str)
+ }, allow_unknown=True))
+ self.assertGreaterEqual(len(data['keys']), 1)
+
+ def test_get(self):
+ data = self.get_rgw_user('admin')
+ self.assertStatus(200)
+ self._assert_user_data(data)
+ self.assertEqual(data['user_id'], 'admin')
+
+ def test_list(self):
+ data = self._get('/api/rgw/user')
+ self.assertStatus(200)
+ self.assertGreaterEqual(len(data), 1)
+ self.assertIn('admin', data)
+
+ def test_create_get_update_delete(self):
+ # Create a new user.
+ self._post('/api/rgw/user', params={
+ 'uid': 'teuth-test-user',
+ 'display_name': 'display name'
+ })
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._assert_user_data(data)
+ self.assertEqual(data['user_id'], 'teuth-test-user')
+ self.assertEqual(data['display_name'], 'display name')
+
+ # Get the user.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ self._assert_user_data(data)
+ self.assertEqual(data['tenant'], '')
+ self.assertEqual(data['user_id'], 'teuth-test-user')
+ self.assertEqual(data['uid'], 'teuth-test-user')
+
+ # Update the user.
+ self._put(
+ '/api/rgw/user/teuth-test-user',
+ params={'display_name': 'new name'})
+ self.assertStatus(200)
+ data = self.jsonBody()
+ self._assert_user_data(data)
+ self.assertEqual(data['display_name'], 'new name')
+
+ # Delete the user.
+ self._delete('/api/rgw/user/teuth-test-user')
+ self.assertStatus(204)
+ self.get_rgw_user('teuth-test-user')
+ self.assertStatus(500)
+ resp = self.jsonBody()
+ self.assertIn('detail', resp)
+ self.assertIn('failed request with status code 404', resp['detail'])
+ self.assertIn('"Code":"NoSuchUser"', resp['detail'])
+ self.assertIn('"HostId"', resp['detail'])
+ self.assertIn('"RequestId"', resp['detail'])
+
+ def test_create_get_update_delete_w_tenant(self):
+ # Create a new user.
+ self._post(
+ '/api/rgw/user',
+ params={
+ 'uid': 'test01$teuth-test-user',
+ 'display_name': 'display name'
+ })
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self._assert_user_data(data)
+ self.assertEqual(data['user_id'], 'teuth-test-user')
+ self.assertEqual(data['display_name'], 'display name')
+
+ # Get the user.
+ data = self.get_rgw_user('test01$teuth-test-user')
+ self.assertStatus(200)
+ self._assert_user_data(data)
+ self.assertEqual(data['tenant'], 'test01')
+ self.assertEqual(data['user_id'], 'teuth-test-user')
+ self.assertEqual(data['uid'], 'test01$teuth-test-user')
+
+ # Update the user.
+ self._put(
+ '/api/rgw/user/test01$teuth-test-user',
+ params={'display_name': 'new name'})
+ self.assertStatus(200)
+ data = self.jsonBody()
+ self._assert_user_data(data)
+ self.assertEqual(data['display_name'], 'new name')
+
+ # Delete the user.
+ self._delete('/api/rgw/user/test01$teuth-test-user')
+ self.assertStatus(204)
+ self.get_rgw_user('test01$teuth-test-user')
+ self.assertStatus(500)
+ resp = self.jsonBody()
+ self.assertIn('detail', resp)
+ self.assertIn('failed request with status code 404', resp['detail'])
+ self.assertIn('"Code":"NoSuchUser"', resp['detail'])
+ self.assertIn('"HostId"', resp['detail'])
+ self.assertIn('"RequestId"', resp['detail'])
+
+
+class RgwUserCapabilityTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ cls.create_test_user = True
+ super(RgwUserCapabilityTest, cls).setUpClass()
+
+ def test_set(self):
+ self._post(
+ '/api/rgw/user/teuth-test-user/capability',
+ params={
+ 'type': 'usage',
+ 'perm': 'read'
+ })
+ self.assertStatus(201)
+ data = self.jsonBody()
+ self.assertEqual(len(data), 1)
+ data = data[0]
+ self.assertEqual(data['type'], 'usage')
+ self.assertEqual(data['perm'], 'read')
+
+ # Get the user data to validate the capabilities.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ self.assertGreaterEqual(len(data['caps']), 1)
+ self.assertEqual(data['caps'][0]['type'], 'usage')
+ self.assertEqual(data['caps'][0]['perm'], 'read')
+
+ def test_delete(self):
+ self._delete(
+ '/api/rgw/user/teuth-test-user/capability',
+ params={
+ 'type': 'metadata',
+ 'perm': 'write'
+ })
+ self.assertStatus(204)
+
+ # Get the user data to validate the capabilities.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ self.assertEqual(len(data['caps']), 0)
+
+
+class RgwUserKeyTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ cls.create_test_user = True
+ super(RgwUserKeyTest, cls).setUpClass()
+
+ def test_create_s3(self):
+ self._post(
+ '/api/rgw/user/teuth-test-user/key',
+ params={
+ 'key_type': 's3',
+ 'generate_key': 'false',
+ 'access_key': 'abc987',
+ 'secret_key': 'aaabbbccc'
+ })
+ data = self.jsonBody()
+ self.assertStatus(201)
+ self.assertGreaterEqual(len(data), 3)
+ key = self.find_object_in_list('access_key', 'abc987', data)
+        self.assertIsNotNone(key)
+ self.assertEqual(key['secret_key'], 'aaabbbccc')
+
+ def test_create_swift(self):
+ self._post(
+ '/api/rgw/user/teuth-test-user/key',
+ params={
+ 'key_type': 'swift',
+ 'subuser': 'teuth-test-subuser',
+ 'generate_key': 'false',
+ 'secret_key': 'xxxyyyzzz'
+ })
+ data = self.jsonBody()
+ self.assertStatus(201)
+ self.assertGreaterEqual(len(data), 2)
+ key = self.find_object_in_list('secret_key', 'xxxyyyzzz', data)
+        self.assertIsNotNone(key)
+
+ def test_delete_s3(self):
+ self._delete(
+ '/api/rgw/user/teuth-test-user/key',
+ params={
+ 'key_type': 's3',
+ 'access_key': 'xyz123'
+ })
+ self.assertStatus(204)
+
+ def test_delete_swift(self):
+ self._delete(
+ '/api/rgw/user/teuth-test-user/key',
+ params={
+ 'key_type': 'swift',
+ 'subuser': 'teuth-test-user:teuth-test-subuser2'
+ })
+ self.assertStatus(204)
+
+
+class RgwUserQuotaTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ cls.create_test_user = True
+ super(RgwUserQuotaTest, cls).setUpClass()
+
+ def _assert_quota(self, data):
+ self.assertIn('user_quota', data)
+ self.assertIn('max_objects', data['user_quota'])
+ self.assertIn('enabled', data['user_quota'])
+ self.assertIn('max_size_kb', data['user_quota'])
+ self.assertIn('max_size', data['user_quota'])
+ self.assertIn('bucket_quota', data)
+ self.assertIn('max_objects', data['bucket_quota'])
+ self.assertIn('enabled', data['bucket_quota'])
+ self.assertIn('max_size_kb', data['bucket_quota'])
+ self.assertIn('max_size', data['bucket_quota'])
+
+ def test_get_quota(self):
+ data = self._get('/api/rgw/user/teuth-test-user/quota')
+ self.assertStatus(200)
+ self._assert_quota(data)
+
+ def test_set_user_quota(self):
+ self._put(
+ '/api/rgw/user/teuth-test-user/quota',
+ params={
+ 'quota_type': 'user',
+ 'enabled': 'true',
+ 'max_size_kb': 2048,
+ 'max_objects': 101
+ })
+ self.assertStatus(200)
+
+ data = self._get('/api/rgw/user/teuth-test-user/quota')
+ self.assertStatus(200)
+ self._assert_quota(data)
+ self.assertEqual(data['user_quota']['max_objects'], 101)
+ self.assertTrue(data['user_quota']['enabled'])
+ self.assertEqual(data['user_quota']['max_size_kb'], 2048)
+
+ def test_set_bucket_quota(self):
+ self._put(
+ '/api/rgw/user/teuth-test-user/quota',
+ params={
+ 'quota_type': 'bucket',
+ 'enabled': 'false',
+ 'max_size_kb': 4096,
+ 'max_objects': 2000
+ })
+ self.assertStatus(200)
+
+ data = self._get('/api/rgw/user/teuth-test-user/quota')
+ self.assertStatus(200)
+ self._assert_quota(data)
+ self.assertEqual(data['bucket_quota']['max_objects'], 2000)
+ self.assertFalse(data['bucket_quota']['enabled'])
+ self.assertEqual(data['bucket_quota']['max_size_kb'], 4096)
+
+
+class RgwUserSubuserTest(RgwTestCase):
+
+ AUTH_ROLES = ['rgw-manager']
+
+ @classmethod
+ def setUpClass(cls):
+ cls.create_test_user = True
+ super(RgwUserSubuserTest, cls).setUpClass()
+
+ def test_create_swift(self):
+ self._post(
+ '/api/rgw/user/teuth-test-user/subuser',
+ params={
+ 'subuser': 'tux',
+ 'access': 'readwrite',
+ 'key_type': 'swift'
+ })
+ self.assertStatus(201)
+ data = self.jsonBody()
+ subuser = self.find_object_in_list('id', 'teuth-test-user:tux', data)
+        self.assertIsNotNone(subuser)
+ self.assertEqual(subuser['permissions'], 'read-write')
+
+ # Get the user data to validate the keys.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ key = self.find_object_in_list('user', 'teuth-test-user:tux',
+ data['swift_keys'])
+        self.assertIsNotNone(key)
+
+ def test_create_s3(self):
+ self._post(
+ '/api/rgw/user/teuth-test-user/subuser',
+ params={
+ 'subuser': 'hugo',
+ 'access': 'write',
+ 'generate_secret': 'false',
+ 'access_key': 'yyy',
+ 'secret_key': 'xxx'
+ })
+ self.assertStatus(201)
+ data = self.jsonBody()
+ subuser = self.find_object_in_list('id', 'teuth-test-user:hugo', data)
+        self.assertIsNotNone(subuser)
+ self.assertEqual(subuser['permissions'], 'write')
+
+ # Get the user data to validate the keys.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ key = self.find_object_in_list('user', 'teuth-test-user:hugo',
+ data['keys'])
+        self.assertIsNotNone(key)
+ self.assertEqual(key['secret_key'], 'xxx')
+
+ def test_delete_w_purge(self):
+ self._delete(
+ '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser2')
+ self.assertStatus(204)
+
+ # Get the user data to check that the keys don't exist anymore.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ key = self.find_object_in_list(
+ 'user', 'teuth-test-user:teuth-test-subuser2', data['swift_keys'])
+ self.assertIsNone(key)
+
+ def test_delete_wo_purge(self):
+ self._delete(
+ '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser',
+ params={'purge_keys': 'false'})
+ self.assertStatus(204)
+
+        # Get the user data to check whether the keys still exist.
+ data = self.get_rgw_user('teuth-test-user')
+ self.assertStatus(200)
+ key = self.find_object_in_list(
+ 'user', 'teuth-test-user:teuth-test-subuser', data['keys'])
+        self.assertIsNotNone(key)
diff --git a/qa/tasks/mgr/dashboard/test_role.py b/qa/tasks/mgr/dashboard/test_role.py
new file mode 100644
index 00000000..6b0e35b2
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_role.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class RoleTest(DashboardTestCase):
+ @classmethod
+ def _create_role(cls, name=None, description=None, scopes_permissions=None):
+ data = {}
+ if name:
+ data['name'] = name
+ if description:
+ data['description'] = description
+ if scopes_permissions:
+ data['scopes_permissions'] = scopes_permissions
+ cls._post('/api/role', data)
+
+ def test_crud_role(self):
+ self._create_role(name='role1',
+ description='Description 1',
+ scopes_permissions={'osd': ['read']})
+ self.assertStatus(201)
+ self.assertJsonBody({
+ 'name': 'role1',
+ 'description': 'Description 1',
+ 'scopes_permissions': {'osd': ['read']},
+ 'system': False
+ })
+
+ self._get('/api/role/role1')
+ self.assertStatus(200)
+ self.assertJsonBody({
+ 'name': 'role1',
+ 'description': 'Description 1',
+ 'scopes_permissions': {'osd': ['read']},
+ 'system': False
+ })
+
+ self._put('/api/role/role1', {
+ 'description': 'Description 2',
+ 'scopes_permissions': {'osd': ['read', 'update']},
+ })
+ self.assertStatus(200)
+ self.assertJsonBody({
+ 'name': 'role1',
+ 'description': 'Description 2',
+ 'scopes_permissions': {'osd': ['read', 'update']},
+ 'system': False
+ })
+
+ self._delete('/api/role/role1')
+ self.assertStatus(204)
+
+ def test_list_roles(self):
+ roles = self._get('/api/role')
+ self.assertStatus(200)
+
+ self.assertGreaterEqual(len(roles), 1)
+ for role in roles:
+ self.assertIn('name', role)
+ self.assertIn('description', role)
+ self.assertIn('scopes_permissions', role)
+ self.assertIn('system', role)
+
+ def test_get_role_does_not_exist(self):
+ self._get('/api/role/role2')
+ self.assertStatus(404)
+
+ def test_create_role_already_exists(self):
+ self._create_role(name='read-only',
+ description='Description 1',
+ scopes_permissions={'osd': ['read']})
+ self.assertStatus(400)
+ self.assertError(code='role_already_exists',
+ component='role')
+
+ def test_create_role_no_name(self):
+ self._create_role(description='Description 1',
+ scopes_permissions={'osd': ['read']})
+ self.assertStatus(400)
+ self.assertError(code='name_required',
+ component='role')
+
+ def test_create_role_invalid_scope(self):
+ self._create_role(name='role1',
+ description='Description 1',
+ scopes_permissions={'invalid-scope': ['read']})
+ self.assertStatus(400)
+ self.assertError(code='invalid_scope',
+ component='role')
+
+ def test_create_role_invalid_permission(self):
+ self._create_role(name='role1',
+ description='Description 1',
+ scopes_permissions={'osd': ['invalid-permission']})
+ self.assertStatus(400)
+ self.assertError(code='invalid_permission',
+ component='role')
+
+ def test_delete_role_does_not_exist(self):
+ self._delete('/api/role/role2')
+ self.assertStatus(404)
+
+ def test_delete_system_role(self):
+ self._delete('/api/role/read-only')
+ self.assertStatus(400)
+ self.assertError(code='cannot_delete_system_role',
+ component='role')
+
+ def test_delete_role_associated_with_user(self):
+ self.create_user("user", "user", ['read-only'])
+ self._create_role(name='role1',
+ description='Description 1',
+ scopes_permissions={'user': ['create', 'read', 'update', 'delete']})
+ self.assertStatus(201)
+ self._put('/api/user/user', {'roles': ['role1']})
+ self.assertStatus(200)
+
+ self._delete('/api/role/role1')
+ self.assertStatus(400)
+ self.assertError(code='role_is_associated_with_user',
+ component='role')
+
+ self._put('/api/user/user', {'roles': ['administrator']})
+ self.assertStatus(200)
+ self._delete('/api/role/role1')
+ self.assertStatus(204)
+ self.delete_user("user")
+
+ def test_update_role_does_not_exist(self):
+ self._put('/api/role/role2', {})
+ self.assertStatus(404)
+
+ def test_update_system_role(self):
+ self._put('/api/role/read-only', {})
+ self.assertStatus(400)
+ self.assertError(code='cannot_update_system_role',
+ component='role')
diff --git a/qa/tasks/mgr/dashboard/test_settings.py b/qa/tasks/mgr/dashboard/test_settings.py
new file mode 100644
index 00000000..2d890484
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_settings.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase, JList, JObj, JAny
+
+
+class SettingsTest(DashboardTestCase):
+ def setUp(self):
+ super(SettingsTest, self).setUp()
+ self.settings = self._get('/api/settings')
+
+ def tearDown(self):
+ self._put(
+ '/api/settings',
+ {setting['name']: setting['value']
+ for setting in self.settings})
+
+ def test_list_settings(self):
+ settings = self._get('/api/settings')
+ self.assertGreater(len(settings), 10)
+ self.assertSchema(
+ settings,
+ JList(
+ JObj({
+ 'default': JAny(none=False),
+ 'name': str,
+ 'type': str,
+ 'value': JAny(none=False)
+ })))
+ self.assertStatus(200)
+
+ def test_get_setting(self):
+ setting = self._get('/api/settings/rgw-api-access-key')
+ self.assertSchema(
+ setting,
+ JObj({
+ 'default': JAny(none=False),
+ 'name': str,
+ 'type': str,
+ 'value': JAny(none=False)
+ }))
+ self.assertStatus(200)
+
+ def test_set_setting(self):
+ self._put('/api/settings/rgw-api-access-key', {'value': 'foo'})
+ self.assertStatus(200)
+
+ value = self._get('/api/settings/rgw-api-access-key')['value']
+ self.assertEqual('foo', value)
+
+ def test_bulk_set(self):
+ self._put('/api/settings', {
+ 'RGW_API_HOST': 'somehost',
+ 'RGW_API_PORT': 7777,
+ })
+ self.assertStatus(200)
+
+ host = self._get('/api/settings/rgw-api-host')['value']
+ self.assertStatus(200)
+ self.assertEqual('somehost', host)
+
+ port = self._get('/api/settings/rgw-api-port')['value']
+ self.assertStatus(200)
+ self.assertEqual(7777, port)
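
The setUp/tearDown pair above snapshots every dashboard setting and restores the lot with one bulk PUT, so individual tests can change settings freely. The same save-and-restore idea written as a context manager (a sketch only; get_fn and put_fn stand in for the _get/_put helpers):

    import contextlib

    @contextlib.contextmanager
    def restored_settings(get_fn, put_fn):
        """Snapshot /api/settings before the block and bulk-restore it afterwards."""
        snapshot = get_fn('/api/settings')      # list of {'name': ..., 'value': ...}
        try:
            yield snapshot
        finally:
            # One bulk PUT with a {name: value} mapping, as tearDown does above.
            put_fn('/api/settings',
                   {s['name']: s['value'] for s in snapshot})

    # Hypothetical usage inside a test:
    #   with restored_settings(self._get, self._put):
    #       self._put('/api/settings/rgw-api-access-key', {'value': 'foo'})
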
diff --git a/qa/tasks/mgr/dashboard/test_summary.py b/qa/tasks/mgr/dashboard/test_summary.py
new file mode 100644
index 00000000..1a5d1e99
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_summary.py
@@ -0,0 +1,40 @@
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class SummaryTest(DashboardTestCase):
+ CEPHFS = True
+
+ def test_summary(self):
+ data = self._get("/api/summary")
+ self.assertStatus(200)
+
+ self.assertIn('health_status', data)
+ self.assertIn('mgr_id', data)
+ self.assertIn('have_mon_connection', data)
+ self.assertIn('rbd_mirroring', data)
+ self.assertIn('executing_tasks', data)
+ self.assertIn('finished_tasks', data)
+ self.assertIn('version', data)
+ self.assertIsNotNone(data['health_status'])
+ self.assertIsNotNone(data['mgr_id'])
+ self.assertIsNotNone(data['have_mon_connection'])
+ self.assertEqual(data['rbd_mirroring'], {'errors': 0, 'warnings': 0})
+
+ @DashboardTestCase.RunAs('test', 'test', ['pool-manager'])
+ def test_summary_permissions(self):
+ data = self._get("/api/summary")
+ self.assertStatus(200)
+
+ self.assertIn('health_status', data)
+ self.assertIn('mgr_id', data)
+ self.assertIn('have_mon_connection', data)
+ self.assertNotIn('rbd_mirroring', data)
+ self.assertIn('executing_tasks', data)
+ self.assertIn('finished_tasks', data)
+ self.assertIn('version', data)
+ self.assertIsNotNone(data['health_status'])
+ self.assertIsNotNone(data['mgr_id'])
+ self.assertIsNotNone(data['have_mon_connection'])
+
diff --git a/qa/tasks/mgr/dashboard/test_user.py b/qa/tasks/mgr/dashboard/test_user.py
new file mode 100644
index 00000000..7af3442d
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_user.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class UserTest(DashboardTestCase):
+
+ @classmethod
+ def _create_user(cls, username=None, password=None, name=None, email=None, roles=None):
+ data = {}
+ if username:
+ data['username'] = username
+ if password:
+ data['password'] = password
+ if name:
+ data['name'] = name
+ if email:
+ data['email'] = email
+ if roles:
+ data['roles'] = roles
+ cls._post("/api/user", data)
+
+ def test_crud_user(self):
+ self._create_user(username='user1',
+ password='mypassword',
+ name='My Name',
+ email='my@email.com',
+ roles=['administrator'])
+ self.assertStatus(201)
+ user = self.jsonBody()
+
+ self._get('/api/user/user1')
+ self.assertStatus(200)
+ self.assertJsonBody({
+ 'username': 'user1',
+ 'name': 'My Name',
+ 'email': 'my@email.com',
+ 'roles': ['administrator'],
+ 'lastUpdate': user['lastUpdate']
+ })
+
+ self._put('/api/user/user1', {
+ 'name': 'My New Name',
+ 'email': 'mynew@email.com',
+ 'roles': ['block-manager'],
+ })
+ self.assertStatus(200)
+ user = self.jsonBody()
+ self.assertJsonBody({
+ 'username': 'user1',
+ 'name': 'My New Name',
+ 'email': 'mynew@email.com',
+ 'roles': ['block-manager'],
+ 'lastUpdate': user['lastUpdate']
+ })
+
+ self._delete('/api/user/user1')
+ self.assertStatus(204)
+
+ def test_list_users(self):
+ self._get('/api/user')
+ self.assertStatus(200)
+ user = self.jsonBody()
+ self.assertEqual(len(user), 1)
+ user = user[0]
+ self.assertJsonBody([{
+ 'username': 'admin',
+ 'name': None,
+ 'email': None,
+ 'roles': ['administrator'],
+ 'lastUpdate': user['lastUpdate']
+ }])
+
+ def test_create_user_already_exists(self):
+ self._create_user(username='admin',
+ password='mypassword',
+ name='administrator',
+ email='my@email.com',
+ roles=['administrator'])
+ self.assertStatus(400)
+ self.assertError(code='username_already_exists',
+ component='user')
+
+ def test_create_user_invalid_role(self):
+ self._create_user(username='user1',
+ password='mypassword',
+ name='My Name',
+ email='my@email.com',
+ roles=['invalid-role'])
+ self.assertStatus(400)
+ self.assertError(code='role_does_not_exist',
+ component='user')
+
+ def test_delete_user_does_not_exist(self):
+ self._delete('/api/user/user2')
+ self.assertStatus(404)
+
+ @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}])
+ def test_delete_current_user(self):
+ self._delete('/api/user/test')
+ self.assertStatus(400)
+ self.assertError(code='cannot_delete_current_user',
+ component='user')
+
+ def test_update_user_does_not_exist(self):
+ self._put('/api/user/user2', {'name': 'My New Name'})
+ self.assertStatus(404)
+
+ def test_update_user_invalid_role(self):
+ self._put('/api/user/admin', {'roles': ['invalid-role']})
+ self.assertStatus(400)
+ self.assertError(code='role_does_not_exist',
+ component='user')
diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py
new file mode 100644
index 00000000..66f87486
--- /dev/null
+++ b/qa/tasks/mgr/mgr_test_case.py
@@ -0,0 +1,204 @@
+
+from unittest import case
+import json
+import logging
+
+from teuthology import misc
+from tasks.ceph_test_case import CephTestCase
+
+# TODO move definition of CephCluster away from the CephFS stuff
+from tasks.cephfs.filesystem import CephCluster
+
+
+log = logging.getLogger(__name__)
+
+
+class MgrCluster(CephCluster):
+ def __init__(self, ctx):
+ super(MgrCluster, self).__init__(ctx)
+ self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr'))
+
+ if len(self.mgr_ids) == 0:
+ raise RuntimeError(
+ "This task requires at least one manager daemon")
+
+ self.mgr_daemons = dict(
+ [(mgr_id, self._ctx.daemons.get_daemon('mgr', mgr_id)) for mgr_id
+ in self.mgr_ids])
+
+ def mgr_stop(self, mgr_id):
+ self.mgr_daemons[mgr_id].stop()
+
+ def mgr_fail(self, mgr_id):
+ self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id)
+
+ def mgr_restart(self, mgr_id):
+ self.mgr_daemons[mgr_id].restart()
+
+ def get_mgr_map(self):
+ status = json.loads(
+ self.mon_manager.raw_cluster_cmd("status", "--format=json-pretty"))
+
+ return status["mgrmap"]
+
+ def get_active_id(self):
+ return self.get_mgr_map()["active_name"]
+
+ def get_standby_ids(self):
+ return [s['name'] for s in self.get_mgr_map()["standbys"]]
+
+ def set_module_conf(self, module, key, val):
+ self.mon_manager.raw_cluster_cmd("config", "set", "mgr",
+ "mgr/{0}/{1}".format(
+ module, key
+ ), val)
+
+ def set_module_localized_conf(self, module, mgr_id, key, val):
+ self.mon_manager.raw_cluster_cmd("config", "set", "mgr",
+ "mgr/{0}/{1}/{2}".format(
+ module, mgr_id, key
+ ), val)
+
+
+class MgrTestCase(CephTestCase):
+ MGRS_REQUIRED = 1
+
+ @classmethod
+ def setup_mgrs(cls):
+ # Stop all the daemons
+ for daemon in cls.mgr_cluster.mgr_daemons.values():
+ daemon.stop()
+
+ for mgr_id in cls.mgr_cluster.mgr_ids:
+ cls.mgr_cluster.mgr_fail(mgr_id)
+
+ # Unload all non-default plugins
+ loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "ls"))['enabled_modules']
+ unload_modules = set(loaded) - {"restful"}
+
+ for m in unload_modules:
+ cls.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "disable", m)
+
+ # Start all the daemons
+ for daemon in cls.mgr_cluster.mgr_daemons.values():
+ daemon.restart()
+
+ # Wait for an active to come up
+ cls.wait_until_true(lambda: cls.mgr_cluster.get_active_id() != "",
+ timeout=20)
+
+ expect_standbys = set(cls.mgr_cluster.mgr_ids) \
+ - {cls.mgr_cluster.get_active_id()}
+ cls.wait_until_true(
+ lambda: set(cls.mgr_cluster.get_standby_ids()) == expect_standbys,
+ timeout=20)
+
+ @classmethod
+ def setUpClass(cls):
+ # The test runner should have populated this
+ assert cls.mgr_cluster is not None
+
+ if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED:
+            raise case.SkipTest(
+ "Only have {0} manager daemons, {1} are required".format(
+ len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED))
+
+ cls.setup_mgrs()
+
+ @classmethod
+ def _load_module(cls, module_name):
+ loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "ls"))['enabled_modules']
+ if module_name in loaded:
+ # The enable command is idempotent, but our wait for a restart
+ # isn't, so let's return now if it's already loaded
+ return
+
+ initial_mgr_map = cls.mgr_cluster.get_mgr_map()
+
+        # check if the module is configured as an always-on module
+ mgr_daemons = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "metadata"))
+
+ for daemon in mgr_daemons:
+ if daemon["name"] == initial_mgr_map["active_name"]:
+ ceph_version = daemon["ceph_release"]
+ always_on = initial_mgr_map["always_on_modules"].get(ceph_version, [])
+ if module_name in always_on:
+ return
+
+ initial_gid = initial_mgr_map['active_gid']
+ cls.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable",
+ module_name, "--force")
+
+ # Wait for the module to load
+ def has_restarted():
+ mgr_map = cls.mgr_cluster.get_mgr_map()
+ done = mgr_map['active_gid'] != initial_gid and mgr_map['available']
+ if done:
+ log.debug("Restarted after module load (new active {0}/{1})".format(
+                    mgr_map['active_name'], mgr_map['active_gid']))
+ return done
+ cls.wait_until_true(has_restarted, timeout=30)
+
+
+ @classmethod
+ def _get_uri(cls, service_name):
+ # Little dict hack so that I can assign into this from
+ # the get_or_none function
+ mgr_map = {'x': None}
+
+ def _get_or_none():
+ mgr_map['x'] = cls.mgr_cluster.get_mgr_map()
+ result = mgr_map['x']['services'].get(service_name, None)
+ return result
+
+ cls.wait_until_true(lambda: _get_or_none() is not None, 30)
+
+ uri = mgr_map['x']['services'][service_name]
+
+ log.debug("Found {0} at {1} (daemon {2}/{3})".format(
+ service_name, uri, mgr_map['x']['active_name'],
+ mgr_map['x']['active_gid']))
+
+ return uri
+
+ @classmethod
+ def _assign_ports(cls, module_name, config_name, min_port=7789):
+ """
+ To avoid the need to run lots of hosts in teuthology tests to
+ get different URLs per mgr, we will hand out different ports
+ to each mgr here.
+
+ This is already taken care of for us when running in a vstart
+ environment.
+ """
+ # Start handing out ports well above Ceph's range.
+ assign_port = min_port
+
+ for mgr_id in cls.mgr_cluster.mgr_ids:
+ cls.mgr_cluster.mgr_stop(mgr_id)
+ cls.mgr_cluster.mgr_fail(mgr_id)
+
+ for mgr_id in cls.mgr_cluster.mgr_ids:
+ log.debug("Using port {0} for {1} on mgr.{2}".format(
+ assign_port, module_name, mgr_id
+ ))
+ cls.mgr_cluster.set_module_localized_conf(module_name, mgr_id,
+ config_name,
+ str(assign_port))
+ assign_port += 1
+
+ for mgr_id in cls.mgr_cluster.mgr_ids:
+ cls.mgr_cluster.mgr_restart(mgr_id)
+
+ def is_available():
+ mgr_map = cls.mgr_cluster.get_mgr_map()
+ done = mgr_map['available']
+ if done:
+ log.debug("Available after assign ports (new active {0}/{1})".format(
+ mgr_map['active_name'], mgr_map['active_gid']))
+ return done
+ cls.wait_until_true(is_available, timeout=30)
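
mgr_test_case.py is the base layer the rest of these tests build on: MgrCluster wraps the mon commands for stopping, failing and restarting mgr daemons, while MgrTestCase adds module loading, per-mgr port assignment and service URI discovery. A minimal subclass might look like the sketch below ("example" is a hypothetical mgr module name and "server_port" an assumed option name, not things shipped with Ceph):

    # Sketch of a test module built on MgrTestCase.
    from tasks.mgr.mgr_test_case import MgrTestCase


    class TestExampleModule(MgrTestCase):
        MGRS_REQUIRED = 1

        def setUp(self):
            super(TestExampleModule, self).setUp()
            self.setup_mgrs()                          # restart mgrs with only default modules
            self._assign_ports("example", "server_port", min_port=8200)
            self._load_module("example")               # waits for the active mgr to respawn

        def test_serves_http(self):
            uri = self._get_uri("example")             # polls the mgr map 'services' dict
            self.assertTrue(uri.startswith("http"))
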
diff --git a/qa/tasks/mgr/test_crash.py b/qa/tasks/mgr/test_crash.py
new file mode 100644
index 00000000..49191127
--- /dev/null
+++ b/qa/tasks/mgr/test_crash.py
@@ -0,0 +1,108 @@
+import json
+import logging
+import datetime
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+UUID = 'd5775432-0742-44a3-a435-45095e32e6b1'
+DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
+
+
+class TestCrash(MgrTestCase):
+
+ def setUp(self):
+ super(TestCrash, self).setUp()
+ self.setup_mgrs()
+ self._load_module('crash')
+
+ # Whip up some crash data
+ self.crashes = dict()
+ now = datetime.datetime.utcnow()
+
+ for i in (0, 1, 3, 4, 8):
+ timestamp = now - datetime.timedelta(days=i)
+ timestamp = timestamp.strftime(DATEFMT) + 'Z'
+ crash_id = '_'.join((timestamp, UUID)).replace(' ', '_')
+ self.crashes[crash_id] = {
+ 'crash_id': crash_id, 'timestamp': timestamp,
+ }
+
+ self.assertEqual(
+ 0,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'post', '-i', '-',
+ stdin=json.dumps(self.crashes[crash_id]),
+ )
+ )
+
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'ls',
+ )
+ log.warning("setUp: crash ls returns %s" % retstr)
+
+ self.oldest_crashid = crash_id
+
+ def tearDown(self):
+ for crash in self.crashes.values():
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'rm', crash['crash_id']
+ )
+
+ def test_info(self):
+ for crash in self.crashes.values():
+ log.warning('test_info: crash %s' % crash)
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'ls'
+ )
+ log.warning('ls output: %s' % retstr)
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'info', crash['crash_id'],
+ )
+ log.warning('crash info output: %s' % retstr)
+ crashinfo = json.loads(retstr)
+ self.assertIn('crash_id', crashinfo)
+ self.assertIn('timestamp', crashinfo)
+
+ def test_ls(self):
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'ls',
+ )
+ for crash in self.crashes.values():
+ self.assertIn(crash['crash_id'], retstr)
+
+ def test_rm(self):
+ crashid = next(iter(self.crashes.keys()))
+ self.assertEqual(
+ 0,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'rm', crashid,
+ )
+ )
+
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'ls',
+ )
+ self.assertNotIn(crashid, retstr)
+
+ def test_stat(self):
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'stat',
+ )
+ self.assertIn('5 crashes recorded', retstr)
+ self.assertIn('4 older than 1 days old:', retstr)
+ self.assertIn('3 older than 3 days old:', retstr)
+ self.assertIn('1 older than 7 days old:', retstr)
+
+ def test_prune(self):
+ self.assertEqual(
+ 0,
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'prune', '5'
+ )
+ )
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'crash', 'ls',
+ )
+ self.assertNotIn(self.oldest_crashid, retstr)
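
The expected counts in test_stat follow directly from the day offsets used in setUp (0, 1, 3, 4, 8): by the time `ceph crash stat` runs, the one-day-old entry is already slightly older than one day, so four crashes are older than one day, three older than three days and one older than seven. A standalone arithmetic sketch of that bucketing (not the crash module's own code):

    import datetime

    def older_than(day_offsets, threshold_days,
                   elapsed=datetime.timedelta(seconds=1)):
        """Count synthetic crashes created `day_offsets` days ago that are strictly
        older than `threshold_days` once `elapsed` wall-clock time has passed."""
        return sum(1 for d in day_offsets
                   if datetime.timedelta(days=d) + elapsed
                   > datetime.timedelta(days=threshold_days))

    offsets = [0, 1, 3, 4, 8]        # the offsets used in setUp above
    print(older_than(offsets, 1))    # 4 -> "4 older than 1 days old:"
    print(older_than(offsets, 3))    # 3 -> "3 older than 3 days old:"
    print(older_than(offsets, 7))    # 1 -> "1 older than 7 days old:"
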
diff --git a/qa/tasks/mgr/test_dashboard.py b/qa/tasks/mgr/test_dashboard.py
new file mode 100644
index 00000000..41b26ad8
--- /dev/null
+++ b/qa/tasks/mgr/test_dashboard.py
@@ -0,0 +1,140 @@
+import logging
+import requests
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class TestDashboard(MgrTestCase):
+ MGRS_REQUIRED = 3
+
+ def setUp(self):
+ super(TestDashboard, self).setUp()
+
+ self._assign_ports("dashboard", "ssl_server_port")
+ self._load_module("dashboard")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
+ "create-self-signed-cert")
+
+ def tearDown(self):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr",
+ "mgr/dashboard/standby_behaviour",
+ "redirect")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr",
+ "mgr/dashboard/standby_error_status_code",
+ "500")
+
+ def wait_until_webserver_available(self, url):
+ def _check_connection():
+ try:
+ requests.get(url, allow_redirects=False, verify=False)
+ return True
+ except requests.ConnectionError:
+ pass
+ return False
+ self.wait_until_true(_check_connection, timeout=30)
+
+ def test_standby(self):
+ original_active_id = self.mgr_cluster.get_active_id()
+ original_uri = self._get_uri("dashboard")
+ log.info("Originally running manager '{}' at {}".format(
+ original_active_id, original_uri))
+
+ # Force a failover and wait until the previously active manager
+ # is listed as standby.
+ self.mgr_cluster.mgr_fail(original_active_id)
+ self.wait_until_true(
+ lambda: original_active_id in self.mgr_cluster.get_standby_ids(),
+ timeout=30)
+
+ failed_active_id = self.mgr_cluster.get_active_id()
+ failed_over_uri = self._get_uri("dashboard")
+ log.info("After failover running manager '{}' at {}".format(
+ failed_active_id, failed_over_uri))
+
+ self.assertNotEqual(original_uri, failed_over_uri)
+
+ # Wait until web server of the standby node is settled.
+ self.wait_until_webserver_available(original_uri)
+
+ # The original active daemon should have come back up as a standby
+ # and be doing redirects to the new active daemon.
+ r = requests.get(original_uri, allow_redirects=False, verify=False)
+ self.assertEqual(r.status_code, 303)
+ self.assertEqual(r.headers['Location'], failed_over_uri)
+
+ # Ensure that every URL redirects to the active daemon.
+ r = requests.get("{}/runtime.js".format(original_uri.strip('/')),
+ allow_redirects=False,
+ verify=False)
+ self.assertEqual(r.status_code, 303)
+ self.assertEqual(r.headers['Location'], failed_over_uri)
+
+ def test_standby_disable_redirect(self):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr",
+ "mgr/dashboard/standby_behaviour",
+ "error")
+
+ original_active_id = self.mgr_cluster.get_active_id()
+ original_uri = self._get_uri("dashboard")
+ log.info("Originally running manager '{}' at {}".format(
+ original_active_id, original_uri))
+
+ # Force a failover and wait until the previously active manager
+ # is listed as standby.
+ self.mgr_cluster.mgr_fail(original_active_id)
+ self.wait_until_true(
+ lambda: original_active_id in self.mgr_cluster.get_standby_ids(),
+ timeout=30)
+
+ failed_active_id = self.mgr_cluster.get_active_id()
+ failed_over_uri = self._get_uri("dashboard")
+ log.info("After failover running manager '{}' at {}".format(
+ failed_active_id, failed_over_uri))
+
+ self.assertNotEqual(original_uri, failed_over_uri)
+
+ # Wait until web server of the standby node is settled.
+ self.wait_until_webserver_available(original_uri)
+
+ # Redirection should be disabled now, instead a 500 must be returned.
+ r = requests.get(original_uri, allow_redirects=False, verify=False)
+ self.assertEqual(r.status_code, 500)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr",
+ "mgr/dashboard/standby_error_status_code",
+ "503")
+
+ # The customized HTTP status code (503) must be returned.
+ r = requests.get(original_uri, allow_redirects=False, verify=False)
+ self.assertEqual(r.status_code, 503)
+
+ def test_urls(self):
+ base_uri = self._get_uri("dashboard")
+
+ # This is a very simple smoke test to check that the dashboard can
+ # give us a 200 response to requests. We're not testing that
+ # the content is correct or even renders!
+
+ urls = [
+ "/",
+ ]
+
+ failures = []
+
+ for url in urls:
+ r = requests.get(base_uri + url, allow_redirects=False,
+ verify=False)
+ if r.status_code >= 300 and r.status_code < 400:
+ log.error("Unexpected redirect to: {0} (from {1})".format(
+ r.headers['Location'], base_uri))
+ if r.status_code != 200:
+ failures.append(url)
+
+ log.info("{0}: {1} ({2} bytes)".format(
+ url, r.status_code, len(r.content)
+ ))
+
+ self.assertListEqual(failures, [])
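
wait_until_webserver_available above captures a pattern the failover tests rely on: keep issuing a request until the standby's web server answers, then check the redirect or error behaviour. The same retry-until-deadline idea as a standalone helper (a sketch; the timeout values are arbitrary):

    import time
    import requests

    def wait_for_http(url, timeout=30, interval=1.0):
        """Poll `url` until any HTTP response arrives (redirects and error codes
        count), or raise once the deadline passes.  TLS verification is disabled
        because the test dashboards use self-signed certificates."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                requests.get(url, allow_redirects=False, verify=False)
                return
            except requests.ConnectionError:
                time.sleep(interval)
        raise RuntimeError("{} not reachable within {}s".format(url, timeout))
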
diff --git a/qa/tasks/mgr/test_failover.py b/qa/tasks/mgr/test_failover.py
new file mode 100644
index 00000000..a4e84088
--- /dev/null
+++ b/qa/tasks/mgr/test_failover.py
@@ -0,0 +1,148 @@
+
+import logging
+import json
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class TestFailover(MgrTestCase):
+ MGRS_REQUIRED = 2
+
+ def setUp(self):
+ super(TestFailover, self).setUp()
+ self.setup_mgrs()
+
+ def test_timeout(self):
+ """
+ That when an active mgr stops responding, a standby is promoted
+ after mon_mgr_beacon_grace.
+ """
+
+ # Query which mgr is active
+ original_active = self.mgr_cluster.get_active_id()
+ original_standbys = self.mgr_cluster.get_standby_ids()
+
+ # Stop that daemon
+ self.mgr_cluster.mgr_stop(original_active)
+
+ # Assert that the other mgr becomes active
+ self.wait_until_true(
+ lambda: self.mgr_cluster.get_active_id() in original_standbys,
+ timeout=60
+ )
+
+ self.mgr_cluster.mgr_restart(original_active)
+ self.wait_until_true(
+ lambda: original_active in self.mgr_cluster.get_standby_ids(),
+ timeout=10
+ )
+
+ def test_timeout_nostandby(self):
+ """
+        That when an active mgr stops responding, and no standby is
+ available, the active mgr is removed from the map anyway.
+ """
+ # Query which mgr is active
+ original_active = self.mgr_cluster.get_active_id()
+ original_standbys = self.mgr_cluster.get_standby_ids()
+
+ for s in original_standbys:
+ self.mgr_cluster.mgr_stop(s)
+ self.mgr_cluster.mgr_fail(s)
+
+ self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
+ self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
+
+ grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace"))
+ log.info("Should time out in about {0} seconds".format(grace))
+
+ self.mgr_cluster.mgr_stop(original_active)
+
+ # Now wait for the mon to notice the mgr is gone and remove it
+ # from the map.
+ self.wait_until_equal(
+ lambda: self.mgr_cluster.get_active_id(),
+ "",
+ timeout=grace * 2
+ )
+
+ self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
+ self.assertEqual(self.mgr_cluster.get_active_id(), "")
+
+ def test_explicit_fail(self):
+ """
+ That when a user explicitly fails a daemon, a standby immediately
+ replaces it.
+ :return:
+ """
+ # Query which mgr is active
+ original_active = self.mgr_cluster.get_active_id()
+ original_standbys = self.mgr_cluster.get_standby_ids()
+
+ self.mgr_cluster.mgr_fail(original_active)
+
+ # A standby should take over
+ self.wait_until_true(
+ lambda: self.mgr_cluster.get_active_id() in original_standbys,
+ timeout=60
+ )
+
+        # The one we failed should come back as a standby (it isn't
+ # really dead)
+ self.wait_until_true(
+ lambda: original_active in self.mgr_cluster.get_standby_ids(),
+ timeout=10
+ )
+
+ # Both daemons should have fully populated metadata
+ # (regression test for http://tracker.ceph.com/issues/21260)
+ meta = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "metadata"))
+ id_to_meta = dict([(i['name'], i) for i in meta])
+ for i in [original_active] + original_standbys:
+ self.assertIn(i, id_to_meta)
+ self.assertIn('ceph_version', id_to_meta[i])
+
+        # We should be able to fail back over again: this exercises
+ # our re-initialization of the python runtime within
+ # a single process lifetime.
+
+ # Get rid of any bystander standbys so that the original_active
+ # will be selected as next active.
+ new_active = self.mgr_cluster.get_active_id()
+ for daemon in original_standbys:
+ if daemon != new_active:
+ self.mgr_cluster.mgr_stop(daemon)
+ self.mgr_cluster.mgr_fail(daemon)
+
+ self.assertListEqual(self.mgr_cluster.get_standby_ids(),
+ [original_active])
+
+ self.mgr_cluster.mgr_stop(new_active)
+ self.mgr_cluster.mgr_fail(new_active)
+
+ self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
+ self.assertEqual(self.mgr_cluster.get_standby_ids(), [])
+
+ def test_standby_timeout(self):
+ """
+ That when a standby daemon stops sending beacons, it is
+ removed from the list of standbys
+ :return:
+ """
+ original_active = self.mgr_cluster.get_active_id()
+ original_standbys = self.mgr_cluster.get_standby_ids()
+
+ victim = original_standbys[0]
+ self.mgr_cluster.mgr_stop(victim)
+
+ expect_standbys = set(original_standbys) - {victim}
+
+ self.wait_until_true(
+ lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys,
+ timeout=60
+ )
+ self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
diff --git a/qa/tasks/mgr/test_insights.py b/qa/tasks/mgr/test_insights.py
new file mode 100644
index 00000000..53a98b9c
--- /dev/null
+++ b/qa/tasks/mgr/test_insights.py
@@ -0,0 +1,203 @@
+import logging
+import json
+import datetime
+import time
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+UUID = 'd5775432-0742-44a3-a435-45095e32e6b2'
+DATEFMT = '%Y-%m-%d %H:%M:%S.%f'
+
+class TestInsights(MgrTestCase):
+ def setUp(self):
+ super(TestInsights, self).setUp()
+ self.setup_mgrs()
+ self._load_module("insights")
+ self._load_module("selftest")
+ self.crash_ids = []
+
+ def tearDown(self):
+ self._clear_crashes()
+
+ def _insights(self):
+ retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd("insights")
+ return json.loads(retstr)
+
+ def _add_crash(self, hours, make_invalid = False):
+ now = datetime.datetime.utcnow()
+ timestamp = now - datetime.timedelta(hours = hours)
+ timestamp = timestamp.strftime(DATEFMT) + 'Z'
+ crash_id = '_'.join((timestamp, UUID)).replace(' ', '_')
+ crash = {
+ 'crash_id': crash_id,
+ 'timestamp': timestamp,
+ }
+ if make_invalid:
+ crash["timestamp"] = "not a timestamp"
+
+ ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'post', '-i', '-',
+ stdin=json.dumps(crash)
+ )
+ self.crash_ids.append(crash_id)
+ self.assertEqual(0, ret)
+
+ def _clear_crashes(self):
+ for crash_id in self.crash_ids:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ 'crash', 'rm', crash_id
+ )
+
+ def _wait_for_health_history_checks(self, *args):
+ """Wait for a set of health checks to appear in the health history"""
+ timeout = datetime.datetime.utcnow() + \
+ datetime.timedelta(seconds = 15)
+ while True:
+ report = self._insights()
+ missing = False
+ for check in args:
+ if check not in report["health"]["history"]["checks"]:
+ missing = True
+ break
+ if not missing:
+ return
+ self.assertGreater(timeout,
+ datetime.datetime.utcnow())
+ time.sleep(0.25)
+
+ def _wait_for_curr_health_cleared(self, check):
+ timeout = datetime.datetime.utcnow() + \
+ datetime.timedelta(seconds = 15)
+ while True:
+ report = self._insights()
+ if check not in report["health"]["current"]["checks"]:
+ return
+ self.assertGreater(timeout,
+ datetime.datetime.utcnow())
+ time.sleep(0.25)
+
+ def test_health_history(self):
+ # use empty health history as starting point
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ "insights", "prune-health", "0")
+ report = self._insights()
+ self.assertFalse(report["health"]["history"]["checks"])
+
+ # generate health check history entries. we want to avoid the edge case
+ # of running these tests at _exactly_ the top of the hour so we can
+ # explicitly control when hourly work occurs. for this we use the
+ # current time offset to a half hour.
+ now = datetime.datetime.utcnow()
+ now = datetime.datetime(
+ year = now.year,
+ month = now.month,
+ day = now.day,
+ hour = now.hour,
+ minute = 30)
+
+ check_names = set()
+ for hours in [-18, -11, -5, -1, 0]:
+ # change the insight module's perception of "now" ...
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ "mgr", "self-test", "insights_set_now_offset", str(hours))
+
+ # ... to simulate health check arrivals in the past
+ unique_check_name = "insights_health_check_{}".format(hours)
+ health_check = {
+ unique_check_name: {
+ "severity": "warning",
+ "summary": "summary",
+ "detail": ["detail"]
+ }
+ }
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ "mgr", "self-test", "health", "set",
+ json.dumps(health_check))
+
+ check_names.add(unique_check_name)
+
+ # and also set the same health check to test deduplication
+ dupe_check_name = "insights_health_check".format(hours)
+ health_check = {
+ dupe_check_name: {
+ "severity": "warning",
+ "summary": "summary",
+ "detail": ["detail"]
+ }
+ }
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ "mgr", "self-test", "health", "set",
+ json.dumps(health_check))
+
+ check_names.add(dupe_check_name)
+
+ # wait for the health check to show up in the history report
+ self._wait_for_health_history_checks(unique_check_name, dupe_check_name)
+
+ # clear out the current health checks before moving on
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ "mgr", "self-test", "health", "clear")
+ self._wait_for_curr_health_cleared(unique_check_name)
+
+ report = self._insights()
+ for check in check_names:
+ self.assertIn(check, report["health"]["history"]["checks"])
+
+ # restart the manager
+ active_id = self.mgr_cluster.get_active_id()
+ self.mgr_cluster.mgr_restart(active_id)
+
+ # ensure that at least one of the checks is present after the restart.
+        # we don't require them all to be present because "earlier" checks may not
+ # have sat in memory long enough to be flushed.
+ all_missing = True
+ report = self._insights()
+ for check in check_names:
+ if check in report["health"]["history"]["checks"]:
+ all_missing = False
+ break
+ self.assertFalse(all_missing)
+
+ # pruning really removes history
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result(
+ "insights", "prune-health", "0")
+ report = self._insights()
+ self.assertFalse(report["health"]["history"]["checks"])
+
+ def test_schema(self):
+ """TODO: assert conformance to a full schema specification?"""
+ report = self._insights()
+ for key in ["osd_metadata",
+ "pg_summary",
+ "mon_status",
+ "manager_map",
+ "service_map",
+ "mon_map",
+ "crush_map",
+ "fs_map",
+ "osd_tree",
+ "df",
+ "osd_dump",
+ "config",
+ "health",
+ "crashes",
+ "version",
+ "errors"]:
+ self.assertIn(key, report)
+
+ def test_crash_history(self):
+ self._clear_crashes()
+ report = self._insights()
+ self.assertFalse(report["crashes"]["summary"])
+ self.assertFalse(report["errors"])
+
+ # crashes show up in the report
+ self._add_crash(1)
+ report = self._insights()
+ self.assertTrue(report["crashes"]["summary"])
+ self.assertFalse(report["errors"])
+ log.warning("{}".format(json.dumps(report["crashes"], indent=2)))
+
+ self._clear_crashes()
diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py
new file mode 100644
index 00000000..3c36a6eb
--- /dev/null
+++ b/qa/tasks/mgr/test_module_selftest.py
@@ -0,0 +1,335 @@
+
+import time
+import requests
+import errno
+import logging
+from teuthology.exceptions import CommandFailedError
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class TestModuleSelftest(MgrTestCase):
+ """
+ That modules with a self-test command can be loaded and execute it
+ without errors.
+
+ This is not a substitute for really testing the modules, but it
+ is quick and is designed to catch regressions that could occur
+ if data structures change in a way that breaks how the modules
+ touch them.
+ """
+ MGRS_REQUIRED = 1
+
+ def setUp(self):
+ super(TestModuleSelftest, self).setUp()
+ self.setup_mgrs()
+
+ def _selftest_plugin(self, module_name):
+ self._load_module("selftest")
+ self._load_module(module_name)
+
+ # Execute the module's self_test() method
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "module", module_name)
+
+ def test_zabbix(self):
+ # Set these mandatory config fields so that the zabbix module
+ # won't trigger health/log errors on load/serve.
+ self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost")
+ self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo")
+ self._selftest_plugin("zabbix")
+
+ def test_prometheus(self):
+ self._assign_ports("prometheus", "server_port", min_port=8100)
+ self._selftest_plugin("prometheus")
+
+ def test_influx(self):
+ self._selftest_plugin("influx")
+
+ def test_diskprediction_local(self):
+ self._selftest_plugin("diskprediction_local")
+
+ # Not included in qa/packages/packages.yaml
+ #def test_diskprediction_cloud(self):
+ # self._selftest_plugin("diskprediction_cloud")
+
+ def test_telegraf(self):
+ self._selftest_plugin("telegraf")
+
+ def test_iostat(self):
+ self._selftest_plugin("iostat")
+
+ def test_devicehealth(self):
+ self._selftest_plugin("devicehealth")
+ # Clean up the pool that the module creates, because otherwise
+        # its low PG count causes test failures.
+ pool_name = "device_health_metrics"
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "osd", "pool", "delete", pool_name, pool_name,
+ "--yes-i-really-really-mean-it")
+
+ def test_selftest_run(self):
+ self._load_module("selftest")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")
+
+ def test_telemetry(self):
+ self._selftest_plugin("telemetry")
+
+ def test_crash(self):
+ self._selftest_plugin("crash")
+
+ def test_orchestrator_cli(self):
+ self._selftest_plugin("orchestrator_cli")
+
+
+ def test_selftest_config_update(self):
+ """
+ That configuration updates are seen by running mgr modules
+ """
+ self._load_module("selftest")
+
+ def get_value():
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "config", "get", "testkey").strip()
+
+ self.assertEqual(get_value(), "None")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config", "set", "mgr", "mgr/selftest/testkey", "foo")
+ self.wait_until_equal(get_value, "foo", timeout=10)
+
+ def get_localized_value():
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "config", "get_localized", "testkey").strip()
+
+ self.assertEqual(get_localized_value(), "foo")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config", "set", "mgr", "mgr/selftest/{}/testkey".format(
+ self.mgr_cluster.get_active_id()),
+ "bar")
+ self.wait_until_equal(get_localized_value, "bar", timeout=10)
+
+ def test_selftest_config_upgrade(self):
+ """
+ That pre-mimic config-key config settings are migrated into
+ mimic-style config settings and visible from mgr modules.
+ """
+ self._load_module("selftest")
+
+ def get_value():
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "config", "get", "testkey").strip()
+
+ def get_config():
+ lines = self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config", "dump")\
+ .strip().split("\n")
+ result = []
+ for line in lines[1:]:
+ tokens = line.strip().split()
+ log.info("tokens: {0}".format(tokens))
+ subsys, key, value = tokens[0], tokens[2], tokens[3]
+ result.append((subsys, key, value))
+
+ return result
+
+ # Stop ceph-mgr while we synthetically create a pre-mimic
+ # configuration scenario
+ for mgr_id in self.mgr_cluster.mgr_daemons.keys():
+ self.mgr_cluster.mgr_stop(mgr_id)
+ self.mgr_cluster.mgr_fail(mgr_id)
+
+ # Blow away any modern-style mgr module config options
+ # (the ceph-mgr implementation may only do the upgrade if
+ # it doesn't see new style options)
+ stash = []
+ for subsys, key, value in get_config():
+ if subsys == "mgr" and key.startswith("mgr/"):
+ log.info("Removing config key {0} ahead of upgrade".format(
+ key))
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config", "rm", subsys, key)
+ stash.append((subsys, key, value))
+
+ # Inject an old-style configuration setting in config-key
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config-key", "set", "mgr/selftest/testkey", "testvalue")
+
+        # Inject a configuration setting that looks data-ish and should
+ # not be migrated to a config key
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config-key", "set", "mgr/selftest/testnewline", "foo\nbar")
+
+        # Inject a configuration setting that does not appear in the
+ # module's config schema
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config-key", "set", "mgr/selftest/kvitem", "foo\nbar")
+
+ # Bring mgr daemons back online, the one that goes active
+ # should be doing the upgrade.
+ for mgr_id in self.mgr_cluster.mgr_daemons.keys():
+ self.mgr_cluster.mgr_restart(mgr_id)
+
+ # Wait for a new active
+ self.wait_until_true(
+ lambda: self.mgr_cluster.get_active_id() != "", timeout=30)
+
+ # Check that the selftest module sees the upgraded value
+ self.assertEqual(get_value(), "testvalue")
+
+ # Check that the upgraded value is visible in the configuration
+ seen_keys = [k for s,k,v in get_config()]
+ self.assertIn("mgr/selftest/testkey", seen_keys)
+
+ # ...and that the non-config-looking one isn't
+ self.assertNotIn("mgr/selftest/testnewline", seen_keys)
+
+ # ...and that the not-in-schema one isn't
+ self.assertNotIn("mgr/selftest/kvitem", seen_keys)
+
+ # Restore previous configuration
+ for subsys, key, value in stash:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "config", "set", subsys, key, value)
+
+ def test_selftest_command_spam(self):
+ # Use the selftest module to stress the mgr daemon
+ self._load_module("selftest")
+
+ # Use the dashboard to test that the mgr is still able to do its job
+ self._assign_ports("dashboard", "ssl_server_port")
+ self._load_module("dashboard")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard",
+ "create-self-signed-cert")
+
+ original_active = self.mgr_cluster.get_active_id()
+ original_standbys = self.mgr_cluster.get_standby_ids()
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
+ "background", "start",
+ "command_spam")
+
+ dashboard_uri = self._get_uri("dashboard")
+
+ delay = 10
+ periods = 10
+ for i in range(0, periods):
+ t1 = time.time()
+ # Check that an HTTP module remains responsive
+ r = requests.get(dashboard_uri, verify=False)
+ self.assertEqual(r.status_code, 200)
+
+ # Check that a native non-module command remains responsive
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df")
+
+ time.sleep(delay - (time.time() - t1))
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test",
+ "background", "stop")
+
+ # Check that all mgr daemons are still running
+ self.assertEqual(original_active, self.mgr_cluster.get_active_id())
+ self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids())
+
+ def test_module_commands(self):
+ """
+ That module-handled commands have appropriate behavior on
+ disabled/failed/recently-enabled modules.
+ """
+
+ # Calling a command on a disabled module should return the proper
+ # error code.
+ self._load_module("selftest")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "disable", "selftest")
+ with self.assertRaises(CommandFailedError) as exc_raised:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "run")
+
+ self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)
+
+ # Calling a command that really doesn't exist should give me EINVAL.
+ with self.assertRaises(CommandFailedError) as exc_raised:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "osd", "albatross")
+
+ self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL)
+
+        # Enabling a module and then immediately using one of its commands
+ # should work (#21683)
+ self._load_module("selftest")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "config", "get", "testkey")
+
+ # Calling a command for a failed module should return the proper
+ # error code.
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "background", "start", "throw_exception")
+ with self.assertRaises(CommandFailedError) as exc_raised:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "run"
+ )
+ self.assertEqual(exc_raised.exception.exitstatus, errno.EIO)
+
+ # A health alert should be raised for a module that has thrown
+ # an exception from its serve() method
+ self.wait_for_health(
+ "Module 'selftest' has failed: Synthetic exception in serve",
+ timeout=30)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "module", "disable", "selftest")
+
+ self.wait_for_health_clear(timeout=30)
+
+ def test_module_remote(self):
+ """
+ Use the selftest module to exercise inter-module communication
+ """
+ self._load_module("selftest")
+ # The "self-test remote" operation just happens to call into
+ # influx.
+ self._load_module("influx")
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "remote")
+
+ def test_selftest_cluster_log(self):
+ """
+ Use the selftest module to test the cluster/audit log interface.
+ """
+ priority_map = {
+ "info": "INF",
+ "security": "SEC",
+ "warning": "WRN",
+ "error": "ERR"
+ }
+ self._load_module("selftest")
+ for priority in priority_map.keys():
+ message = "foo bar {}".format(priority)
+ log_message = "[{}] {}".format(priority_map[priority], message)
+ # Check for cluster/audit logs:
+ # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info
+ # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security
+ # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning
+ # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error
+ with self.assert_cluster_log(log_message):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "cluster-log", "cluster",
+ priority, message)
+ with self.assert_cluster_log(log_message, watch_channel="audit"):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "cluster-log", "audit",
+ priority, message)
+
+ def test_selftest_cluster_log_unknown_channel(self):
+ """
+ Use the selftest module to test the cluster/audit log interface.
+ """
+ with self.assertRaises(CommandFailedError) as exc_raised:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "mgr", "self-test", "cluster-log", "xyz",
+ "ERR", "The channel does not exist")
+ self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP)
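
get_config in test_selftest_config_upgrade parses the tabular output of `ceph config dump` column by column, which is fragile if the column layout changes. A more defensive variant would ask for JSON instead; the field names below (section, name, value) are assumptions about what the JSON formatter emits, so this is a sketch rather than a drop-in replacement:

    import json

    def get_config_json(raw_cluster_cmd):
        """Return (section, name, value) tuples from `ceph config dump --format json`.
        `raw_cluster_cmd` is assumed to behave like mon_manager.raw_cluster_cmd."""
        out = raw_cluster_cmd("config", "dump", "--format", "json")
        return [(opt["section"], opt["name"], opt["value"])
                for opt in json.loads(out)]
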
diff --git a/qa/tasks/mgr/test_orchestrator_cli.py b/qa/tasks/mgr/test_orchestrator_cli.py
new file mode 100644
index 00000000..50416f1d
--- /dev/null
+++ b/qa/tasks/mgr/test_orchestrator_cli.py
@@ -0,0 +1,154 @@
+import errno
+import json
+import logging
+
+from teuthology.exceptions import CommandFailedError
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class TestOrchestratorCli(MgrTestCase):
+ MGRS_REQUIRED = 1
+
+ def _orch_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
+
+ def _progress_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args)
+
+ def _orch_cmd_result(self, *args, **kwargs):
+ """
+ raw_cluster_cmd doesn't support kwargs.
+ """
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("orchestrator", *args, **kwargs)
+
+ def setUp(self):
+ super(TestOrchestratorCli, self).setUp()
+
+ self._load_module("orchestrator_cli")
+ self._load_module("test_orchestrator")
+ self._orch_cmd("set", "backend", "test_orchestrator")
+
+ def test_status(self):
+ ret = self._orch_cmd("status")
+ self.assertIn("test_orchestrator", ret)
+
+ def test_device_ls(self):
+ ret = self._orch_cmd("device", "ls")
+ self.assertIn("localhost:", ret)
+
+ def test_device_ls_refresh(self):
+ ret = self._orch_cmd("device", "ls", "--refresh")
+ self.assertIn("localhost:", ret)
+
+    def test_device_ls_hosts(self):
+ ret = self._orch_cmd("device", "ls", "localhost", "host1")
+ self.assertIn("localhost:", ret)
+
+
+ def test_device_ls_json(self):
+ ret = self._orch_cmd("device", "ls", "--format", "json")
+ self.assertIn("localhost", ret)
+ self.assertIsInstance(json.loads(ret), list)
+
+ def test_service_ls(self):
+ ret = self._orch_cmd("service", "ls")
+ self.assertIn("ceph-mgr", ret)
+
+ def test_service_ls_json(self):
+ ret = self._orch_cmd("service", "ls", "--format", "json")
+ self.assertIsInstance(json.loads(ret), list)
+ self.assertIn("ceph-mgr", ret)
+
+
+ def test_service_action(self):
+ self._orch_cmd("service", "reload", "mds", "cephfs")
+ self._orch_cmd("service", "stop", "mds", "cephfs")
+ self._orch_cmd("service", "start", "mds", "cephfs")
+
+ def test_service_instance_action(self):
+ self._orch_cmd("service-instance", "reload", "mds", "a")
+ self._orch_cmd("service-instance", "stop", "mds", "a")
+ self._orch_cmd("service-instance", "start", "mds", "a")
+
+ def test_osd_create(self):
+ self._orch_cmd("osd", "create", "*:device")
+ self._orch_cmd("osd", "create", "*:device,device2")
+
+ drive_group = {
+ "host_pattern": "*",
+ "data_devices": {"paths": ["/dev/sda"]}
+ }
+
+ res = self._orch_cmd_result("osd", "create", "-i", "-", stdin=json.dumps(drive_group))
+ self.assertEqual(res, 0)
+
+ with self.assertRaises(CommandFailedError):
+ self._orch_cmd("osd", "create", "notfound:device")
+
+ def test_mds_add(self):
+ self._orch_cmd("mds", "add", "service_name")
+
+ def test_rgw_add(self):
+ self._orch_cmd("rgw", "add", "service_name")
+
+ def test_nfs_add(self):
+ self._orch_cmd("nfs", "add", "service_name", "pool", "--namespace", "ns")
+ self._orch_cmd("nfs", "add", "service_name", "pool")
+
+ def test_osd_rm(self):
+ self._orch_cmd("osd", "rm", "osd.0")
+
+ def test_mds_rm(self):
+ self._orch_cmd("mds", "rm", "foo")
+
+ def test_rgw_rm(self):
+ self._orch_cmd("rgw", "rm", "foo")
+
+ def test_nfs_rm(self):
+ self._orch_cmd("nfs", "rm", "service_name")
+
+ def test_host_ls(self):
+ out = self._orch_cmd("host", "ls")
+ self.assertEqual(out, "localhost\n")
+
+ def test_host_add(self):
+ self._orch_cmd("host", "add", "hostname")
+
+ def test_host_rm(self):
+ self._orch_cmd("host", "rm", "hostname")
+
+ def test_mon_update(self):
+ self._orch_cmd("mon", "update", "3")
+ self._orch_cmd("mon", "update", "3", "host1", "host2", "host3")
+ self._orch_cmd("mon", "update", "3", "host1:network", "host2:network", "host3:network")
+
+ def test_mgr_update(self):
+ self._orch_cmd("mgr", "update", "3")
+
+ def test_nfs_update(self):
+ self._orch_cmd("nfs", "update", "service_name", "2")
+
+ def test_error(self):
+ ret = self._orch_cmd_result("host", "add", "raise_no_support")
+ self.assertEqual(ret, errno.ENOENT)
+ ret = self._orch_cmd_result("host", "add", "raise_bug")
+ self.assertEqual(ret, errno.EINVAL)
+ ret = self._orch_cmd_result("host", "add", "raise_not_implemented")
+ self.assertEqual(ret, errno.ENOENT)
+ ret = self._orch_cmd_result("host", "add", "raise_no_orchestrator")
+ self.assertEqual(ret, errno.ENOENT)
+ ret = self._orch_cmd_result("host", "add", "raise_import_error")
+ self.assertEqual(ret, errno.ENOENT)
+
+ def test_progress(self):
+ self._progress_cmd('clear')
+ evs = json.loads(self._progress_cmd('json'))['completed']
+ self.assertEqual(len(evs), 0)
+ self._orch_cmd("mgr", "update", "4")
+ evs = json.loads(self._progress_cmd('json'))['completed']
+ self.assertEqual(len(evs), 1)
+ self.assertIn('update_mgrs', evs[0]['message'])
diff --git a/qa/tasks/mgr/test_progress.py b/qa/tasks/mgr/test_progress.py
new file mode 100644
index 00000000..8c06dd0e
--- /dev/null
+++ b/qa/tasks/mgr/test_progress.py
@@ -0,0 +1,376 @@
+
+import json
+import logging
+import time
+from unittest import SkipTest
+
+from .mgr_test_case import MgrTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class TestProgress(MgrTestCase):
+ POOL = "progress_data"
+
+ # How long we expect to wait at most between taking an OSD out
+ # and seeing the progress event pop up.
+ EVENT_CREATION_PERIOD = 5
+
+ WRITE_PERIOD = 30
+
+ # Generous period for OSD recovery, should be same order of magnitude
+ # to how long it took to write the data to begin with
+ RECOVERY_PERIOD = WRITE_PERIOD * 4
+
+ def _get_progress(self):
+ out = self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "json")
+ return json.loads(out)
+
+ def _all_events(self):
+ """
+ To avoid racing on completion, we almost always want to look
+ for events in the total list of active and complete, so
+ munge them into a single list.
+ """
+ p = self._get_progress()
+ log.info(json.dumps(p, indent=2))
+ return p['events'] + p['completed']
+
+ def _events_in_progress(self):
+ """
+        This function returns all events that are in progress
+ """
+ p = self._get_progress()
+ log.info(json.dumps(p, indent=2))
+ return p['events']
+
+ def _completed_events(self):
+ """
+ This function returns all events that are completed
+ """
+ p = self._get_progress()
+ log.info(json.dumps(p, indent=2))
+ return p['completed']
+
+ def is_osd_marked_out(self, ev):
+ return ev['message'].endswith('marked out')
+
+ def is_osd_marked_in(self, ev):
+ return ev['message'].endswith('marked in')
+
+ def _get_osd_in_out_events(self, marked='both'):
+ """
+        Return the events that deal with OSDs being
+        marked in, out or both
+ """
+
+ marked_in_events = []
+ marked_out_events = []
+
+ events_in_progress = self._events_in_progress()
+ for ev in events_in_progress:
+ if self.is_osd_marked_out(ev):
+ marked_out_events.append(ev)
+ elif self.is_osd_marked_in(ev):
+ marked_in_events.append(ev)
+
+ if marked == 'both':
+ return [marked_in_events] + [marked_out_events]
+ elif marked == 'in':
+ return marked_in_events
+ else:
+ return marked_out_events
+
+ def _osd_in_out_events_count(self, marked='both'):
+ """
+        Count the number of ongoing recovery events that deal with
+ OSDs being marked in, out or both.
+ """
+ events_in_progress = self._events_in_progress()
+ marked_in_count = 0
+ marked_out_count = 0
+
+ for ev in events_in_progress:
+ if self.is_osd_marked_out(ev):
+ marked_out_count += 1
+ elif self.is_osd_marked_in(ev):
+ marked_in_count += 1
+
+ if marked == 'both':
+ return marked_in_count + marked_out_count
+ elif marked == 'in':
+ return marked_in_count
+ else:
+ return marked_out_count
+
+ def _setup_pool(self, size=None):
+ self.mgr_cluster.mon_manager.create_pool(self.POOL)
+ if size is not None:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set', self.POOL, 'size', str(size))
+
+ def _write_some_data(self, t):
+ """
+ To adapt to test systems of varying performance, we write
+ data for a defined time period, rather than to a defined
+ capacity. This will hopefully result in a similar timescale
+ for PG recovery after an OSD failure.
+ """
+
+ args = [
+ "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"]
+
+ self.mgr_cluster.admin_remote.run(args=args, wait=True)
+
+ def _osd_count(self):
+ osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json()
+ return len(osd_map['osds'])
+
+ def setUp(self):
+ super(TestProgress, self).setUp()
+ # Ensure we have at least four OSDs
+ if self._osd_count() < 4:
+ raise SkipTest("Not enough OSDS!")
+
+ # Remove any filesystems so that we can remove their pools
+ if self.mds_cluster:
+ self.mds_cluster.mds_stop()
+ self.mds_cluster.mds_fail()
+ self.mds_cluster.delete_all_filesystems()
+
+ # Remove all other pools
+ for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']:
+ self.mgr_cluster.mon_manager.remove_pool(pool['pool_name'])
+
+ self._load_module("progress")
+ self.mgr_cluster.mon_manager.raw_cluster_cmd('progress', 'clear')
+
+ def _simulate_failure(self, osd_ids=None):
+ """
+ Common lead-in to several tests: get some data in the cluster,
+ then mark an OSD out to trigger the start of a progress event.
+
+ Return the JSON representation of the failure event.
+ """
+
+ if osd_ids is None:
+ osd_ids = [0]
+
+ self._setup_pool()
+ self._write_some_data(self.WRITE_PERIOD)
+
+ for osd_id in osd_ids:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', str(osd_id))
+
+ # Wait for a progress event to pop up
+ self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1,
+ timeout=self.EVENT_CREATION_PERIOD*2,
+ period=1)
+ ev = self._get_osd_in_out_events('out')[0]
+ log.info(json.dumps(ev, indent=1))
+ self.assertIn("Rebalancing after osd.0 marked out", ev['message'])
+
+ return ev
+
+ def _simulate_back_in(self, osd_ids, initial_event):
+
+ for osd_id in osd_ids:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'in', str(osd_id))
+
+ # First Event should complete promptly
+ self.wait_until_true(lambda: self._is_complete(initial_event['id']),
+ timeout=self.EVENT_CREATION_PERIOD)
+
+ try:
+ # Wait for progress event marked in to pop up
+ self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1,
+ timeout=self.EVENT_CREATION_PERIOD*2,
+ period=1)
+ except RuntimeError as ex:
+ if not "Timed out after" in str(ex):
+ raise ex
+
+ log.info("There was no PGs affected by osd being marked in")
+ return None
+
+ new_event = self._get_osd_in_out_events('in')[0]
+ return new_event
+
+ def _no_events_anywhere(self):
+ """
+ Whether there are any live or completed events
+ """
+ p = self._get_progress()
+ total_events = len(p['events']) + len(p['completed'])
+ return total_events == 0
+
+ def _is_quiet(self):
+ """
+ Whether any progress events are live.
+ """
+ return len(self._get_progress()['events']) == 0
+
+ def _is_complete(self, ev_id):
+ progress = self._get_progress()
+ live_ids = [ev['id'] for ev in progress['events']]
+ complete_ids = [ev['id'] for ev in progress['completed']]
+ if ev_id in complete_ids:
+ assert ev_id not in live_ids
+ return True
+ else:
+ assert ev_id in live_ids
+ return False
+
+ def tearDown(self):
+ if self.POOL in self.mgr_cluster.mon_manager.pools:
+ self.mgr_cluster.mon_manager.remove_pool(self.POOL)
+
+ osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json()
+ for osd in osd_map['osds']:
+ if osd['weight'] == 0.0:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'in', str(osd['osd']))
+
+ super(TestProgress, self).tearDown()
+
+ def test_osd_healthy_recovery(self):
+ """
+ The simple recovery case: an OSD goes down, its PGs get a new
+ placement, and we wait for the PG to get healthy in its new
+ locations.
+ """
+ ev = self._simulate_failure()
+
+ # Wait for progress event to ultimately reach completion
+ self.wait_until_true(lambda: self._is_complete(ev['id']),
+ timeout=self.RECOVERY_PERIOD)
+ self.assertTrue(self._is_quiet())
+
+ def test_pool_removal(self):
+ """
+ That a pool removed during OSD recovery causes the
+ progress event to be correctly marked complete once there
+ is no more data to move.
+ """
+ ev = self._simulate_failure()
+
+ self.mgr_cluster.mon_manager.remove_pool(self.POOL)
+
+ # Event should complete promptly
+ self.wait_until_true(lambda: self._is_complete(ev['id']),
+ timeout=self.EVENT_CREATION_PERIOD)
+ self.assertTrue(self._is_quiet())
+
+ def test_osd_came_back(self):
+ """
+ When a recovery is underway, but then the out OSD
+ comes back in, such that recovery is no longer necessary.
+ It should create another event when the OSD is marked in
+ and cancel the one that is still ongoing.
+ """
+ ev1 = self._simulate_failure()
+
+ ev2 = self._simulate_back_in([0], ev1)
+
+ # Wait for progress event to ultimately complete
+ self.wait_until_true(lambda: self._is_complete(ev2['id']),
+ timeout=self.RECOVERY_PERIOD)
+
+ self.assertTrue(self._is_quiet())
+
+ def test_osd_cannot_recover(self):
+ """
+ When the cluster cannot recover from a lost OSD, e.g.
+ because there is no suitable new placement for its PGs:
+ a size=3 pool when only 2 OSDs are left, or when the
+ remaining OSDs span only 2 hosts.
+
+ Progress event should not be created.
+ """
+
+ pool_size = 3
+
+ self._setup_pool(size=pool_size)
+ self._write_some_data(self.WRITE_PERIOD)
+
+ # Fail enough OSDs so there are fewer than N_replicas OSDs
+ # available.
+ osd_count = self._osd_count()
+
+ # First do some failures that will result in a normal rebalance
+ # (Assumption: we're in a test environment that is configured
+ # not to require replicas be on different hosts, like teuthology)
+ for osd_id in range(0, osd_count - pool_size):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', str(osd_id))
+
+ # We should see an event for each of the OSDs we took out
+ self.wait_until_equal(
+ lambda: len(self._all_events()),
+ osd_count - pool_size,
+ timeout=self.EVENT_CREATION_PERIOD)
+
+ # Those should complete cleanly
+ self.wait_until_true(
+ lambda: self._is_quiet(),
+ timeout=self.RECOVERY_PERIOD
+ )
+
+ # Fail one last OSD, at the point the PGs have nowhere to go
+ victim_osd = osd_count - pool_size
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', str(victim_osd))
+
+ # Check that no event is created
+ time.sleep(self.EVENT_CREATION_PERIOD)
+
+ self.assertEqual(len(self._all_events()), osd_count - pool_size)
+
+ def test_turn_off_module(self):
+ """
+ When the module is turned off, there should not be any
+ ongoing or completed events. The module should also not
+ accept any remote events coming in from other modules.
+ However, once it is turned back on, creating an event
+ should work as before.
+ """
+
+ pool_size = 3
+ self._setup_pool(size=pool_size)
+ self._write_some_data(self.WRITE_PERIOD)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off")
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', '0')
+
+ time.sleep(self.EVENT_CREATION_PERIOD)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'in', '0')
+
+ time.sleep(self.EVENT_CREATION_PERIOD)
+
+ self.assertTrue(self._is_quiet())
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on")
+
+ self._write_some_data(self.WRITE_PERIOD)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ 'osd', 'out', '0')
+
+ # Wait for a progress event to pop up
+ self.wait_until_equal(lambda: len(self._all_events()), 1,
+ timeout=self.EVENT_CREATION_PERIOD*2)
+
+ ev = self._all_events()[0]
+
+ log.info(json.dumps(ev, indent=1))
+
+ self.wait_until_true(lambda: self._is_complete(ev['id']),
+ timeout=self.RECOVERY_PERIOD)
+ self.assertTrue(self._is_quiet())
diff --git a/qa/tasks/mgr/test_prometheus.py b/qa/tasks/mgr/test_prometheus.py
new file mode 100644
index 00000000..376556ab
--- /dev/null
+++ b/qa/tasks/mgr/test_prometheus.py
@@ -0,0 +1,79 @@
+import json
+import logging
+import requests
+
+from .mgr_test_case import MgrTestCase
+
+log = logging.getLogger(__name__)
+
+
+class TestPrometheus(MgrTestCase):
+ MGRS_REQUIRED = 3
+
+ def setUp(self):
+ super(TestPrometheus, self).setUp()
+ self.setup_mgrs()
+
+ def test_file_sd_command(self):
+ self._assign_ports("prometheus", "server_port")
+ self._load_module("prometheus")
+
+ result = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "prometheus", "file_sd_config"))
+ mgr_map = self.mgr_cluster.get_mgr_map()
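+ # the service discovery output should list one target per mgr:
+ # every standby plus the single active daemon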
+ self.assertEqual(len(result[0]['targets']), len(mgr_map['standbys']) + 1)
+
+ def test_standby(self):
+ self._assign_ports("prometheus", "server_port")
+ self._load_module("prometheus")
+
+ original_active = self.mgr_cluster.get_active_id()
+
+ original_uri = self._get_uri("prometheus")
+ log.info("Originally running at {0}".format(original_uri))
+
+ self.mgr_cluster.mgr_fail(original_active)
+
+ failed_over_uri = self._get_uri("prometheus")
+ log.info("After failover running at {0}".format(failed_over_uri))
+
+ self.assertNotEqual(original_uri, failed_over_uri)
+
+ # The original active daemon should have come back up as a standby
+ # and serve some html under "/" and an empty answer under /metrics
+ r = requests.get(original_uri, allow_redirects=False)
+ self.assertEqual(r.status_code, 200)
+ r = requests.get(original_uri + "metrics", allow_redirects=False)
+ self.assertEqual(r.status_code, 200)
+ self.assertEqual(r.headers["content-type"], "text/plain;charset=utf-8")
+ self.assertEqual(r.headers["server"], "Ceph-Prometheus")
+
+ def test_urls(self):
+ self._assign_ports("prometheus", "server_port")
+ self._load_module("prometheus")
+
+ base_uri = self._get_uri("prometheus")
+
+ # This is a very simple smoke test to check that the module can
+ # give us a 200 response to requests. We're not testing that
+ # the content is correct or even renders!
+
+ urls = [
+ "/",
+ "/metrics"
+ ]
+
+ failures = []
+
+ for url in urls:
+ r = requests.get(base_uri + url, allow_redirects=False)
+ if r.status_code != 200:
+ failures.append(url)
+
+ log.info("{0}: {1} ({2} bytes)".format(
+ url, r.status_code, len(r.content)
+ ))
+
+ self.assertListEqual(failures, [])
diff --git a/qa/tasks/mgr/test_ssh_orchestrator.py b/qa/tasks/mgr/test_ssh_orchestrator.py
new file mode 100644
index 00000000..f7c1c0ed
--- /dev/null
+++ b/qa/tasks/mgr/test_ssh_orchestrator.py
@@ -0,0 +1,23 @@
+import logging
+from tasks.mgr.mgr_test_case import MgrTestCase
+
+log = logging.getLogger(__name__)
+
+class TestOrchestratorCli(MgrTestCase):
+ MGRS_REQUIRED = 1
+
+ def _orch_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("orchestrator", *args)
+
+ def setUp(self):
+ super(TestOrchestratorCli, self).setUp()
+ self._load_module("orchestrator_cli")
+ self._load_module("ssh")
+ self._orch_cmd("set", "backend", "ssh")
+
+ def test_host_ls(self):
+ self._orch_cmd("host", "add", "osd0")
+ self._orch_cmd("host", "add", "mon0")
+ ret = self._orch_cmd("host", "ls")
+ self.assertIn("osd0", ret)
+ self.assertIn("mon0", ret)
diff --git a/qa/tasks/mon_clock_skew_check.py b/qa/tasks/mon_clock_skew_check.py
new file mode 100644
index 00000000..59d4169d
--- /dev/null
+++ b/qa/tasks/mon_clock_skew_check.py
@@ -0,0 +1,73 @@
+"""
+Handle clock skews in monitors.
+"""
+import logging
+import time
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+class ClockSkewCheck:
+ """
+ Check if there are any clock skews among the monitors in the
+ quorum.
+
+ This task accepts the following options:
+
+ interval number of seconds to wait before checking (default: 30.0)
+ expect-skew 'true' or 'false', to indicate whether to expect a skew during
+ the run or not. If 'true', the test fails if no skew is
+ found and succeeds if one is found; if 'false', it is the
+ other way around. (default: false)
+
+ For example::
+
+ - mon_clock_skew_check:
+ expect-skew: true
+ """
+
+ def __init__(self, ctx, manager, config, logger):
+ self.ctx = ctx
+ self.manager = manager
+
+ self.stopping = False
+ self.logger = logger
+ self.config = config
+
+ if self.config is None:
+ self.config = dict()
+
+
+def task(ctx, config):
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'mon_clock_skew_check task only accepts a dict for configuration'
+ interval = float(config.get('interval', 30.0))
+ expect_skew = config.get('expect-skew', False)
+
+ log.info('Beginning mon_clock_skew_check...')
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ quorum_size = len(teuthology.get_mon_names(ctx))
+ manager.wait_for_mon_quorum_size(quorum_size)
+
+ # wait a bit
+ log.info('sleeping for {s} seconds'.format(
+ s=interval))
+ time.sleep(interval)
+
+ health = manager.get_mon_health(True)
+ log.info('got health %s' % health)
+ if expect_skew:
+ if 'MON_CLOCK_SKEW' not in health['checks']:
+ raise RuntimeError('expected MON_CLOCK_SKEW but got none')
+ else:
+ if 'MON_CLOCK_SKEW' in health['checks']:
+ raise RuntimeError('got MON_CLOCK_SKEW but expected none')
+
diff --git a/qa/tasks/mon_recovery.py b/qa/tasks/mon_recovery.py
new file mode 100644
index 00000000..fa7aa1a8
--- /dev/null
+++ b/qa/tasks/mon_recovery.py
@@ -0,0 +1,80 @@
+"""
+Monitor recovery
+"""
+import logging
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test monitor recovery.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)]
+ log.info("mon ids = %s" % mons)
+
+ manager.wait_for_mon_quorum_size(len(mons))
+
+ log.info('verifying all monitors are in the quorum')
+ for m in mons:
+ s = manager.get_mon_status(m)
+ assert s['state'] == 'leader' or s['state'] == 'peon'
+ assert len(s['quorum']) == len(mons)
+
+ log.info('restarting each monitor in turn')
+ for m in mons:
+ # stop a monitor
+ manager.kill_mon(m)
+ manager.wait_for_mon_quorum_size(len(mons) - 1)
+
+ # restart
+ manager.revive_mon(m)
+ manager.wait_for_mon_quorum_size(len(mons))
+
+ # in forward and reverse order,
+ rmons = list(reversed(mons))
+ for mons in mons, rmons:
+ log.info('stopping all monitors')
+ for m in mons:
+ manager.kill_mon(m)
+
+ log.info('forming a minimal quorum for %s, then adding monitors' % mons)
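+ # a quorum needs a strict majority, i.e. floor(len(mons) / 2) + 1 monitors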
+ qnum = (len(mons) // 2) + 1
+ num = 0
+ for m in mons:
+ manager.revive_mon(m)
+ num += 1
+ if num >= qnum:
+ manager.wait_for_mon_quorum_size(num)
+
+ # on both leader and non-leader ranks...
+ for rank in [0, 1]:
+ # take one out
+ log.info('removing mon %s' % mons[rank])
+ manager.kill_mon(mons[rank])
+ manager.wait_for_mon_quorum_size(len(mons) - 1)
+
+ log.info('causing some monitor log activity')
+ m = 30
+ for n in range(1, m):
+ manager.raw_cluster_cmd('log', '%d of %d' % (n, m))
+
+ log.info('adding mon %s back in' % mons[rank])
+ manager.revive_mon(mons[rank])
+ manager.wait_for_mon_quorum_size(len(mons))
diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py
new file mode 100644
index 00000000..d45e8a88
--- /dev/null
+++ b/qa/tasks/mon_thrash.py
@@ -0,0 +1,343 @@
+"""
+Monitor thrash
+"""
+import logging
+import contextlib
+import random
+import time
+import gevent
+import json
+import math
+from teuthology import misc as teuthology
+from tasks import ceph_manager
+
+log = logging.getLogger(__name__)
+
+def _get_mons(ctx):
+ """
+ Get monitor names from the context value.
+ """
+ mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)]
+ return mons
+
+class MonitorThrasher:
+ """
+ How it works::
+
+ - pick a monitor
+ - kill it
+ - wait for quorum to be formed
+ - sleep for 'revive_delay' seconds
+ - revive monitor
+ - wait for quorum to be formed
+ - sleep for 'thrash_delay' seconds
+
+ Options::
+
+ seed Seed to use on the RNG to reproduce a previous
+ behaviour (default: None; i.e., not set)
+ revive_delay Number of seconds to wait before reviving
+ the monitor (default: 10)
+ thrash_delay Number of seconds to wait in-between
+ test iterations (default: 0)
+ store_thrash Thrash the monitor store before killing the monitor
+ being thrashed (default: False)
+ store_thrash_probability Probability of thrashing a monitor's store
+ (default: 50)
+ thrash_many Thrash multiple monitors instead of just one. If
+ 'maintain-quorum' is set to False, then we will
+ thrash up to as many monitors as there are
+ available. (default: False)
+ maintain_quorum Always maintain quorum, being careful about how many
+ monitors we kill during the thrashing. If only one or
+ two monitors are configured and this option is set to
+ True, the task will not run, as we cannot guarantee
+ that quorum is maintained. Setting it to False,
+ however, allows the task to run with as few as a
+ single monitor. (default: True)
+ freeze_mon_probability: how often to freeze the mon instead of killing it,
+ in % (default: 10)
+ freeze_mon_duration: how many seconds to freeze the mon (default: 15)
+ scrub Scrub after each iteration (default: True)
+
+ Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also
+ be set to True.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - mon_thrash:
+ revive_delay: 20
+ thrash_delay: 1
+ store_thrash: true
+ store_thrash_probability: 40
+ seed: 31337
+ maintain_quorum: true
+ thrash_many: true
+ - ceph-fuse:
+ - workunit:
+ clients:
+ all:
+ - mon/workloadgen.sh
+ """
+ def __init__(self, ctx, manager, config, logger):
+ self.ctx = ctx
+ self.manager = manager
+ self.manager.wait_for_clean()
+
+ self.stopping = False
+ self.logger = logger
+ self.config = config
+
+ if self.config is None:
+ self.config = dict()
+
+ """ Test reproducibility """
+ self.random_seed = self.config.get('seed', None)
+
+ if self.random_seed is None:
+ self.random_seed = int(time.time())
+
+ self.rng = random.Random()
+ self.rng.seed(int(self.random_seed))
+
+ """ Monitor thrashing """
+ self.revive_delay = float(self.config.get('revive_delay', 10.0))
+ self.thrash_delay = float(self.config.get('thrash_delay', 0.0))
+
+ self.thrash_many = self.config.get('thrash_many', False)
+ self.maintain_quorum = self.config.get('maintain_quorum', True)
+
+ self.scrub = self.config.get('scrub', True)
+
+ self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10))
+ self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0))
+
+ assert self.max_killable() > 0, \
+ 'Unable to kill at least one monitor with the current config.'
+
+ """ Store thrashing """
+ self.store_thrash = self.config.get('store_thrash', False)
+ self.store_thrash_probability = int(
+ self.config.get('store_thrash_probability', 50))
+ if self.store_thrash:
+ assert self.store_thrash_probability > 0, \
+ 'store_thrash is set, probability must be > 0'
+ assert self.maintain_quorum, \
+ 'store_thrash = true must imply maintain_quorum = true'
+
+ self.thread = gevent.spawn(self.do_thrash)
+
+ def log(self, x):
+ """
+ locally log info messages
+ """
+ self.logger.info(x)
+
+ def do_join(self):
+ """
+ Break out of this processes thrashing loop.
+ """
+ self.stopping = True
+ self.thread.get()
+
+ def should_thrash_store(self):
+ """
+ If allowed, indicate that we should thrash a certain percentage of
+ the time as determined by the store_thrash_probability value.
+ """
+ if not self.store_thrash:
+ return False
+ return self.rng.randrange(0, 101) < self.store_thrash_probability
+
+ def thrash_store(self, mon):
+ """
+ Thrash the monitor specified.
+ :param mon: monitor to thrash
+ """
+ addr = self.ctx.ceph['ceph'].mons['mon.%s' % mon]
+ self.log('thrashing mon.{id}@{addr} store'.format(id=mon, addr=addr))
+ out = self.manager.raw_cluster_cmd('-m', addr, 'sync', 'force')
+ j = json.loads(out)
+ assert j['ret'] == 0, \
+ 'error forcing store sync on mon.{id}:\n{ret}'.format(
+ id=mon,ret=out)
+
+ def should_freeze_mon(self):
+ """
+ Indicate that we should freeze a certain percentage of the time
+ as determined by the freeze_mon_probability value.
+ """
+ return self.rng.randrange(0, 101) < self.freeze_mon_probability
+
+ def freeze_mon(self, mon):
+ """
+ Send STOP signal to freeze the monitor.
+ """
+ log.info('Sending STOP to mon %s', mon)
+ self.manager.signal_mon(mon, 19) # STOP
+
+ def unfreeze_mon(self, mon):
+ """
+ Send CONT signal to unfreeze the monitor.
+ """
+ log.info('Sending CONT to mon %s', mon)
+ self.manager.signal_mon(mon, 18) # CONT
+
+ def kill_mon(self, mon):
+ """
+ Kill the monitor specified
+ """
+ self.log('killing mon.{id}'.format(id=mon))
+ self.manager.kill_mon(mon)
+
+ def revive_mon(self, mon):
+ """
+ Revive the monitor specified
+ """
+ self.log('reviving mon.{id}'.format(id=mon))
+ self.manager.revive_mon(mon)
+
+ def max_killable(self):
+ """
+ Return the maximum number of monitors we can kill.
+ """
+ m = len(_get_mons(self.ctx))
+ if self.maintain_quorum:
+ return max(math.ceil(m/2.0)-1, 0)
+ else:
+ return m
+
+ def do_thrash(self):
+ """
+ Continuously loop and thrash the monitors.
+ """
+ self.log('start thrashing')
+ self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\
+ 'thrash many: {tm}, maintain quorum: {mq} '\
+ 'store thrash: {st}, probability: {stp} '\
+ 'freeze mon: prob {fp} duration {fd}'.format(
+ s=self.random_seed,r=self.revive_delay,t=self.thrash_delay,
+ tm=self.thrash_many, mq=self.maintain_quorum,
+ st=self.store_thrash,stp=self.store_thrash_probability,
+ fp=self.freeze_mon_probability,fd=self.freeze_mon_duration,
+ ))
+
+ while not self.stopping:
+ mons = _get_mons(self.ctx)
+ self.manager.wait_for_mon_quorum_size(len(mons))
+ self.log('making sure all monitors are in the quorum')
+ for m in mons:
+ s = self.manager.get_mon_status(m)
+ assert s['state'] == 'leader' or s['state'] == 'peon'
+ assert len(s['quorum']) == len(mons)
+
+ kill_up_to = self.rng.randrange(1, self.max_killable()+1)
+ mons_to_kill = self.rng.sample(mons, kill_up_to)
+ self.log('monitors to thrash: {m}'.format(m=mons_to_kill))
+
+ mons_to_freeze = []
+ for mon in mons:
+ if mon in mons_to_kill:
+ continue
+ if self.should_freeze_mon():
+ mons_to_freeze.append(mon)
+ self.log('monitors to freeze: {m}'.format(m=mons_to_freeze))
+
+ for mon in mons_to_kill:
+ self.log('thrashing mon.{m}'.format(m=mon))
+
+ """ we only thrash stores if we are maintaining quorum """
+ if self.should_thrash_store() and self.maintain_quorum:
+ self.thrash_store(mon)
+
+ self.kill_mon(mon)
+
+ if mons_to_freeze:
+ for mon in mons_to_freeze:
+ self.freeze_mon(mon)
+ self.log('waiting for {delay} secs to unfreeze mons'.format(
+ delay=self.freeze_mon_duration))
+ time.sleep(self.freeze_mon_duration)
+ for mon in mons_to_freeze:
+ self.unfreeze_mon(mon)
+
+ if self.maintain_quorum:
+ self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill))
+ for m in mons:
+ if m in mons_to_kill:
+ continue
+ s = self.manager.get_mon_status(m)
+ assert s['state'] == 'leader' or s['state'] == 'peon'
+ assert len(s['quorum']) == len(mons)-len(mons_to_kill)
+
+ self.log('waiting for {delay} secs before reviving monitors'.format(
+ delay=self.revive_delay))
+ time.sleep(self.revive_delay)
+
+ for mon in mons_to_kill:
+ self.revive_mon(mon)
+ # do more freezes
+ if mons_to_freeze:
+ for mon in mons_to_freeze:
+ self.freeze_mon(mon)
+ self.log('waiting for {delay} secs to unfreeze mons'.format(
+ delay=self.freeze_mon_duration))
+ time.sleep(self.freeze_mon_duration)
+ for mon in mons_to_freeze:
+ self.unfreeze_mon(mon)
+
+ self.manager.wait_for_mon_quorum_size(len(mons))
+ for m in mons:
+ s = self.manager.get_mon_status(m)
+ assert s['state'] == 'leader' or s['state'] == 'peon'
+ assert len(s['quorum']) == len(mons)
+
+ if self.scrub:
+ self.log('triggering scrub')
+ try:
+ self.manager.raw_cluster_cmd('scrub')
+ except Exception as e:
+ log.warning("Ignoring exception while triggering scrub: %s", e)
+
+ if self.thrash_delay > 0.0:
+ self.log('waiting for {delay} secs before continuing thrashing'.format(
+ delay=self.thrash_delay))
+ time.sleep(self.thrash_delay)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Stress test the monitor by thrashing them while another task/workunit
+ is running.
+
+ Please refer to MonitorThrasher class for further information on the
+ available options.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'mon_thrash task only accepts a dict for configuration'
+ assert len(_get_mons(ctx)) > 2, \
+ 'mon_thrash task requires at least 3 monitors'
+ log.info('Beginning mon_thrash...')
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+ thrash_proc = MonitorThrasher(ctx,
+ manager, config,
+ logger=log.getChild('mon_thrasher'))
+ try:
+ log.debug('Yielding')
+ yield
+ finally:
+ log.info('joining mon_thrasher')
+ thrash_proc.do_join()
+ mons = _get_mons(ctx)
+ manager.wait_for_mon_quorum_size(len(mons))
diff --git a/qa/tasks/multibench.py b/qa/tasks/multibench.py
new file mode 100644
index 00000000..c2a7299f
--- /dev/null
+++ b/qa/tasks/multibench.py
@@ -0,0 +1,61 @@
+"""
+Multibench testing
+"""
+import contextlib
+import logging
+import time
+import copy
+import gevent
+
+from tasks import radosbench
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run multibench
+
+ The config should be as follows:
+
+ multibench:
+ time: <seconds to run total>
+ segments: <number of concurrent benches>
+ radosbench: <config for radosbench>
+
+ example:
+
+ tasks:
+ - ceph:
+ - multibench:
+ clients: [client.0]
+ time: 360
+ - interactive:
+ """
+ log.info('Beginning multibench...')
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+
+ def run_one(num):
+ """Run test spawn from gevent"""
+ start = time.time()
+ if not config.get('radosbench'):
+ benchcontext = {}
+ else:
+ benchcontext = copy.copy(config.get('radosbench'))
+ iterations = 0
+ while time.time() - start < int(config.get('time', 600)):
+ log.info("Starting iteration %s of segment %s"%(iterations, num))
+ benchcontext['pool'] = str(num) + "-" + str(iterations)
+ with radosbench.task(ctx, benchcontext):
+ time.sleep(1) # time.sleep() requires an interval; pause briefly between iterations
+ iterations += 1
+ log.info("Starting %s threads"%(str(config.get('segments', 3)),))
+ segments = [
+ gevent.spawn(run_one, i)
+ for i in range(0, int(config.get('segments', 3)))]
+
+ try:
+ yield
+ finally:
+ [i.get() for i in segments]
diff --git a/qa/tasks/netem.py b/qa/tasks/netem.py
new file mode 100644
index 00000000..1d9fd98f
--- /dev/null
+++ b/qa/tasks/netem.py
@@ -0,0 +1,268 @@
+"""
+Task to run tests with network delay between two remotes using tc and netem.
+Reference: https://wiki.linuxfoundation.org/networking/netem.
+
+"""
+
+import logging
+import contextlib
+from paramiko import SSHException
+import socket
+import time
+import gevent
+import argparse
+
+log = logging.getLogger(__name__)
+
+
+def set_priority(interface):
+
+ # create a priority queueing discipline
+ return ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'root', 'handle', '1:', 'prio']
+
+
+def show_tc(interface):
+
+ # shows tc device present
+ return ['sudo', 'tc', 'qdisc', 'show', 'dev', interface]
+
+
+def del_tc(interface):
+
+ return ['sudo', 'tc', 'qdisc', 'del', 'dev', interface, 'root']
+
+
+def cmd_prefix(interface):
+
+ # prepare command to set delay
+ cmd1 = ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'parent',
+ '1:1', 'handle', '2:', 'netem', 'delay']
+
+ # prepare command to change delay
+ cmd2 = ['sudo', 'tc', 'qdisc', 'replace', 'dev', interface, 'root', 'netem', 'delay']
+
+ # prepare command to apply filter to the matched ip/host
+
+ cmd3 = ['sudo', 'tc', 'filter', 'add', 'dev', interface,
+ 'parent', '1:0', 'protocol', 'ip', 'pref', '55',
+ 'handle', '::55', 'u32', 'match', 'ip', 'dst']
+
+ return cmd1, cmd2, cmd3
+
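+# For illustration only (the interface, delay and destination IP below are
+# hypothetical), static_delay() ends up running commands roughly like:
+#   sudo tc qdisc add dev eno1 parent 1:1 handle 2: netem delay 10ms 5ms distribution normal
+#   sudo tc filter add dev eno1 parent 1:0 protocol ip pref 55 handle ::55 u32 match ip dst 10.0.0.2 flowid 2:1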
+
+def static_delay(remote, host, interface, delay):
+
+ """ Sets a constant delay between two hosts to emulate network delays using tc qdisc and netem"""
+
+ set_delay, change_delay, set_ip = cmd_prefix(interface)
+
+ ip = socket.gethostbyname(host.hostname)
+
+ tc = remote.sh(show_tc(interface))
+ if tc.strip().find('refcnt') == -1:
+ # call set_priority() func to create priority queue
+ # if not already created(indicated by -1)
+ log.info('Create priority queue')
+ remote.run(args=set_priority(interface))
+
+ # set static delay, with +/- 5ms jitter with normal distribution as default
+ log.info('Setting delay to %s' % delay)
+ set_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal'])
+ remote.run(args=set_delay)
+
+ # set delay to a particular remote node via ip
+ log.info('Delay set on %s' % remote)
+ set_ip.extend(['%s' % ip, 'flowid', '2:1'])
+ remote.run(args=set_ip)
+ else:
+ # if the device is already created, only change the delay
+ log.info('Setting delay to %s' % delay)
+ change_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal'])
+ remote.run(args=change_delay)
+
+
+def variable_delay(remote, host, interface, delay_range=[]):
+
+ """ Vary delay between two values"""
+
+ set_delay, change_delay, set_ip = cmd_prefix(interface)
+
+ ip = socket.gethostbyname(host.hostname)
+
+ # delay1 has to be lower than delay2
+ delay1 = delay_range[0]
+ delay2 = delay_range[1]
+
+ tc = remote.sh(show_tc(interface))
+ if tc.strip().find('refcnt') == -1:
+ # call set_priority() func to create priority queue
+ # if not already created (indicated by -1)
+ remote.run(args=set_priority(interface))
+
+ # set variable delay
+ log.info('Setting varying delay')
+ set_delay.extend(['%s' % delay1, '%s' % delay2])
+ remote.run(args=set_delay)
+
+ # set delay to a particular remote node via ip
+ log.info('Delay set on %s' % remote)
+ set_ip.extend(['%s' % ip, 'flowid', '2:1'])
+ remote.run(args=set_ip)
+ else:
+ # if the device is already created, only change the delay
+ log.info('Setting varying delay')
+ change_delay.extend(['%s' % delay1, '%s' % delay2])
+ remote.run(args=change_delay)
+
+
+def delete_dev(remote, interface):
+
+ """ Delete the qdisc if present"""
+
+ log.info('Delete tc')
+ tc = remote.sh(show_tc(interface))
+ if tc.strip().find('refcnt') != -1:
+ remote.run(args=del_tc(interface))
+
+
+class Toggle:
+
+ stop_event = gevent.event.Event()
+
+ def __init__(self, ctx, remote, host, interface, interval):
+ self.ctx = ctx
+ self.remote = remote
+ self.host = host
+ self.interval = interval
+ self.interface = interface
+ self.ip = socket.gethostbyname(self.host.hostname)
+
+ def packet_drop(self):
+
+ """ Drop packets to the remote ip specified"""
+
+ _, _, set_ip = cmd_prefix(self.interface)
+
+ tc = self.remote.sh(show_tc(self.interface))
+ if tc.strip().find('refcnt') == -1:
+ self.remote.run(args=set_priority(self.interface))
+ # packet drop to specific ip
+ log.info('Drop all packets to %s' % self.host)
+ set_ip.extend(['%s' % self.ip, 'action', 'drop'])
+ self.remote.run(args=set_ip)
+
+ def link_toggle(self):
+
+ """
+ For toggling packet drop and recovery at a regular interval.
+ If the interval is 5s, the link is up for 5s and then down for 5s.
+ """
+
+ while not self.stop_event.is_set():
+ self.stop_event.wait(timeout=self.interval)
+ # simulate link down
+ try:
+ self.packet_drop()
+ log.info('link down')
+ except SSHException:
+ log.debug('Failed to run command')
+
+ self.stop_event.wait(timeout=self.interval)
+ # if the qdisc exists, delete it.
+ try:
+ delete_dev(self.remote, self.interface)
+ log.info('link up')
+ except SSHException:
+ log.debug('Failed to run command')
+
+ def begin(self, gname):
+ self.thread = gevent.spawn(self.link_toggle)
+ self.ctx.netem.names[gname] = self.thread
+
+ def end(self, gname):
+ self.stop_event.set()
+ log.info('gname is {}'.format(self.ctx.netem.names[gname]))
+ self.ctx.netem.names[gname].get()
+
+ def cleanup(self):
+ """
+ Invoked during unwinding if the test fails or exits before executing task 'link_recover'
+ """
+ log.info('Clean up')
+ self.stop_event.set()
+ self.thread.get()
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+
+ """
+ - netem:
+ clients: [c1.rgw.0]
+ iface: eno1
+ dst_client: [c2.rgw.1]
+ delay: 10ms
+
+ - netem:
+ clients: [c1.rgw.0]
+ iface: eno1
+ dst_client: [c2.rgw.1]
+ delay_range: [10ms, 20ms] # (min, max)
+
+ - netem:
+ clients: [rgw.1, mon.0]
+ iface: eno1
+ gname: t1
+ dst_client: [c2.rgw.1]
+ link_toggle_interval: 10 # interval in seconds
+
+ - netem:
+ clients: [rgw.1, mon.0]
+ iface: eno1
+ link_recover: [t1, t2]
+
+
+ """
+
+ log.info('config %s' % config)
+
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+ if not hasattr(ctx, 'netem'):
+ ctx.netem = argparse.Namespace()
+ ctx.netem.names = {}
+
+ if config.get('dst_client') is not None:
+ dst = config.get('dst_client')
+ (host,) = ctx.cluster.only(dst).remotes.keys()
+
+ for role in config.get('clients', None):
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ ctx.netem.remote = remote
+ if config.get('delay', False):
+ static_delay(remote, host, config.get('iface'), config.get('delay'))
+ if config.get('delay_range', False):
+ variable_delay(remote, host, config.get('iface'), config.get('delay_range'))
+ if config.get('link_toggle_interval', False):
+ log.info('Toggling link for %s' % config.get('link_toggle_interval'))
+ global toggle
+ toggle = Toggle(ctx, remote, host, config.get('iface'), config.get('link_toggle_interval'))
+ toggle.begin(config.get('gname'))
+ if config.get('link_recover', False):
+ log.info('Recovering link')
+ for gname in config.get('link_recover'):
+ toggle.end(gname)
+ log.info('sleeping')
+ time.sleep(config.get('link_toggle_interval'))
+ delete_dev(ctx.netem.remote, config.get('iface'))
+ del ctx.netem.names[gname]
+
+ try:
+ yield
+ finally:
+ if ctx.netem.names:
+ toggle.cleanup()
+ for role in config.get('clients'):
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ delete_dev(remote, config.get('iface'))
+
diff --git a/qa/tasks/object_source_down.py b/qa/tasks/object_source_down.py
new file mode 100644
index 00000000..e4519bb6
--- /dev/null
+++ b/qa/tasks/object_source_down.py
@@ -0,0 +1,101 @@
+"""
+Test Object locations going down
+"""
+import logging
+import time
+from teuthology import misc as teuthology
+from tasks import ceph_manager
+from tasks.util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test handling of object location going down
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'object_source_down task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.wait_for_clean()
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+
+ # take 0, 1 out
+ manager.mark_out_osd(0)
+ manager.mark_out_osd(1)
+ manager.wait_for_clean()
+
+ # delay recovery, and make the pg log very long (to prevent backfill)
+ # on each of the osds
+ for osd_id in range(4):
+ manager.raw_cluster_cmd(
+ 'tell', 'osd.%d' % osd_id,
+ 'injectargs',
+ '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000'
+ )
+
+ # kludge to make sure they get a map
+ rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile])
+
+ # create old objects
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+
+ manager.mark_out_osd(3)
+ manager.wait_till_active()
+
+ manager.mark_in_osd(0)
+ manager.wait_till_active()
+
+ manager.flush_pg_stats([2, 0])
+
+ manager.mark_out_osd(2)
+ manager.wait_till_active()
+
+ # bring up 1
+ manager.mark_in_osd(1)
+ manager.wait_till_active()
+
+ manager.flush_pg_stats([0, 1])
+ log.info("Getting unfound objects")
+ unfound = manager.get_num_unfound_objects()
+ assert not unfound
+
+ manager.kill_osd(2)
+ manager.mark_down_osd(2)
+ manager.kill_osd(3)
+ manager.mark_down_osd(3)
+
+ manager.flush_pg_stats([0, 1])
+ log.info("Getting unfound objects")
+ unfound = manager.get_num_unfound_objects()
+ assert unfound
diff --git a/qa/tasks/omapbench.py b/qa/tasks/omapbench.py
new file mode 100644
index 00000000..af0793d9
--- /dev/null
+++ b/qa/tasks/omapbench.py
@@ -0,0 +1,85 @@
+"""
+Run omapbench executable within teuthology
+"""
+import contextlib
+import logging
+
+import six
+
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run omapbench
+
+ The config should be as follows::
+
+ omapbench:
+ clients: [client list]
+ threads: <threads at once>
+ objects: <number of objects to write>
+ entries: <number of entries per object map>
+ keysize: <number of characters per object map key>
+ valsize: <number of characters per object map val>
+ increment: <interval to show in histogram (in ms)>
+ omaptype: <how the omaps should be generated>
+
+ example::
+
+ tasks:
+ - ceph:
+ - omapbench:
+ clients: [client.0]
+ threads: 30
+ objects: 1000
+ entries: 10
+ keysize: 10
+ valsize: 100
+ increment: 100
+ omaptype: uniform
+ - interactive:
+ """
+ log.info('Beginning omapbench...')
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+ omapbench = {}
+ testdir = teuthology.get_testdir(ctx)
+ log.info('increment: %s', config.get('increment', -1))
+ for role in config.get('clients', ['client.0']):
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ proc = remote.run(
+ args=[
+ "/bin/sh", "-c",
+ " ".join(['adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage',
+ 'omapbench',
+ '--name', role[len(PREFIX):],
+ '-t', str(config.get('threads', 30)),
+ '-o', str(config.get('objects', 1000)),
+ '--entries', str(config.get('entries',10)),
+ '--keysize', str(config.get('keysize',10)),
+ '--valsize', str(config.get('valsize',1000)),
+ '--inc', str(config.get('increment',10)),
+ '--omaptype', str(config.get('omaptype','uniform'))
+ ]).format(tdir=testdir),
+ ],
+ logger=log.getChild('omapbench.{id}'.format(id=id_)),
+ stdin=run.PIPE,
+ wait=False
+ )
+ omapbench[id_] = proc
+
+ try:
+ yield
+ finally:
+ log.info('joining omapbench')
+ run.wait(omapbench.values())
diff --git a/qa/tasks/openssl_keys.py b/qa/tasks/openssl_keys.py
new file mode 100644
index 00000000..3cc4ed8a
--- /dev/null
+++ b/qa/tasks/openssl_keys.py
@@ -0,0 +1,227 @@
+"""
+Generates and installs a signed SSL certificate.
+"""
+import argparse
+import logging
+import os
+
+from teuthology import misc
+from teuthology.exceptions import ConfigError
+from teuthology.orchestra import run
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+
+class OpenSSLKeys(Task):
+ name = 'openssl_keys'
+ """
+ Generates and installs a signed SSL certificate.
+
+ To create a self-signed certificate:
+
+ - openssl_keys:
+ # certificate name
+ root: # results in root.key and root.crt
+
+ # [required] make the private key and certificate available in this client's test directory
+ client: client.0
+
+ # common name, defaults to `hostname`. chained certificates must not share a common name
+ cn: teuthology
+
+ # private key type for -newkey, defaults to rsa:2048
+ key-type: rsa:4096
+
+ # install the certificate as trusted on these clients:
+ install: [client.0, client.1]
+
+
+ To create a certificate signed by a ca certificate:
+
+ - openssl_keys:
+ root: (self-signed certificate as above)
+ ...
+
+ cert-for-client1:
+ client: client.1
+
+ # use another ssl certificate (by 'name') as the certificate authority
+ ca: root # signed with -CA root.crt -CAkey root.key
+
+ # embed the private key in the certificate file
+ embed-key: true
+ """
+
+ def __init__(self, ctx, config):
+ super(OpenSSLKeys, self).__init__(ctx, config)
+ self.certs = []
+ self.installed = []
+
+ def setup(self):
+ # global dictionary allows other tasks to look up certificate paths
+ if not hasattr(self.ctx, 'ssl_certificates'):
+ self.ctx.ssl_certificates = {}
+
+ # use testdir/ca as a working directory
+ self.cadir = '/'.join((misc.get_testdir(self.ctx), 'ca'))
+ # make sure self-signed certs get added first, they don't have 'ca' field
+ configs = sorted(self.config.items(), key=lambda x: 'ca' in x[1])
+ for name, config in configs:
+ # names must be unique to avoid clobbering each others files
+ if name in self.ctx.ssl_certificates:
+ raise ConfigError('ssl: duplicate certificate name {}'.format(name))
+
+ # create the key and certificate
+ cert = self.create_cert(name, config)
+
+ self.ctx.ssl_certificates[name] = cert
+ self.certs.append(cert)
+
+ # install as trusted on the requested clients
+ for client in config.get('install', []):
+ installed = self.install_cert(cert, client)
+ self.installed.append(installed)
+
+ def teardown(self):
+ """
+ Clean up any created/installed certificate files.
+ """
+ for cert in self.certs:
+ self.remove_cert(cert)
+
+ for installed in self.installed:
+ self.uninstall_cert(installed)
+
+ def create_cert(self, name, config):
+ """
+ Create a certificate with the given configuration.
+ """
+ cert = argparse.Namespace()
+ cert.name = name
+ cert.key_type = config.get('key-type', 'rsa:2048')
+
+ cert.client = config.get('client', None)
+ if not cert.client:
+ raise ConfigError('ssl: missing required field "client"')
+
+ (cert.remote,) = self.ctx.cluster.only(cert.client).remotes.keys()
+
+ cert.remote.run(args=['mkdir', '-p', self.cadir])
+
+ cert.key = '{}/{}.key'.format(self.cadir, cert.name)
+ cert.certificate = '{}/{}.crt'.format(self.cadir, cert.name)
+
+ # provide the common name in -subj to avoid the openssl command prompts
+ subject = '/CN={}'.format(config.get('cn', cert.remote.hostname))
+
+ # if a ca certificate is provided, use it to sign the new certificate
+ ca = config.get('ca', None)
+ if ca:
+ # the ca certificate must have been created by a prior ssl task
+ ca_cert = self.ctx.ssl_certificates.get(ca, None)
+ if not ca_cert:
+ raise ConfigError('ssl: ca {} not found for certificate {}'
+ .format(ca, cert.name))
+
+ # these commands are run on the ca certificate's client because
+ # they need access to its private key and cert
+
+ # generate a private key and signing request
+ csr = '{}/{}.csr'.format(self.cadir, cert.name)
+ ca_cert.remote.run(args=['openssl', 'req', '-nodes',
+ '-newkey', cert.key_type, '-keyout', cert.key,
+ '-out', csr, '-subj', subject])
+
+ # create the signed certificate
+ ca_cert.remote.run(args=['openssl', 'x509', '-req', '-in', csr,
+ '-CA', ca_cert.certificate, '-CAkey', ca_cert.key, '-CAcreateserial',
+ '-out', cert.certificate, '-days', '365', '-sha256'])
+
+ srl = '{}/{}.srl'.format(self.cadir, ca_cert.name)
+ ca_cert.remote.run(args=['rm', csr, srl]) # clean up the signing request and serial
+
+ # verify the new certificate against its ca cert
+ ca_cert.remote.run(args=['openssl', 'verify',
+ '-CAfile', ca_cert.certificate, cert.certificate])
+
+ if cert.remote != ca_cert.remote:
+ # copy to remote client
+ self.remote_copy_file(ca_cert.remote, cert.certificate, cert.remote, cert.certificate)
+ self.remote_copy_file(ca_cert.remote, cert.key, cert.remote, cert.key)
+ # clean up the local copies
+ ca_cert.remote.run(args=['rm', cert.certificate, cert.key])
+ # verify the remote certificate (requires ca to be in its trusted ca certificate store)
+ cert.remote.run(args=['openssl', 'verify', cert.certificate])
+ else:
+ # otherwise, generate a private key and use it to self-sign a new certificate
+ cert.remote.run(args=['openssl', 'req', '-x509', '-nodes',
+ '-newkey', cert.key_type, '-keyout', cert.key,
+ '-days', '365', '-out', cert.certificate, '-subj', subject])
+
+ if config.get('embed-key', False):
+ # append the private key to the certificate file
+ cert.remote.run(args=['cat', cert.key, run.Raw('>>'), cert.certificate])
+
+ return cert
+
+ def remove_cert(self, cert):
+ """
+ Delete all of the files associated with the given certificate.
+ """
+ # remove the private key and certificate
+ cert.remote.run(args=['rm', '-f', cert.certificate, cert.key])
+
+ # remove ca subdirectory if it's empty
+ cert.remote.run(args=['rmdir', '--ignore-fail-on-non-empty', self.cadir])
+
+ def install_cert(self, cert, client):
+ """
+ Install as a trusted ca certificate on the given client.
+ """
+ (remote,) = self.ctx.cluster.only(client).remotes.keys()
+
+ installed = argparse.Namespace()
+ installed.remote = remote
+
+ if remote.os.package_type == 'deb':
+ installed.path = '/usr/local/share/ca-certificates/{}.crt'.format(cert.name)
+ installed.command = ['sudo', 'update-ca-certificates']
+ else:
+ installed.path = '/usr/share/pki/ca-trust-source/anchors/{}.crt'.format(cert.name)
+ installed.command = ['sudo', 'update-ca-trust']
+
+ cp_or_mv = 'cp'
+ if remote != cert.remote:
+ # copy into remote cadir (with mkdir if necessary)
+ remote.run(args=['mkdir', '-p', self.cadir])
+ self.remote_copy_file(cert.remote, cert.certificate, remote, cert.certificate)
+ cp_or_mv = 'mv' # move this remote copy into the certificate store
+
+ # install into certificate store as root
+ remote.run(args=['sudo', cp_or_mv, cert.certificate, installed.path])
+ remote.run(args=installed.command)
+
+ return installed
+
+ def uninstall_cert(self, installed):
+ """
+ Uninstall a certificate from the trusted certificate store.
+ """
+ installed.remote.run(args=['sudo', 'rm', installed.path])
+ installed.remote.run(args=installed.command)
+
+ def remote_copy_file(self, from_remote, from_path, to_remote, to_path):
+ """
+ Copies a file from one remote to another.
+
+ The remotes don't have public-key auth for 'scp' or misc.copy_file(),
+ so this copies through an intermediate local tmp file.
+ """
+ log.info('copying from {}:{} to {}:{}...'.format(from_remote, from_path, to_remote, to_path))
+ local_path = from_remote.get_file(from_path)
+ try:
+ to_remote.put_file(local_path, to_path)
+ finally:
+ os.remove(local_path)
+
+task = OpenSSLKeys
diff --git a/qa/tasks/osd_backfill.py b/qa/tasks/osd_backfill.py
new file mode 100644
index 00000000..b33e1c91
--- /dev/null
+++ b/qa/tasks/osd_backfill.py
@@ -0,0 +1,104 @@
+"""
+Osd backfill test
+"""
+import logging
+import time
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+
+def rados_start(ctx, remote, cmd):
+ """
+ Run a remote rados command (currently used to only write data)
+ """
+ log.info("rados %s" % ' '.join(cmd))
+ testdir = teuthology.get_testdir(ctx)
+ pre = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rados',
+ ]
+ pre.extend(cmd)
+ proc = remote.run(
+ args=pre,
+ wait=False,
+ )
+ return proc
+
+def task(ctx, config):
+ """
+ Test backfill
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'osd_backfill task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ log.info('num_osds is %s' % num_osds)
+ assert num_osds == 3
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.wait_for_clean()
+
+ # write some data
+ p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096',
+ '--no-cleanup'])
+ err = p.wait()
+ log.info('err is %d' % err)
+
+ # mark osd.0 out to trigger a rebalance/backfill
+ manager.mark_out_osd(0)
+
+ # also mark it down so it won't be included in pg_temps
+ manager.kill_osd(0)
+ manager.mark_down_osd(0)
+
+ # wait for everything to peer and be happy...
+ manager.flush_pg_stats([1, 2])
+ manager.wait_for_recovery()
+
+ # write some new data
+ p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096',
+ '--no-cleanup'])
+
+ time.sleep(15)
+
+ # blackhole + restart osd.1
+ # this triggers a divergent backfill target
+ manager.blackhole_kill_osd(1)
+ time.sleep(2)
+ manager.revive_osd(1)
+
+ # wait for our writes to complete + succeed
+ err = p.wait()
+ log.info('err is %d' % err)
+
+ # wait for osd.1 and osd.2 to be up
+ manager.wait_till_osd_is_up(1)
+ manager.wait_till_osd_is_up(2)
+
+ # cluster must recover
+ manager.flush_pg_stats([1, 2])
+ manager.wait_for_recovery()
+
+ # re-add osd.0
+ manager.revive_osd(0)
+ manager.flush_pg_stats([1, 2])
+ manager.wait_for_clean()
+
+
diff --git a/qa/tasks/osd_failsafe_enospc.py b/qa/tasks/osd_failsafe_enospc.py
new file mode 100644
index 00000000..4b2cdb98
--- /dev/null
+++ b/qa/tasks/osd_failsafe_enospc.py
@@ -0,0 +1,219 @@
+"""
+Handle osdfailsafe configuration settings (nearfull ratio and full ratio)
+"""
+from io import BytesIO
+import logging
+import six
+import time
+
+from teuthology.orchestra import run
+from tasks.util.rados import rados
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
+ configuration settings
+
+ In order for the test to pass, log-whitelist must be used as follows
+
+ tasks:
+ - chef:
+ - install:
+ - ceph:
+ log-whitelist: ['OSD near full', 'OSD full dropping all updates']
+ - osd_failsafe_enospc:
+
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'osd_failsafe_enospc task only accepts a dict for configuration'
+
+ # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
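+ # i.e. 2 + 30 + 2 * 6 + 6 = 50 seconds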
+ sleep_time = 50
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+ dummyfile2 = '/etc/resolv.conf'
+
+ manager = ctx.managers['ceph']
+
+ # create 1 pg pool with 1 rep which can only be on osd.0
+ osds = manager.get_osd_dump()
+ for osd in osds:
+ if osd['osd'] != 0:
+ manager.mark_out_osd(osd['osd'])
+
+ log.info('creating pool foo')
+ manager.create_pool("foo")
+ manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')
+
+ # State NONE -> NEAR
+ log.info('1. Verify warning messages when exceeding nearfull_ratio')
+
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ proc = mon.run(
+ args=[
+ 'sudo',
+ 'daemon-helper',
+ 'kill',
+ 'ceph', '-w'
+ ],
+ stdin=run.PIPE,
+ stdout=BytesIO(),
+ wait=False,
+ )
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001')
+
+ time.sleep(sleep_time)
+ proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w
+ proc.wait()
+
+ lines = six.ensure_str(proc.stdout.getvalue()).split('\n')
+
+ count = len([line for line in lines if '[WRN] OSD near full' in line])
+ assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
+ count = len([line for line in lines if '[ERR] OSD full dropping all updates' in line])
+ assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
+
+ # State NEAR -> FULL
+ log.info('2. Verify error messages when exceeding full_ratio')
+
+ proc = mon.run(
+ args=[
+ 'sudo',
+ 'daemon-helper',
+ 'kill',
+ 'ceph', '-w'
+ ],
+ stdin=run.PIPE,
+ stdout=BytesIO(),
+ wait=False,
+ )
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
+
+ time.sleep(sleep_time)
+ proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w
+ proc.wait()
+
+ lines = six.ensure_str(proc.stdout.getvalue()).split('\n')
+
+ count = len([line for line in lines if '[ERR] OSD full dropping all updates' in line])
+ assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
+
+ log.info('3. Verify write failure when exceeding full_ratio')
+
+ # Write data should fail
+ ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
+ assert ret != 0, 'Expected write failure but it succeeded with exit status 0'
+
+ # Put back default
+ manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
+ time.sleep(10)
+
+ # State FULL -> NEAR
+ log.info('4. Verify write success when NOT exceeding full_ratio')
+
+ # Write should succeed
+ ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
+ assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret
+
+ log.info('5. Verify warning messages again when exceeding nearfull_ratio')
+
+ proc = mon.run(
+ args=[
+ 'sudo',
+ 'daemon-helper',
+ 'kill',
+ 'ceph', '-w'
+ ],
+ stdin=run.PIPE,
+ stdout=BytesIO(),
+ wait=False,
+ )
+
+ time.sleep(sleep_time)
+ proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w
+ proc.wait()
+
+ lines = six.ensure_str(proc.stdout.getvalue()).split('\n')
+
+ count = len([line for line in lines if '[WRN] OSD near full' in line])
+ assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
+ count = len([line for line in lines if '[ERR] OSD full dropping all updates' in line])
+ assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
+ time.sleep(10)
+
+ # State NONE -> FULL
+ log.info('6. Verify error messages again when exceeding full_ratio')
+
+ proc = mon.run(
+ args=[
+ 'sudo',
+ 'daemon-helper',
+ 'kill',
+ 'ceph', '-w'
+ ],
+ stdin=run.PIPE,
+ stdout=BytesIO(),
+ wait=False,
+ )
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
+
+ time.sleep(sleep_time)
+ proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w
+ proc.wait()
+
+ lines = six.ensure_str(proc.stdout.getvalue()).split('\n')
+
+ count = len([line for line in lines if '[WRN] OSD near full' in line])
+ assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
+ count = len([line for line in lines if '[ERR] OSD full dropping all updates' in line])
+ assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
+
+ # State FULL -> NONE
+ log.info('7. Verify no messages settings back to default')
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
+ time.sleep(10)
+
+ proc = mon.run(
+ args=[
+ 'sudo',
+ 'daemon-helper',
+ 'kill',
+ 'ceph', '-w'
+ ],
+ stdin=run.PIPE,
+ stdout=BytesIO(),
+ wait=False,
+ )
+
+ time.sleep(sleep_time)
+ proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w
+ proc.wait()
+
+ lines = six.ensure_str(proc.stdout.getvalue()).split('\n')
+
+ count = len([line for line in lines if '[WRN] OSD near full' in line])
+ assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
+ count = len([line for line in lines if '[ERR] OSD full dropping all updates' in line])
+ assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
+
+ log.info('Test Passed')
+
+ # Bring all OSDs back in
+ manager.remove_pool("foo")
+ for osd in osds:
+ if osd['osd'] != 0:
+ manager.mark_in_osd(osd['osd'])
diff --git a/qa/tasks/osd_max_pg_per_osd.py b/qa/tasks/osd_max_pg_per_osd.py
new file mode 100644
index 00000000..6680fe6e
--- /dev/null
+++ b/qa/tasks/osd_max_pg_per_osd.py
@@ -0,0 +1,126 @@
+import logging
+import random
+
+
+log = logging.getLogger(__name__)
+
+
+def pg_num_in_all_states(pgs, *states):
+ return sum(1 for state in pgs.values()
+ if all(s in state for s in states))
+
+
+def pg_num_in_any_state(pgs, *states):
+ return sum(1 for state in pgs.values()
+ if any(s in state for s in states))
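+
+# For example, with a hypothetical pg map, pg_num_in_all_states(pgs, 'active',
+# 'clean') counts PGs whose state includes both 'active' and 'clean', while
+# pg_num_in_any_state(pgs, 'unknown', 'creating') counts PGs in either state.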
+
+
+def test_create_from_mon(ctx, config):
+ """
+ The osd should stop creating new PGs if the number of PGs it serves
+ exceeds the max-pg-per-osd setting, and it should resume the previously
+ suspended pg creations once its PG count drops back below the setting.
+ How it works::
+ 0. set the hard limit of pg-per-osd to "2"
+ 1. create pool.a with pg_num=2
+ # all pgs should be active+clean
+ 2. create pool.b with pg_num=2
+ # new pgs belonging to this pool should be unknown (the primary osd
+ reaches the limit) or creating (replica osd reaches the limit)
+ 3. remove pool.a
+ 4. all pgs belonging to pool.b should be active+clean
+ """
+ pg_num = config.get('pg_num', 2)
+ manager = ctx.managers['ceph']
+ log.info('1. creating pool.a')
+ pool_a = manager.create_pool_with_unique_name(pg_num)
+ pg_states = manager.wait_till_pg_convergence(300)
+ pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
+ assert pg_created == pg_num
+
+ log.info('2. creating pool.b')
+ pool_b = manager.create_pool_with_unique_name(pg_num)
+ pg_states = manager.wait_till_pg_convergence(300)
+ pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
+ assert pg_created == pg_num
+ pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating')
+ assert pg_pending == pg_num
+
+ log.info('3. removing pool.a')
+ manager.remove_pool(pool_a)
+ pg_states = manager.wait_till_pg_convergence(300)
+ assert len(pg_states) == pg_num
+ pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
+ assert pg_created == pg_num
+
+ # cleanup
+ manager.remove_pool(pool_b)
+
+
+def test_create_from_peer(ctx, config):
+ """
+ The osd should stop creating new PGs if the number of PGs it serves
+ exceeds the max-pg-per-osd setting, and it should resume the previously
+ suspended pg creations once its PG count drops back below the setting.
+
+ How it works::
+ 0. create 4 OSDs.
+ 1. create pool.a with pg_num=1, size=2
+ pg will be mapped to osd.0, and osd.1, and it should be active+clean
+ 2. create pool.b with pg_num=1, size=2.
+ if the pgs get stuck in creating, delete the pool and try
+ again; eventually we'll get the pool to land on the other 2 osds that
+ aren't occupied by pool.a. (this will also verify that pgs for deleted
+ pools get cleaned out of the creating wait list.)
+ 3. mark an osd out. verify that some pgs get stuck stale or peering.
+ 4. delete a pool, verify pgs go active.
+ """
+ pg_num = config.get('pg_num', 1)
+ from_primary = config.get('from_primary', True)
+
+ manager = ctx.managers['ceph']
+ log.info('1. creating pool.a')
+ pool_a = manager.create_pool_with_unique_name(pg_num)
+ pg_states = manager.wait_till_pg_convergence(300)
+ pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
+ assert pg_created == pg_num
+
+ log.info('2. creating pool.b')
+ while True:
+ pool_b = manager.create_pool_with_unique_name(pg_num)
+ pg_states = manager.wait_till_pg_convergence(300)
+ pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
+ assert pg_created >= pg_num
+ pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating')
+ assert pg_pending == pg_num * 2 - pg_created
+ if pg_created == pg_num * 2:
+ break
+ manager.remove_pool(pool_b)
+
+ log.info('3. mark an osd out')
+ pg_stats = manager.get_pg_stats()
+ pg = random.choice(pg_stats)
+ if from_primary:
+ victim = pg['acting'][-1]
+ else:
+ victim = pg['acting'][0]
+ manager.mark_out_osd(victim)
+ pg_states = manager.wait_till_pg_convergence(300)
+ pg_stuck = pg_num_in_any_state(pg_states, 'activating', 'stale', 'peering')
+ assert pg_stuck > 0
+
+ log.info('4. removing pool.b')
+ manager.remove_pool(pool_b)
+ manager.wait_for_clean(30)
+
+ # cleanup
+ manager.remove_pool(pool_a)
+
+
+def task(ctx, config):
+ assert isinstance(config, dict), \
+ 'osd_max_pg_per_osd task only accepts a dict for config'
+ if config.get('test_create_from_mon', True):
+ test_create_from_mon(ctx, config)
+ else:
+ test_create_from_peer(ctx, config)
diff --git a/qa/tasks/osd_recovery.py b/qa/tasks/osd_recovery.py
new file mode 100644
index 00000000..b0623c21
--- /dev/null
+++ b/qa/tasks/osd_recovery.py
@@ -0,0 +1,193 @@
+"""
+osd recovery
+"""
+import logging
+import time
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+
+def rados_start(testdir, remote, cmd):
+ """
+ Run a remote rados command (currently used to only write data)
+    Run a remote rados command (currently used only to write data)
+ log.info("rados %s" % ' '.join(cmd))
+ pre = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rados',
+        ]
+ pre.extend(cmd)
+ proc = remote.run(
+ args=pre,
+ wait=False,
+ )
+ return proc
+
+def task(ctx, config):
+ """
+ Test (non-backfill) recovery
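+
+    A minimal usage sketch (illustrative; the task needs no configuration of
+    its own and asserts a 3-OSD cluster)::
+
+        tasks:
+        - ceph:
+        - osd_recovery: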
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'task only accepts a dict for configuration'
+ testdir = teuthology.get_testdir(ctx)
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ log.info('num_osds is %s' % num_osds)
+ assert num_osds == 3
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.wait_for_clean()
+
+ # test some osdmap flags
+ manager.raw_cluster_cmd('osd', 'set', 'noin')
+ manager.raw_cluster_cmd('osd', 'set', 'noout')
+ manager.raw_cluster_cmd('osd', 'set', 'noup')
+ manager.raw_cluster_cmd('osd', 'set', 'nodown')
+ manager.raw_cluster_cmd('osd', 'unset', 'noin')
+ manager.raw_cluster_cmd('osd', 'unset', 'noout')
+ manager.raw_cluster_cmd('osd', 'unset', 'noup')
+ manager.raw_cluster_cmd('osd', 'unset', 'nodown')
+
+ # write some new data
+ p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096',
+ '--no-cleanup'])
+
+ time.sleep(15)
+
+ # trigger a divergent target:
+ # blackhole + restart osd.1 (shorter log)
+ manager.blackhole_kill_osd(1)
+ # kill osd.2 (longer log... we'll make it divergent below)
+ manager.kill_osd(2)
+ time.sleep(2)
+ manager.revive_osd(1)
+
+ # wait for our writes to complete + succeed
+ err = p.wait()
+ log.info('err is %d' % err)
+
+ # cluster must repeer
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_active_or_down()
+
+ # write some more (make sure osd.2 really is divergent)
+ p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096'])
+ p.wait()
+
+ # revive divergent osd
+ manager.revive_osd(2)
+
+ while len(manager.get_osd_status()['up']) < 3:
+ log.info('waiting a bit...')
+ time.sleep(2)
+ log.info('3 are up!')
+
+ # cluster must recover
+ manager.flush_pg_stats([0, 1, 2])
+ manager.wait_for_clean()
+
+
+def test_incomplete_pgs(ctx, config):
+ """
+ Test handling of incomplete pgs. Requires 4 osds.
+ """
+ testdir = teuthology.get_testdir(ctx)
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ log.info('num_osds is %s' % num_osds)
+ assert num_osds == 4
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 4:
+ time.sleep(10)
+
+ manager.flush_pg_stats([0, 1, 2, 3])
+ manager.wait_for_clean()
+
+ log.info('Testing incomplete pgs...')
+
+ for i in range(4):
+ manager.set_config(
+ i,
+ osd_recovery_delay_start=1000)
+
+ # move data off of osd.0, osd.1
+ manager.raw_cluster_cmd('osd', 'out', '0', '1')
+ manager.flush_pg_stats([0, 1, 2, 3], [0, 1])
+ manager.wait_for_clean()
+
+ # lots of objects in rbd (no pg log, will backfill)
+ p = rados_start(testdir, mon,
+ ['-p', 'rbd', 'bench', '20', 'write', '-b', '1',
+ '--no-cleanup'])
+ p.wait()
+
+ # few objects in rbd pool (with pg log, normal recovery)
+ for f in range(1, 20):
+ p = rados_start(testdir, mon, ['-p', 'rbd', 'put',
+ 'foo.%d' % f, '/etc/passwd'])
+ p.wait()
+
+ # move it back
+ manager.raw_cluster_cmd('osd', 'in', '0', '1')
+ manager.raw_cluster_cmd('osd', 'out', '2', '3')
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2, 3], [2, 3])
+ time.sleep(10)
+ manager.wait_for_active()
+
+ assert not manager.is_clean()
+ assert not manager.is_recovered()
+
+ # kill 2 + 3
+ log.info('stopping 2,3')
+ manager.kill_osd(2)
+ manager.kill_osd(3)
+ log.info('...')
+ manager.raw_cluster_cmd('osd', 'down', '2', '3')
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_active_or_down()
+
+ assert manager.get_num_down() > 0
+
+ # revive 2 + 3
+ manager.revive_osd(2)
+ manager.revive_osd(3)
+ while len(manager.get_osd_status()['up']) < 4:
+ log.info('waiting a bit...')
+ time.sleep(2)
+ log.info('all are up!')
+
+ for i in range(4):
+ manager.kick_recovery_wq(i)
+
+ # cluster must recover
+ manager.wait_for_clean()
diff --git a/qa/tasks/peer.py b/qa/tasks/peer.py
new file mode 100644
index 00000000..6b19096b
--- /dev/null
+++ b/qa/tasks/peer.py
@@ -0,0 +1,90 @@
+"""
+Peer test (Single test, not much configurable here)
+"""
+import logging
+import json
+import time
+
+from tasks import ceph_manager
+from tasks.util.rados import rados
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test peering.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'peer task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.wait_for_clean()
+
+ for i in range(3):
+ manager.set_config(
+ i,
+ osd_recovery_delay_start=120)
+
+    # take one osd down
+ manager.kill_osd(2)
+ manager.mark_down_osd(2)
+
+ # kludge to make sure they get a map
+ rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-'])
+
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_recovery()
+
+ # kill another and revive 2, so that some pgs can't peer.
+ manager.kill_osd(1)
+ manager.mark_down_osd(1)
+ manager.revive_osd(2)
+ manager.wait_till_osd_is_up(2)
+
+ manager.flush_pg_stats([0, 2])
+
+ manager.wait_for_active_or_down()
+
+ manager.flush_pg_stats([0, 2])
+
+ # look for down pgs
+ num_down_pgs = 0
+ pgs = manager.get_pg_stats()
+ for pg in pgs:
+ out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query')
+        log.debug("out string %s", out)
+ j = json.loads(out)
+ log.info("pg is %s, query json is %s", pg, j)
+
+ if pg['state'].count('down'):
+ num_down_pgs += 1
+ # verify that it is blocked on osd.1
+ rs = j['recovery_state']
+ assert len(rs) >= 2
+ assert rs[0]['name'] == 'Started/Primary/Peering/Down'
+ assert rs[1]['name'] == 'Started/Primary/Peering'
+ assert rs[1]['blocked']
+ assert rs[1]['down_osds_we_would_probe'] == [1]
+ assert len(rs[1]['peering_blocked_by']) == 1
+ assert rs[1]['peering_blocked_by'][0]['osd'] == 1
+
+ assert num_down_pgs > 0
+
+ # bring it all back
+ manager.revive_osd(1)
+ manager.wait_till_osd_is_up(1)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.wait_for_clean()
diff --git a/qa/tasks/peering_speed_test.py b/qa/tasks/peering_speed_test.py
new file mode 100644
index 00000000..9dc65836
--- /dev/null
+++ b/qa/tasks/peering_speed_test.py
@@ -0,0 +1,87 @@
+"""
+Remotely run peering tests.
+"""
+import logging
+import time
+
+log = logging.getLogger(__name__)
+
+from teuthology.task.args import argify
+
+POOLNAME = "POOLNAME"
+ARGS = [
+ ('num_pgs', 'number of pgs to create', 256, int),
+ ('max_time', 'seconds to complete peering', 0, int),
+ ('runs', 'trials to run', 10, int),
+ ('num_objects', 'objects to create', 256 * 1024, int),
+ ('object_size', 'size in bytes for objects', 64, int),
+ ('creation_time_limit', 'time limit for pool population', 60*60, int),
+ ('create_threads', 'concurrent writes for create', 256, int)
+ ]
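+
+# A hedged usage sketch: via @argify below, the task reads the keys listed in
+# ARGS from its teuthology config (values here are illustrative only):
+#
+#   tasks:
+#   - ceph:
+#   - peering_speed_test:
+#       num_pgs: 256
+#       runs: 3
+#       max_time: 600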
+
+def setup(ctx, config):
+ """
+ Setup peering test on remotes.
+ """
+ manager = ctx.managers['ceph']
+ manager.clear_pools()
+ manager.create_pool(POOLNAME, config.num_pgs)
+ log.info("populating pool")
+ manager.rados_write_objects(
+ POOLNAME,
+ config.num_objects,
+ config.object_size,
+ config.creation_time_limit,
+ config.create_threads)
+ log.info("done populating pool")
+
+def do_run(ctx, config):
+ """
+ Perform the test.
+ """
+ start = time.time()
+ # mark in osd
+ manager = ctx.managers['ceph']
+ manager.mark_in_osd(0)
+ log.info("writing out objects")
+ manager.rados_write_objects(
+ POOLNAME,
+ config.num_pgs, # write 1 object per pg or so
+ 1,
+ config.creation_time_limit,
+ config.num_pgs, # lots of concurrency
+ cleanup = True)
+ peering_end = time.time()
+
+ log.info("peering done, waiting on recovery")
+ manager.wait_for_clean()
+
+ log.info("recovery done")
+ recovery_end = time.time()
+ if config.max_time:
+ assert(peering_end - start < config.max_time)
+ manager.mark_out_osd(0)
+ manager.wait_for_clean()
+ return {
+ 'time_to_active': peering_end - start,
+ 'time_to_clean': recovery_end - start
+ }
+
+@argify("peering_speed_test", ARGS)
+def task(ctx, config):
+ """
+ Peering speed test
+ """
+ setup(ctx, config)
+ manager = ctx.managers['ceph']
+ manager.mark_out_osd(0)
+ manager.wait_for_clean()
+ ret = []
+ for i in range(config.runs):
+ log.info("Run {i}".format(i = i))
+ ret.append(do_run(ctx, config))
+
+ manager.mark_in_osd(0)
+ ctx.summary['recovery_times'] = {
+ 'runs': ret
+ }
diff --git a/qa/tasks/populate_rbd_pool.py b/qa/tasks/populate_rbd_pool.py
new file mode 100644
index 00000000..76395eb6
--- /dev/null
+++ b/qa/tasks/populate_rbd_pool.py
@@ -0,0 +1,82 @@
+"""
+Populate rbd pools
+"""
+import contextlib
+import logging
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+    Populate <num_pools> pools (named with prefix <pool_prefix>) with
+    <num_images> rbd images each, taking <num_snaps> snapshots per image
+
+ The config could be as follows::
+
+ populate_rbd_pool:
+ client: <client>
+ pool_prefix: foo
+ num_pools: 5
+ num_images: 10
+ num_snaps: 3
+ image_size: 10737418240
+ """
+ if config is None:
+ config = {}
+ client = config.get("client", "client.0")
+ pool_prefix = config.get("pool_prefix", "foo")
+ num_pools = config.get("num_pools", 2)
+ num_images = config.get("num_images", 20)
+ num_snaps = config.get("num_snaps", 4)
+ image_size = config.get("image_size", 100)
+ write_size = config.get("write_size", 1024*1024)
+ write_threads = config.get("write_threads", 10)
+ write_total_per_snap = config.get("write_total_per_snap", 1024*1024*30)
+
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+
+ for poolid in range(num_pools):
+ poolname = "%s-%s" % (pool_prefix, str(poolid))
+ log.info("Creating pool %s" % (poolname,))
+ ctx.managers['ceph'].create_pool(poolname)
+ for imageid in range(num_images):
+ imagename = "rbd-%s" % (str(imageid),)
+ log.info("Creating imagename %s" % (imagename,))
+ remote.run(
+ args = [
+ "rbd",
+ "create",
+ imagename,
+ "--image-format", "1",
+ "--size", str(image_size),
+ "--pool", str(poolname)])
+ def bench_run():
+ remote.run(
+ args = [
+ "rbd",
+ "bench-write",
+ imagename,
+ "--pool", poolname,
+ "--io-size", str(write_size),
+ "--io-threads", str(write_threads),
+ "--io-total", str(write_total_per_snap),
+ "--io-pattern", "rand"])
+ log.info("imagename %s first bench" % (imagename,))
+ bench_run()
+ for snapid in range(num_snaps):
+ snapname = "snap-%s" % (str(snapid),)
+ log.info("imagename %s creating snap %s" % (imagename, snapname))
+ remote.run(
+ args = [
+ "rbd", "snap", "create",
+ "--pool", poolname,
+ "--snap", snapname,
+ imagename
+ ])
+ bench_run()
+
+ try:
+ yield
+ finally:
+ log.info('done')
diff --git a/qa/tasks/qemu.py b/qa/tasks/qemu.py
new file mode 100644
index 00000000..b24ecece
--- /dev/null
+++ b/qa/tasks/qemu.py
@@ -0,0 +1,580 @@
+"""
+Qemu task
+"""
+
+import contextlib
+import logging
+import os
+import yaml
+import time
+
+from tasks import rbd
+from tasks.util.workunit import get_refspec_after_overrides
+from teuthology import contextutil
+from teuthology import misc as teuthology
+from teuthology.config import config as teuth_config
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+DEFAULT_NUM_DISKS = 2
+DEFAULT_IMAGE_URL = 'http://download.ceph.com/qa/ubuntu-12.04.qcow2'
+DEFAULT_IMAGE_SIZE = 10240 # in megabytes
+DEFAULT_CPUS = 1
+DEFAULT_MEM = 4096 # in megabytes
+
+def create_images(ctx, config, managers):
+ for client, client_config in config.items():
+ disks = client_config.get('disks', DEFAULT_NUM_DISKS)
+ if not isinstance(disks, list):
+ disks = [{} for n in range(int(disks))]
+ clone = client_config.get('clone', False)
+ assert disks, 'at least one rbd device must be used'
+ for i, disk in enumerate(disks[1:]):
+ create_config = {
+ client: {
+ 'image_name': '{client}.{num}'.format(client=client,
+ num=i + 1),
+ 'image_format': 2 if clone else 1,
+ 'image_size': (disk or {}).get('image_size',
+ DEFAULT_IMAGE_SIZE),
+ }
+ }
+ managers.append(
+ lambda create_config=create_config:
+ rbd.create_image(ctx=ctx, config=create_config)
+ )
+
+def create_clones(ctx, config, managers):
+ for client, client_config in config.items():
+ clone = client_config.get('clone', False)
+ if clone:
+ num_disks = client_config.get('disks', DEFAULT_NUM_DISKS)
+ if isinstance(num_disks, list):
+ num_disks = len(num_disks)
+ for i in range(num_disks):
+ create_config = {
+ client: {
+ 'image_name':
+ '{client}.{num}-clone'.format(client=client, num=i),
+ 'parent_name':
+ '{client}.{num}'.format(client=client, num=i),
+ }
+ }
+ managers.append(
+ lambda create_config=create_config:
+ rbd.clone_image(ctx=ctx, config=create_config)
+ )
+
+@contextlib.contextmanager
+def create_dirs(ctx, config):
+ """
+ Handle directory creation and cleanup
+ """
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ assert 'test' in client_config, 'You must specify a test to run'
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'install', '-d', '-m0755', '--',
+ '{tdir}/qemu'.format(tdir=testdir),
+ '{tdir}/archive/qemu'.format(tdir=testdir),
+ ]
+ )
+ try:
+ yield
+ finally:
+ for client, client_config in config.items():
+ assert 'test' in client_config, 'You must specify a test to run'
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true',
+ ]
+ )
+
+@contextlib.contextmanager
+def generate_iso(ctx, config):
+ """Execute system commands to generate iso"""
+ log.info('generating iso...')
+ testdir = teuthology.get_testdir(ctx)
+
+ # use ctx.config instead of config, because config has been
+ # through teuthology.replace_all_with_clients()
+ refspec = get_refspec_after_overrides(ctx.config, {})
+
+ git_url = teuth_config.get_ceph_qa_suite_git_url()
+ log.info('Pulling tests from %s ref %s', git_url, refspec)
+
+ for client, client_config in config.items():
+ assert 'test' in client_config, 'You must specify a test to run'
+ test = client_config['test']
+
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+
+ clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=client)
+ remote.run(args=refspec.clone(git_url, clone_dir))
+
+ src_dir = os.path.dirname(__file__)
+ userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client)
+ metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client)
+
+ with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f:
+ test_setup = ''.join(f.readlines())
+ # configuring the commands to setup the nfs mount
+ mnt_dir = "/export/{client}".format(client=client)
+ test_setup = test_setup.format(
+ mnt_dir=mnt_dir
+ )
+
+ with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f:
+ test_teardown = ''.join(f.readlines())
+
+ user_data = test_setup
+ if client_config.get('type', 'filesystem') == 'filesystem':
+ num_disks = client_config.get('disks', DEFAULT_NUM_DISKS)
+ if isinstance(num_disks, list):
+ num_disks = len(num_disks)
+ for i in range(1, num_disks):
+ dev_letter = chr(ord('a') + i)
+ user_data += """
+- |
+ #!/bin/bash
+ mkdir /mnt/test_{dev_letter}
+ mkfs -t xfs /dev/vd{dev_letter}
+ mount -t xfs /dev/vd{dev_letter} /mnt/test_{dev_letter}
+""".format(dev_letter=dev_letter)
+
+ user_data += """
+- |
+ #!/bin/bash
+ test -d /etc/ceph || mkdir /etc/ceph
+ cp /mnt/cdrom/ceph.* /etc/ceph/
+"""
+
+ cloud_config_archive = client_config.get('cloud_config_archive', [])
+ if cloud_config_archive:
+ user_data += yaml.safe_dump(cloud_config_archive, default_style='|',
+ default_flow_style=False)
+
+ # this may change later to pass the directories as args to the
+ # script or something. xfstests needs that.
+ user_data += """
+- |
+ #!/bin/bash
+ test -d /mnt/test_b && cd /mnt/test_b
+ /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success
+""" + test_teardown
+
+ user_data = user_data.format(
+ ceph_branch=ctx.config.get('branch'),
+ ceph_sha1=ctx.config.get('sha1'))
+ teuthology.write_file(remote, userdata_path, user_data)
+
+ with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f:
+ teuthology.write_file(remote, metadata_path, f)
+
+ test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client)
+
+ log.info('fetching test %s for %s', test, client)
+ remote.run(
+ args=[
+ 'cp', '--', os.path.join(clone_dir, test), test_file,
+ run.Raw('&&'),
+ 'chmod', '755', test_file,
+ ],
+ )
+ remote.run(
+ args=[
+ 'genisoimage', '-quiet', '-input-charset', 'utf-8',
+ '-volid', 'cidata', '-joliet', '-rock',
+ '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
+ '-graft-points',
+ 'user-data={userdata}'.format(userdata=userdata_path),
+ 'meta-data={metadata}'.format(metadata=metadata_path),
+ 'ceph.conf=/etc/ceph/ceph.conf',
+ 'ceph.keyring=/etc/ceph/ceph.keyring',
+ 'test.sh={file}'.format(file=test_file),
+ ],
+ )
+ try:
+ yield
+ finally:
+ for client in config.keys():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'rm', '-rf',
+ '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
+ os.path.join(testdir, 'qemu', 'userdata.' + client),
+ os.path.join(testdir, 'qemu', 'metadata.' + client),
+ '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client),
+ '{tdir}/qemu_clone.{client}'.format(tdir=testdir, client=client),
+ ],
+ )
+
+@contextlib.contextmanager
+def download_image(ctx, config):
+    """Download the base image; remove the image file when done"""
+ log.info('downloading base image')
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ base_file = '{tdir}/qemu/base.{client}.qcow2'.format(tdir=testdir, client=client)
+ image_url = client_config.get('image_url', DEFAULT_IMAGE_URL)
+ remote.run(
+ args=[
+ 'wget', '-nv', '-O', base_file, image_url,
+ ]
+ )
+
+ disks = client_config.get('disks', None)
+ if not isinstance(disks, list):
+ disks = [{}]
+ image_name = '{client}.0'.format(client=client)
+ image_size = (disks[0] or {}).get('image_size', DEFAULT_IMAGE_SIZE)
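+        # import the downloaded cloud image into the client's first rbd image
+        # ('<client>.0'), then grow that image to the requested size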
+ remote.run(
+ args=[
+ 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw',
+ base_file, 'rbd:rbd/{image_name}'.format(image_name=image_name)
+ ]
+ )
+ remote.run(
+ args=[
+ 'rbd', 'resize',
+ '--size={image_size}M'.format(image_size=image_size),
+ image_name,
+ ]
+ )
+ try:
+ yield
+ finally:
+ log.debug('cleaning up base image files')
+ for client in config.keys():
+ base_file = '{tdir}/qemu/base.{client}.qcow2'.format(
+ tdir=testdir,
+ client=client,
+ )
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'rm', '-f', base_file,
+ ],
+ )
+
+
+def _setup_nfs_mount(remote, client, mount_dir):
+ """
+ Sets up an nfs mount on the remote that the guest can use to
+ store logs. This nfs mount is also used to touch a file
+ at the end of the test to indicate if the test was successful
+ or not.
+ """
+ export_dir = "/export/{client}".format(client=client)
+ log.info("Creating the nfs export directory...")
+ remote.run(args=[
+ 'sudo', 'mkdir', '-p', export_dir,
+ ])
+ log.info("Mounting the test directory...")
+ remote.run(args=[
+ 'sudo', 'mount', '--bind', mount_dir, export_dir,
+ ])
+ log.info("Adding mount to /etc/exports...")
+ export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format(
+ dir=export_dir
+ )
+ remote.run(args=[
+        'sudo', 'sed', '-i', r'/^\/export\//d', "/etc/exports",
+ ])
+ remote.run(args=[
+ 'echo', export, run.Raw("|"),
+ 'sudo', 'tee', '-a', "/etc/exports",
+ ])
+ log.info("Restarting NFS...")
+ if remote.os.package_type == "deb":
+ remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart'])
+ else:
+ remote.run(args=['sudo', 'systemctl', 'restart', 'nfs'])
+
+
+def _teardown_nfs_mount(remote, client):
+ """
+    Tears down the nfs mount on the remote used for logging and reporting the
+    status of the tests being run in the guest.
+ """
+ log.info("Tearing down the nfs mount for {remote}".format(remote=remote))
+ export_dir = "/export/{client}".format(client=client)
+ log.info("Stopping NFS...")
+ if remote.os.package_type == "deb":
+ remote.run(args=[
+ 'sudo', 'service', 'nfs-kernel-server', 'stop'
+ ])
+ else:
+ remote.run(args=[
+ 'sudo', 'systemctl', 'stop', 'nfs'
+ ])
+ log.info("Unmounting exported directory...")
+ remote.run(args=[
+ 'sudo', 'umount', export_dir
+ ])
+ log.info("Deleting exported directory...")
+ remote.run(args=[
+ 'sudo', 'rm', '-r', '/export'
+ ])
+ log.info("Deleting export from /etc/exports...")
+ remote.run(args=[
+ 'sudo', 'sed', '-i', '$ d', '/etc/exports'
+ ])
+ log.info("Starting NFS...")
+ if remote.os.package_type == "deb":
+ remote.run(args=[
+ 'sudo', 'service', 'nfs-kernel-server', 'start'
+ ])
+ else:
+ remote.run(args=[
+ 'sudo', 'systemctl', 'start', 'nfs'
+ ])
+
+
+@contextlib.contextmanager
+def run_qemu(ctx, config):
+ """Setup kvm environment and start qemu"""
+ procs = []
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client)
+ remote.run(
+ args=[
+ 'mkdir', log_dir, run.Raw('&&'),
+ 'sudo', 'modprobe', 'kvm',
+ ]
+ )
+
+        # make an nfs mount to use for logging and to allow the test to
+        # tell teuthology the tests' outcome
+ _setup_nfs_mount(remote, client, log_dir)
+
+ # Hack to make sure /dev/kvm permissions are set correctly
+ # See http://tracker.ceph.com/issues/17977 and
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1333159
+ remote.run(args='sudo udevadm control --reload')
+ remote.run(args='sudo udevadm trigger /dev/kvm')
+ remote.run(args='ls -l /dev/kvm')
+
+ qemu_cmd = 'qemu-system-x86_64'
+ if remote.os.package_type == "rpm":
+ qemu_cmd = "/usr/libexec/qemu-kvm"
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'daemon-helper',
+ 'term',
+ qemu_cmd, '-enable-kvm', '-nographic', '-cpu', 'host',
+ '-smp', str(client_config.get('cpus', DEFAULT_CPUS)),
+ '-m', str(client_config.get('memory', DEFAULT_MEM)),
+ # cd holding metadata for cloud-init
+ '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client),
+ ]
+
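+        # Choose the qemu drive cache mode from the client's effective rbd
+        # cache settings: writeback when the rbd cache may hold dirty data,
+        # writethrough when it may not, and none when the cache is disabled.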
+ cachemode = 'none'
+ ceph_config = ctx.ceph['ceph'].conf.get('global', {})
+ ceph_config.update(ctx.ceph['ceph'].conf.get('client', {}))
+ ceph_config.update(ctx.ceph['ceph'].conf.get(client, {}))
+ if ceph_config.get('rbd cache', True):
+ if ceph_config.get('rbd cache max dirty', 1) > 0:
+ cachemode = 'writeback'
+ else:
+ cachemode = 'writethrough'
+
+ clone = client_config.get('clone', False)
+ num_disks = client_config.get('disks', DEFAULT_NUM_DISKS)
+ if isinstance(num_disks, list):
+ num_disks = len(num_disks)
+ for i in range(num_disks):
+ suffix = '-clone' if clone else ''
+ args.extend([
+ '-drive',
+ 'file=rbd:rbd/{img}:id={id},format=raw,if=virtio,cache={cachemode}'.format(
+ img='{client}.{num}{suffix}'.format(client=client, num=i,
+ suffix=suffix),
+ id=client[len('client.'):],
+ cachemode=cachemode,
+ ),
+ ])
+ time_wait = client_config.get('time_wait', 0)
+
+ log.info('starting qemu...')
+ procs.append(
+ remote.run(
+ args=args,
+ logger=log.getChild(client),
+ stdin=run.PIPE,
+ wait=False,
+ )
+ )
+
+ try:
+ yield
+ finally:
+ log.info('waiting for qemu tests to finish...')
+ run.wait(procs)
+
+ if time_wait > 0:
+            log.debug('waiting {time_wait} sec for workloads to finish...'.format(
+                time_wait=time_wait))
+ time.sleep(time_wait)
+
+ log.debug('checking that qemu tests succeeded...')
+ for client in config.keys():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+
+ # ensure we have permissions to all the logs
+ log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir,
+ client=client)
+ remote.run(
+ args=[
+ 'sudo', 'chmod', 'a+rw', '-R', log_dir
+ ]
+ )
+
+ # teardown nfs mount
+ _teardown_nfs_mount(remote, client)
+ # check for test status
+ remote.run(
+ args=[
+ 'test', '-f',
+ '{tdir}/archive/qemu/{client}/success'.format(
+ tdir=testdir,
+ client=client
+ ),
+ ],
+ )
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run a test inside of QEMU on top of rbd. Only one test
+ is supported per client.
+
+ For example, you can specify which clients to run on::
+
+ tasks:
+ - ceph:
+ - qemu:
+ client.0:
+ test: http://download.ceph.com/qa/test.sh
+ client.1:
+ test: http://download.ceph.com/qa/test2.sh
+
+ Or use the same settings on all clients:
+
+ tasks:
+ - ceph:
+ - qemu:
+ all:
+ test: http://download.ceph.com/qa/test.sh
+
+ For tests that don't need a filesystem, set type to block::
+
+ tasks:
+ - ceph:
+ - qemu:
+ client.0:
+ test: http://download.ceph.com/qa/test.sh
+ type: block
+
+ The test should be configured to run on /dev/vdb and later
+ devices.
+
+ If you want to run a test that uses more than one rbd image,
+ specify how many images to use::
+
+ tasks:
+ - ceph:
+ - qemu:
+ client.0:
+ test: http://download.ceph.com/qa/test.sh
+ type: block
+ disks: 2
+
+ - or -
+
+ tasks:
+ - ceph:
+ - qemu:
+ client.0:
+ test: http://ceph.com/qa/test.sh
+ type: block
+ disks:
+ - image_size: 1024
+ - image_size: 2048
+
+ You can set the amount of CPUs and memory the VM has (default is 1 CPU and
+ 4096 MB)::
+
+ tasks:
+ - ceph:
+ - qemu:
+ client.0:
+ test: http://download.ceph.com/qa/test.sh
+ cpus: 4
+ memory: 512 # megabytes
+
+ If you want to run a test against a cloned rbd image, set clone to true::
+
+ tasks:
+ - ceph:
+ - qemu:
+ client.0:
+ test: http://download.ceph.com/qa/test.sh
+ clone: true
+
+ If you need to configure additional cloud-config options, set cloud_config
+ to the required data set::
+
+ tasks:
+ - ceph
+ - qemu:
+ client.0:
+ test: http://ceph.com/qa/test.sh
+ cloud_config_archive:
+ - |
+ #/bin/bash
+ touch foo1
+ - content: |
+ test data
+ type: text/plain
+ filename: /tmp/data
+
+ If you need to override the default cloud image, set image_url:
+
+ tasks:
+ - ceph
+ - qemu:
+ client.0:
+ test: http://ceph.com/qa/test.sh
+ image_url: https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img
+ """
+ assert isinstance(config, dict), \
+ "task qemu only supports a dictionary for configuration"
+
+ config = teuthology.replace_all_with_clients(ctx.cluster, config)
+
+ managers = []
+ create_images(ctx=ctx, config=config, managers=managers)
+ managers.extend([
+ lambda: create_dirs(ctx=ctx, config=config),
+ lambda: generate_iso(ctx=ctx, config=config),
+ lambda: download_image(ctx=ctx, config=config),
+ ])
+ create_clones(ctx=ctx, config=config, managers=managers)
+ managers.append(
+ lambda: run_qemu(ctx=ctx, config=config),
+ )
+
+ with contextutil.nested(*managers):
+ yield
diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py
new file mode 100644
index 00000000..66b626a1
--- /dev/null
+++ b/qa/tasks/rados.py
@@ -0,0 +1,272 @@
+"""
+Rados model-based integration tests
+"""
+import contextlib
+import logging
+import gevent
+from teuthology import misc as teuthology
+
+import six
+
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run RadosModel-based integration tests.
+
+ The config should be as follows::
+
+ rados:
+ clients: [client list]
+ ops: <number of ops>
+ objects: <number of objects to use>
+ max_in_flight: <max number of operations in flight>
+ object_size: <size of objects in bytes>
+ min_stride_size: <minimum write stride size in bytes>
+ max_stride_size: <maximum write stride size in bytes>
+ op_weights: <dictionary mapping operation type to integer weight>
+ runs: <number of times to run> - the pool is remade between runs
+ ec_pool: use an ec pool
+ erasure_code_profile: profile to use with the erasure coded pool
+ fast_read: enable ec_pool's fast_read
+ min_size: set the min_size of created pool
+ pool_snaps: use pool snapshots instead of selfmanaged snapshots
+        write_fadvise_dontneed: write with LIBRADOS_OP_FLAG_FADVISE_DONTNEED,
+                                hinting that the data will not be accessed in
+                                the near future, so the OSD backend need not
+                                keep it in its cache.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - rados:
+ clients: [client.0]
+ ops: 1000
+ max_seconds: 0 # 0 for no limit
+ objects: 25
+ max_in_flight: 16
+ object_size: 4000000
+ min_stride_size: 1024
+ max_stride_size: 4096
+ op_weights:
+ read: 20
+ write: 10
+ delete: 2
+ snap_create: 3
+ rollback: 2
+ snap_remove: 0
+ ec_pool: create an ec pool, defaults to False
+ erasure_code_use_overwrites: test overwrites, default false
+ erasure_code_profile:
+ name: teuthologyprofile
+ k: 2
+ m: 1
+ crush-failure-domain: osd
+ pool_snaps: true
+ write_fadvise_dontneed: true
+ runs: 10
+ - interactive:
+
+ Optionally, you can provide the pool name to run against:
+
+ tasks:
+ - ceph:
+ - exec:
+ client.0:
+ - ceph osd pool create foo
+ - rados:
+ clients: [client.0]
+ pools: [foo]
+ ...
+
+ Alternatively, you can provide a pool prefix:
+
+ tasks:
+ - ceph:
+ - exec:
+ client.0:
+ - ceph osd pool create foo.client.0
+ - rados:
+ clients: [client.0]
+ pool_prefix: foo
+ ...
+
+    The tests are run asynchronously; they are not complete when the task
+    returns. For instance:
+
+ - rados:
+ clients: [client.0]
+ pools: [ecbase]
+ ops: 4000
+ objects: 500
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ copy_from: 50
+ - print: "**** done rados ec-cache-agent (part 2)"
+
+    will run the print task immediately after the rados task begins but
+    not after it completes. To make the rados task a blocking / sequential
+    task, use:
+
+ - sequential:
+ - rados:
+ clients: [client.0]
+ pools: [ecbase]
+ ops: 4000
+ objects: 500
+ op_weights:
+ read: 100
+ write: 100
+ delete: 50
+ copy_from: 50
+ - print: "**** done rados ec-cache-agent (part 2)"
+
+ """
+ log.info('Beginning rados...')
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+
+ object_size = int(config.get('object_size', 4000000))
+ op_weights = config.get('op_weights', {})
+ testdir = teuthology.get_testdir(ctx)
+ args = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'ceph_test_rados']
+ if config.get('ec_pool', False):
+ args.extend(['--no-omap'])
+ if not config.get('erasure_code_use_overwrites', False):
+ args.extend(['--ec-pool'])
+ if config.get('write_fadvise_dontneed', False):
+ args.extend(['--write-fadvise-dontneed'])
+ if config.get('set_redirect', False):
+ args.extend(['--set_redirect'])
+ if config.get('set_chunk', False):
+ args.extend(['--set_chunk'])
+ if config.get('low_tier_pool', None):
+ args.extend(['--low_tier_pool', config.get('low_tier_pool', None)])
+ if config.get('pool_snaps', False):
+ args.extend(['--pool-snaps'])
+ args.extend([
+ '--max-ops', str(config.get('ops', 10000)),
+ '--objects', str(config.get('objects', 500)),
+ '--max-in-flight', str(config.get('max_in_flight', 16)),
+ '--size', str(object_size),
+ '--min-stride-size', str(config.get('min_stride_size', object_size // 10)),
+ '--max-stride-size', str(config.get('max_stride_size', object_size // 5)),
+ '--max-seconds', str(config.get('max_seconds', 0))
+ ])
+
+ weights = {}
+ weights['read'] = 100
+ weights['write'] = 100
+ weights['delete'] = 10
+ # Parallel of the op_types in test/osd/TestRados.cc
+ for field in [
+ # read handled above
+ # write handled above
+ # delete handled above
+ "snap_create",
+ "snap_remove",
+ "rollback",
+ "setattr",
+ "rmattr",
+ "watch",
+ "copy_from",
+ "hit_set_list",
+ "is_dirty",
+ "undirty",
+ "cache_flush",
+ "cache_try_flush",
+ "cache_evict",
+ "append",
+ "write",
+ "read",
+ "delete"
+ ]:
+ if field in op_weights:
+ weights[field] = op_weights[field]
+
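+    # With write_append_excl (the default), the write and append weights are
+    # split evenly between the plain op and its exclusive-create variant,
+    # e.g. write: 100 becomes write: 50 plus write_excl: 50.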
+ if config.get('write_append_excl', True):
+ if 'write' in weights:
+ weights['write'] = weights['write'] // 2
+ weights['write_excl'] = weights['write']
+
+ if 'append' in weights:
+ weights['append'] = weights['append'] // 2
+ weights['append_excl'] = weights['append']
+
+ for op, weight in weights.items():
+ args.extend([
+ '--op', op, str(weight)
+ ])
+
+
+ def thread():
+ """Thread spawned by gevent"""
+ clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ log.info('clients are %s' % clients)
+ manager = ctx.managers['ceph']
+ if config.get('ec_pool', False):
+ profile = config.get('erasure_code_profile', {})
+ profile_name = profile.get('name', 'teuthologyprofile')
+ manager.create_erasure_code_profile(profile_name, profile)
+ else:
+ profile_name = None
+ for i in range(int(config.get('runs', '1'))):
+ log.info("starting run %s out of %s", str(i), config.get('runs', '1'))
+ tests = {}
+ existing_pools = config.get('pools', [])
+ created_pools = []
+ for role in config.get('clients', clients):
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+
+ pool = config.get('pool', None)
+ if not pool and existing_pools:
+ pool = existing_pools.pop()
+ else:
+ pool = manager.create_pool_with_unique_name(
+ erasure_code_profile_name=profile_name,
+ erasure_code_use_overwrites=
+ config.get('erasure_code_use_overwrites', False)
+ )
+ created_pools.append(pool)
+ if config.get('fast_read', False):
+ manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool, 'fast_read', 'true')
+                min_size = config.get('min_size', None)
+ if min_size is not None:
+ manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set', pool, 'min_size', str(min_size))
+
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ proc = remote.run(
+ args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args +
+ ["--pool", pool],
+ logger=log.getChild("rados.{id}".format(id=id_)),
+ stdin=run.PIPE,
+ wait=False
+ )
+ tests[id_] = proc
+ run.wait(tests.values())
+
+ for pool in created_pools:
+                manager.wait_snap_trimming_complete(pool)
+ manager.remove_pool(pool)
+
+ running = gevent.spawn(thread)
+
+ try:
+ yield
+ finally:
+ log.info('joining rados')
+ running.get()
diff --git a/qa/tasks/radosbench.py b/qa/tasks/radosbench.py
new file mode 100644
index 00000000..32b09576
--- /dev/null
+++ b/qa/tasks/radosbench.py
@@ -0,0 +1,140 @@
+"""
+Rados benchmarking
+"""
+import contextlib
+import logging
+
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+
+import six
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run radosbench
+
+ The config should be as follows:
+
+ radosbench:
+ clients: [client list]
+ time: <seconds to run>
+ pool: <pool to use>
+ size: write size to use
+ concurrency: max number of outstanding writes (16)
+ objectsize: object size to use
+ unique_pool: use a unique pool, defaults to False
+ ec_pool: create an ec pool, defaults to False
+ create_pool: create pool, defaults to True
+ erasure_code_profile:
+ name: teuthologyprofile
+ k: 2
+ m: 1
+ crush-failure-domain: osd
+ cleanup: false (defaults to true)
+ type: <write|seq|rand> (defaults to write)
+
+    For example::
+
+ tasks:
+ - ceph:
+ - radosbench:
+ clients: [client.0]
+ time: 360
+ - interactive:
+ """
+ log.info('Beginning radosbench...')
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+ radosbench = {}
+
+ testdir = teuthology.get_testdir(ctx)
+ manager = ctx.managers['ceph']
+ runtype = config.get('type', 'write')
+
+ create_pool = config.get('create_pool', True)
+ for role in config.get('clients', ['client.0']):
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+
+ if config.get('ec_pool', False):
+ profile = config.get('erasure_code_profile', {})
+ profile_name = profile.get('name', 'teuthologyprofile')
+ manager.create_erasure_code_profile(profile_name, profile)
+ else:
+ profile_name = None
+
+ cleanup = []
+ if not config.get('cleanup', True):
+ cleanup = ['--no-cleanup']
+
+ pool = config.get('pool', 'data')
+ if create_pool:
+ if pool != 'data':
+ manager.create_pool(pool, erasure_code_profile_name=profile_name)
+ else:
+ pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name)
+
+ concurrency = config.get('concurrency', 16)
+ osize = config.get('objectsize', 65536)
+ if osize == 0:
+ objectsize = []
+ else:
+ objectsize = ['--object-size', str(osize)]
+ size = ['-b', str(config.get('size', 65536))]
+ # If doing a reading run then populate data
+ if runtype != "write":
+ proc = remote.run(
+ args=[
+ "/bin/sh", "-c",
+ " ".join(['adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage',
+ 'rados',
+ '--no-log-to-stderr',
+ '--name', role]
+ + size + objectsize +
+ ['-t', str(concurrency)] +
+ ['-p' , pool,
+ 'bench', str(60), "write", "--no-cleanup"
+ ]).format(tdir=testdir),
+ ],
+ logger=log.getChild('radosbench.{id}'.format(id=id_)),
+ wait=True
+ )
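+            # the prepopulation pass above wrote objects with --no-cleanup;
+            # drop the write-only size arguments so the read benchmark below
+            # simply reads those objects back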
+ size = []
+ objectsize = []
+
+ proc = remote.run(
+ args=[
+ "/bin/sh", "-c",
+ " ".join(['adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage',
+ 'rados',
+ '--no-log-to-stderr',
+ '--name', role]
+ + size + objectsize +
+ ['-p' , pool,
+ 'bench', str(config.get('time', 360)), runtype,
+ ] + cleanup).format(tdir=testdir),
+ ],
+ logger=log.getChild('radosbench.{id}'.format(id=id_)),
+ stdin=run.PIPE,
+ wait=False
+ )
+ radosbench[id_] = proc
+
+ try:
+ yield
+ finally:
+ timeout = config.get('time', 360) * 30 + 300
+ log.info('joining radosbench (timing out after %ss)', timeout)
+ run.wait(radosbench.values(), timeout=timeout)
+
+ if pool != 'data' and create_pool:
+ manager.remove_pool(pool)
diff --git a/qa/tasks/radosbenchsweep.py b/qa/tasks/radosbenchsweep.py
new file mode 100644
index 00000000..0aeb7218
--- /dev/null
+++ b/qa/tasks/radosbenchsweep.py
@@ -0,0 +1,223 @@
+"""
+Rados benchmarking sweep
+"""
+import contextlib
+import logging
+import re
+
+from io import BytesIO
+from itertools import product
+
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+
+import six
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Execute a radosbench parameter sweep
+
+ Puts radosbench in a loop, taking values from the given config at each
+ iteration. If given, the min and max values below create a range, e.g.
+    min_num_replicas=1 and max_num_replicas=3 implies executing with 1-3 replicas.
+
+ Parameters:
+
+ clients: [client list]
+ time: seconds to run (default=120)
+ sizes: [list of object sizes] (default=[4M])
+ mode: <write|read|seq> (default=write)
+ repetitions: execute the same configuration multiple times (default=1)
+ min_num_replicas: minimum number of replicas to use (default = 3)
+ max_num_replicas: maximum number of replicas to use (default = 3)
+ min_num_osds: the minimum number of OSDs in a pool (default=all)
+ max_num_osds: the maximum number of OSDs in a pool (default=all)
+ file: name of CSV-formatted output file (default='radosbench.csv')
+ columns: columns to include (default=all)
+ - rep: execution number (takes values from 'repetitions')
+ - num_osd: number of osds for pool
+ - num_replica: number of replicas
+ - avg_throughput: throughput
+ - avg_latency: latency
+ - stdev_throughput:
+ - stdev_latency:
+
+ Example:
+      - radosbenchsweep:
+ columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput]
+ """
+ log.info('Beginning radosbenchsweep...')
+ assert isinstance(config, dict), 'expecting dictionary for configuration'
+
+ # get and validate config values
+ # {
+
+ # only one client supported for now
+ if len(config.get('clients', [])) != 1:
+ raise Exception("Only one client can be specified")
+
+ # only write mode
+ if config.get('mode', 'write') != 'write':
+ raise Exception("Only 'write' mode supported for now.")
+
+ # OSDs
+ total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ min_num_osds = config.get('min_num_osds', total_osds_in_cluster)
+ max_num_osds = config.get('max_num_osds', total_osds_in_cluster)
+
+ if max_num_osds > total_osds_in_cluster:
+ raise Exception('max_num_osds cannot be greater than total in cluster')
+ if min_num_osds < 1:
+ raise Exception('min_num_osds cannot be less than 1')
+ if min_num_osds > max_num_osds:
+ raise Exception('min_num_osds cannot be greater than max_num_osd')
+ osds = range(0, (total_osds_in_cluster + 1))
+
+ # replicas
+ min_num_replicas = config.get('min_num_replicas', 3)
+ max_num_replicas = config.get('max_num_replicas', 3)
+
+ if min_num_replicas < 1:
+ raise Exception('min_num_replicas cannot be less than 1')
+ if min_num_replicas > max_num_replicas:
+ raise Exception('min_num_replicas cannot be greater than max_replicas')
+ if max_num_replicas > max_num_osds:
+ raise Exception('max_num_replicas cannot be greater than max_num_osds')
+ replicas = range(min_num_replicas, (max_num_replicas + 1))
+
+ # object size
+ sizes = config.get('size', [4 << 20])
+
+ # repetitions
+ reps = range(config.get('repetitions', 1))
+
+ # file
+ fname = config.get('file', 'radosbench.csv')
+ f = open('{}/{}'.format(ctx.archive, fname), 'w')
+ f.write(get_csv_header(config) + '\n')
+ # }
+
+ # set default pools size=1 to avoid 'unhealthy' issues
+ ctx.manager.set_pool_property('data', 'size', 1)
+ ctx.manager.set_pool_property('metadata', 'size', 1)
+ ctx.manager.set_pool_property('rbd', 'size', 1)
+
+ current_osds_out = 0
+
+ # sweep through all parameters
+ for osds_out, size, replica, rep in product(osds, sizes, replicas, reps):
+
+ osds_in = total_osds_in_cluster - osds_out
+
+ if osds_in == 0:
+ # we're done
+ break
+
+ if current_osds_out != osds_out:
+ # take an osd out
+ ctx.manager.raw_cluster_cmd(
+ 'osd', 'reweight', str(osds_out-1), '0.0')
+ wait_until_healthy(ctx, config)
+ current_osds_out = osds_out
+
+ if osds_in not in range(min_num_osds, (max_num_osds + 1)):
+ # no need to execute with a number of osds that wasn't requested
+ continue
+
+ if osds_in < replica:
+ # cannot execute with more replicas than available osds
+ continue
+
+ run_radosbench(ctx, config, f, osds_in, size, replica, rep)
+
+ f.close()
+
+ yield
+
+
+def get_csv_header(conf):
+ all_columns = [
+ 'rep', 'num_osd', 'num_replica', 'avg_throughput',
+ 'avg_latency', 'stdev_throughput', 'stdev_latency'
+ ]
+ given_columns = conf.get('columns', None)
+ if given_columns and len(given_columns) != 0:
+ for column in given_columns:
+ if column not in all_columns:
+ raise Exception('Unknown column ' + column)
+ return ','.join(conf['columns'])
+ else:
+ conf['columns'] = all_columns
+ return ','.join(all_columns)
+
+
+def run_radosbench(ctx, config, f, num_osds, size, replica, rep):
+ pool = ctx.manager.create_pool_with_unique_name()
+
+ ctx.manager.set_pool_property(pool, 'size', replica)
+
+ wait_until_healthy(ctx, config)
+
+ log.info('Executing with parameters: ')
+ log.info(' num_osd =' + str(num_osds))
+ log.info(' size =' + str(size))
+ log.info(' num_replicas =' + str(replica))
+ log.info(' repetition =' + str(rep))
+
+ for role in config.get('clients', ['client.0']):
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+
+ proc = remote.run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{}/archive/coverage'.format(teuthology.get_testdir(ctx)),
+ 'rados',
+ '--no-log-to-stderr',
+ '--name', role,
+ '-b', str(size),
+ '-p', pool,
+ 'bench', str(config.get('time', 120)), 'write',
+ ],
+ logger=log.getChild('radosbench.{id}'.format(id=id_)),
+ stdin=run.PIPE,
+ stdout=BytesIO(),
+ wait=False
+ )
+
+ # parse output to get summary and format it as CSV
+ proc.wait()
+ out = proc.stdout.getvalue()
+ all_values = {
+ 'stdev_throughput': re.sub(r'Stddev Bandwidth: ', '', re.search(
+ r'Stddev Bandwidth:.*', out).group(0)),
+ 'stdev_latency': re.sub(r'Stddev Latency: ', '', re.search(
+ r'Stddev Latency:.*', out).group(0)),
+ 'avg_throughput': re.sub(r'Bandwidth \(MB/sec\): ', '', re.search(
+ r'Bandwidth \(MB/sec\):.*', out).group(0)),
+ 'avg_latency': re.sub(r'Average Latency: ', '', re.search(
+ r'Average Latency:.*', out).group(0)),
+ 'rep': str(rep),
+ 'num_osd': str(num_osds),
+ 'num_replica': str(replica)
+ }
+ values_to_write = []
+ for column in config['columns']:
+ values_to_write.extend([all_values[column]])
+ f.write(','.join(values_to_write) + '\n')
+
+ ctx.manager.remove_pool(pool)
+
+
+def wait_until_healthy(ctx, config):
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys()
+ teuthology.wait_until_healthy(ctx, mon_remote)
diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py
new file mode 100644
index 00000000..13b926a5
--- /dev/null
+++ b/qa/tasks/radosgw_admin.py
@@ -0,0 +1,953 @@
+"""
+Rgw admin testing against a running instance
+"""
+# The test cases in this file have been annotated for inventory.
+# To extract the inventory (in csv format) use the command:
+#
+# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
+#
+# to run this standalone:
+# python qa/tasks/radosgw_admin.py [USER] HOSTNAME
+#
+
+import json
+import logging
+import time
+import datetime
+from six.moves import queue
+
+import sys
+import six
+
+from io import BytesIO
+
+import boto.exception
+import boto.s3.connection
+import boto.s3.acl
+
+import httplib2
+
+
+from tasks.util.rgw import rgwadmin, get_user_summary, get_user_successful_ops
+
+log = logging.getLogger(__name__)
+
+def usage_acc_findentry2(entries, user, add=True):
+ for e in entries:
+ if e['user'] == user:
+ return e
+ if not add:
+ return None
+ e = {'user': user, 'buckets': []}
+ entries.append(e)
+ return e
+def usage_acc_findsum2(summaries, user, add=True):
+ for e in summaries:
+ if e['user'] == user:
+ return e
+ if not add:
+ return None
+ e = {'user': user, 'categories': [],
+ 'total': {'bytes_received': 0,
+ 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 }}
+ summaries.append(e)
+ return e
+def usage_acc_update2(x, out, b_in, err):
+ x['bytes_sent'] += b_in
+ x['bytes_received'] += out
+ x['ops'] += 1
+ if not err:
+ x['successful_ops'] += 1
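+# Check that the reported counters (x2) are at least as large as the locally
+# accumulated ones (x); shortfalls or missing fields are appended to r.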
+def usage_acc_validate_fields(r, x, x2, what):
+ q=[]
+ for field in ['bytes_sent', 'bytes_received', 'ops', 'successful_ops']:
+ try:
+ if x2[field] < x[field]:
+ q.append("field %s: %d < %d" % (field, x2[field], x[field]))
+ except Exception as ex:
+ r.append( "missing/bad field " + field + " in " + what + " " + str(ex))
+ return
+ if len(q) > 0:
+ r.append("incomplete counts in " + what + ": " + ", ".join(q))
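+# usage_acc builds the client-side expectation of RGW usage (per-user entries
+# broken down by bucket and op category, plus per-user summary totals) so that
+# compare_results() can check it against the usage reported by radosgw-admin.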
+class usage_acc:
+ def __init__(self):
+ self.results = {'entries': [], 'summary': []}
+ def findentry(self, user):
+ return usage_acc_findentry2(self.results['entries'], user)
+ def findsum(self, user):
+ return usage_acc_findsum2(self.results['summary'], user)
+ def e2b(self, e, bucket, add=True):
+ for b in e['buckets']:
+ if b['bucket'] == bucket:
+ return b
+ if not add:
+ return None
+ b = {'bucket': bucket, 'categories': []}
+ e['buckets'].append(b)
+ return b
+ def c2x(self, c, cat, add=True):
+ for x in c:
+ if x['category'] == cat:
+ return x
+ if not add:
+ return None
+ x = {'bytes_received': 0, 'category': cat,
+ 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 }
+ c.append(x)
+ return x
+ def update(self, c, cat, user, out, b_in, err):
+ x = self.c2x(c, cat)
+ usage_acc_update2(x, out, b_in, err)
+ if not err and cat == 'create_bucket' and 'owner' not in x:
+ x['owner'] = user
+ def make_entry(self, cat, bucket, user, out, b_in, err):
+ if cat == 'create_bucket' and err:
+ return
+ e = self.findentry(user)
+ b = self.e2b(e, bucket)
+ self.update(b['categories'], cat, user, out, b_in, err)
+ s = self.findsum(user)
+ x = self.c2x(s['categories'], cat)
+ usage_acc_update2(x, out, b_in, err)
+ x = s['total']
+ usage_acc_update2(x, out, b_in, err)
+ def generate_make_entry(self):
+ return lambda cat,bucket,user,out,b_in,err: self.make_entry(cat, bucket, user, out, b_in, err)
+ def get_usage(self):
+ return self.results
+ def compare_results(self, results):
+ if 'entries' not in results or 'summary' not in results:
+ return ['Missing entries or summary']
+ r = []
+ for e in self.results['entries']:
+ try:
+ e2 = usage_acc_findentry2(results['entries'], e['user'], False)
+ except Exception as ex:
+ r.append("malformed entry looking for user "
+ + e['user'] + " " + str(ex))
+ break
+ if e2 == None:
+ r.append("missing entry for user " + e['user'])
+ continue
+ for b in e['buckets']:
+ c = b['categories']
+ if b['bucket'] == 'nosuchbucket':
+ print("got here")
+ try:
+ b2 = self.e2b(e2, b['bucket'], False)
+ if b2 != None:
+ c2 = b2['categories']
+ except Exception as ex:
+ r.append("malformed entry looking for bucket "
+ + b['bucket'] + " in user " + e['user'] + " " + str(ex))
+ break
+ if b2 == None:
+ r.append("can't find bucket " + b['bucket']
+ + " in user " + e['user'])
+ continue
+ for x in c:
+ try:
+ x2 = self.c2x(c2, x['category'], False)
+ except Exception as ex:
+ r.append("malformed entry looking for "
+ + x['category'] + " in bucket " + b['bucket']
+ + " user " + e['user'] + " " + str(ex))
+ break
+ usage_acc_validate_fields(r, x, x2, "entry: category "
+ + x['category'] + " bucket " + b['bucket']
+ + " in user " + e['user'])
+ for s in self.results['summary']:
+ c = s['categories']
+ try:
+ s2 = usage_acc_findsum2(results['summary'], s['user'], False)
+ except Exception as ex:
+                r.append("malformed summary looking for user " + s['user']
+                    + " " + str(ex))
+ break
+ if s2 == None:
+                r.append("missing summary for user " + s['user'])
+ continue
+ try:
+ c2 = s2['categories']
+ except Exception as ex:
+                r.append("malformed summary missing categories for user "
+                    + s['user'] + " " + str(ex))
+ break
+ for x in c:
+ try:
+ x2 = self.c2x(c2, x['category'], False)
+ except Exception as ex:
+                    r.append("malformed summary looking for "
+                        + x['category'] + " user " + s['user'] + " " + str(ex))
+ break
+                usage_acc_validate_fields(r, x, x2, "summary: category "
+                    + x['category'] + " in user " + s['user'])
+ x = s['total']
+ try:
+ x2 = s2['total']
+ except Exception as ex:
+                r.append("malformed summary looking for totals for user "
+                    + s['user'] + " " + str(ex))
+ break
+            usage_acc_validate_fields(r, x, x2, "summary: totals for user " + s['user'])
+ return r
+
+def ignore_this_entry(cat, bucket, user, out, b_in, err):
+ pass
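+# requestlog_queue buffers boto request/response pairs captured through the
+# S3 connection's request hook; log_and_clear() drains the queue and feeds
+# each entry into the usage accumulator callback supplied as 'add'.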
+class requestlog_queue():
+ def __init__(self, add):
+ self.q = queue.Queue(1000)
+ self.adder = add
+ def handle_request_data(self, request, response, error=False):
+ now = datetime.datetime.now()
+ if error:
+ pass
+ elif response.status < 200 or response.status >= 400:
+ error = True
+ self.q.put({'t': now, 'o': request, 'i': response, 'e': error})
+ def clear(self):
+ with self.q.mutex:
+ self.q.queue.clear()
+ def log_and_clear(self, cat, bucket, user, add_entry = None):
+ while not self.q.empty():
+ j = self.q.get()
+ bytes_out = 0
+ if 'Content-Length' in j['o'].headers:
+ bytes_out = int(j['o'].headers['Content-Length'])
+ bytes_in = 0
+ msg = j['i'].msg if six.PY3 else j['i'].msg.dict
+            if 'content-length' in msg:
+ bytes_in = int(msg['content-length'])
+ log.info('RL: %s %s %s bytes_out=%d bytes_in=%d failed=%r'
+ % (cat, bucket, user, bytes_out, bytes_in, j['e']))
+ if add_entry == None:
+ add_entry = self.adder
+ add_entry(cat, bucket, user, bytes_out, bytes_in, j['e'])
+
+def create_presigned_url(conn, method, bucket_name, key_name, expiration):
+ return conn.generate_url(expires_in=expiration,
+ method=method,
+ bucket=bucket_name,
+ key=key_name,
+ query_auth=True,
+ )
+
+def send_raw_http_request(conn, method, bucket_name, key_name, follow_redirects = False):
+ url = create_presigned_url(conn, method, bucket_name, key_name, 3600)
+ print(url)
+ h = httplib2.Http()
+ h.follow_redirects = follow_redirects
+ return h.request(url, method)
+
+
+def get_acl(key):
+ """
+ Helper function to get the xml acl from a key, ensuring that the xml
+ version tag is removed from the acl response
+ """
+ raw_acl = six.ensure_str(key.get_xml_acl())
+
+ def remove_version(string):
+ return string.split(
+ '<?xml version="1.0" encoding="UTF-8"?>'
+ )[-1]
+
+ def remove_newlines(string):
+ return string.strip('\n')
+
+ return remove_version(
+ remove_newlines(raw_acl)
+ )
+
+def task(ctx, config):
+ """
+ Test radosgw-admin functionality against a running rgw instance.
+ """
+ global log
+
+ assert ctx.rgw.config, \
+ "radosgw_admin task needs a config passed from the rgw task"
+ config = ctx.rgw.config
+ log.debug('config is: %r', config)
+
+ clients_from_config = config.keys()
+
+ # choose first client as default
+ client = next(iter(clients_from_config))
+
+ # once the client is chosen, pull the host name and assigned port out of
+ # the role_endpoints that were assigned by the rgw task
+ endpoint = ctx.rgw.role_endpoints[client]
+
+ ##
+ user1='foo'
+ user2='fud'
+ subuser1='foo:foo1'
+ subuser2='foo:foo2'
+ display_name1='Foo'
+ display_name2='Fud'
+ email='foo@foo.com'
+ access_key='9te6NH5mcdcq0Tc5i8i1'
+ secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
+ access_key2='p5YnriCv1nAtykxBrupQ'
+ secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
+ swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
+ swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'
+
+ bucket_name='myfoo'
+ bucket_name2='mybar'
+
+ # connect to rgw
+ connection = boto.s3.connection.S3Connection(
+ aws_access_key_id=access_key,
+ aws_secret_access_key=secret_key,
+ is_secure=False,
+ port=endpoint.port,
+ host=endpoint.hostname,
+ calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+ )
+ connection2 = boto.s3.connection.S3Connection(
+ aws_access_key_id=access_key2,
+ aws_secret_access_key=secret_key2,
+ is_secure=False,
+ port=endpoint.port,
+ host=endpoint.hostname,
+ calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+ )
+
+ acc = usage_acc()
+ rl = requestlog_queue(acc.generate_make_entry())
+ connection.set_request_hook(rl)
+ connection2.set_request_hook(rl)
+
+ # legend (test cases can be easily grep-ed out)
+ # TESTCASE 'testname','object','method','operation','assertion'
+
+ # TESTCASE 'usage-show0' 'usage' 'show' 'all usage' 'succeeds'
+ (err, summary0) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True)
+
+ # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
+ assert err
+
+ # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
+ (err, out) = rgwadmin(ctx, client, [
+ 'user', 'create',
+ '--uid', user1,
+ '--display-name', display_name1,
+ '--email', email,
+ '--access-key', access_key,
+ '--secret', secret_key,
+ '--max-buckets', '4'
+ ],
+ check_status=True)
+
+ # TESTCASE 'duplicate email','user','create','existing user email','fails'
+ (err, out) = rgwadmin(ctx, client, [
+ 'user', 'create',
+ '--uid', user2,
+ '--display-name', display_name2,
+ '--email', email,
+ ])
+ assert err
+
+ # TESTCASE 'info-existing','user','info','existing user','returns correct info'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+ assert out['user_id'] == user1
+ assert out['email'] == email
+ assert out['display_name'] == display_name1
+ assert len(out['keys']) == 1
+ assert out['keys'][0]['access_key'] == access_key
+ assert out['keys'][0]['secret_key'] == secret_key
+ assert not out['suspended']
+
+ # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
+ (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1],
+ check_status=True)
+
+ # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+ assert out['suspended']
+
+ # TESTCASE 're-enable','user','enable','suspended user','succeeds'
+ (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True)
+
+ # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+ assert not out['suspended']
+
+ # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
+ (err, out) = rgwadmin(ctx, client, [
+ 'key', 'create', '--uid', user1,
+ '--access-key', access_key2, '--secret', secret_key2,
+ ], check_status=True)
+
+ # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1],
+ check_status=True)
+ assert len(out['keys']) == 2
+ assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
+ assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2
+
+ # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
+ (err, out) = rgwadmin(ctx, client, [
+ 'key', 'rm', '--uid', user1,
+ '--access-key', access_key2,
+ ], check_status=True)
+ assert len(out['keys']) == 1
+ assert out['keys'][0]['access_key'] == access_key
+ assert out['keys'][0]['secret_key'] == secret_key
+
+ # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
+ subuser_access = 'full'
+ subuser_perm = 'full-control'
+
+ (err, out) = rgwadmin(ctx, client, [
+ 'subuser', 'create', '--subuser', subuser1,
+ '--access', subuser_access
+ ], check_status=True)
+
+ # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
+ (err, out) = rgwadmin(ctx, client, [
+ 'subuser', 'modify', '--subuser', subuser1,
+ '--secret', swift_secret1,
+ '--key-type', 'swift',
+ ], check_status=True)
+
+ # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
+
+ assert out['subusers'][0]['permissions'] == subuser_perm
+
+ # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+ assert len(out['swift_keys']) == 1
+ assert out['swift_keys'][0]['user'] == subuser1
+ assert out['swift_keys'][0]['secret_key'] == swift_secret1
+
+ # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
+ (err, out) = rgwadmin(ctx, client, [
+ 'subuser', 'create', '--subuser', subuser2,
+ '--secret', swift_secret2,
+ '--key-type', 'swift',
+ ], check_status=True)
+
+ # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True)
+ assert len(out['swift_keys']) == 2
+ assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
+ assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2
+
+ # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
+ (err, out) = rgwadmin(ctx, client, [
+ 'key', 'rm', '--subuser', subuser1,
+ '--key-type', 'swift',
+ ], check_status=True)
+ assert len(out['swift_keys']) == 1
+
+ # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
+ (err, out) = rgwadmin(ctx, client, [
+ 'subuser', 'rm', '--subuser', subuser1,
+ ], check_status=True)
+ assert len(out['subusers']) == 1
+
+ # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key are removed'
+ (err, out) = rgwadmin(ctx, client, [
+ 'subuser', 'rm', '--subuser', subuser2,
+ '--key-type', 'swift', '--purge-keys',
+ ], check_status=True)
+ assert len(out['swift_keys']) == 0
+ assert len(out['subusers']) == 0
+
+ # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1],
+ check_status=True)
+ assert len(out) == 0
+
+ # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True)
+ assert len(out) == 0
+
+ # create a first bucket
+ bucket = connection.create_bucket(bucket_name)
+
+ rl.log_and_clear("create_bucket", bucket_name, user1)
+
+ # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True)
+ assert len(out) == 1
+ assert out[0] == bucket_name
+
+ bucket_list = connection.get_all_buckets()
+ assert len(bucket_list) == 1
+ assert bucket_list[0].name == bucket_name
+
+ rl.log_and_clear("list_buckets", '', user1)
+
+ # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True)
+ assert len(out) >= 1
+ assert bucket_name in out
+
+ # TESTCASE 'max-bucket-limit','bucket','create','4 buckets','5th bucket fails due to max buckets == 4'
+ bucket2 = connection.create_bucket(bucket_name + '2')
+ rl.log_and_clear("create_bucket", bucket_name + '2', user1)
+ bucket3 = connection.create_bucket(bucket_name + '3')
+ rl.log_and_clear("create_bucket", bucket_name + '3', user1)
+ bucket4 = connection.create_bucket(bucket_name + '4')
+ rl.log_and_clear("create_bucket", bucket_name + '4', user1)
+ # the 5th should fail.
+ failed = False
+ try:
+ connection.create_bucket(bucket_name + '5')
+ except Exception:
+ failed = True
+ assert failed
+ rl.log_and_clear("create_bucket", bucket_name + '5', user1)
+
+ # delete the buckets
+ bucket2.delete()
+ rl.log_and_clear("delete_bucket", bucket_name + '2', user1)
+ bucket3.delete()
+ rl.log_and_clear("delete_bucket", bucket_name + '3', user1)
+ bucket4.delete()
+ rl.log_and_clear("delete_bucket", bucket_name + '4', user1)
+
+ # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
+ (err, out) = rgwadmin(ctx, client, [
+ 'bucket', 'stats', '--bucket', bucket_name], check_status=True)
+ assert out['owner'] == user1
+ bucket_id = out['id']
+
+ # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True)
+ assert len(out) == 1
+ assert out[0]['id'] == bucket_id # does it return the same ID twice in a row?
+
+ # use some space
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('one')
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
+ (err, out) = rgwadmin(ctx, client, [
+ 'bucket', 'stats', '--bucket', bucket_name], check_status=True)
+ assert out['id'] == bucket_id
+ assert out['usage']['rgw.main']['num_objects'] == 1
+ assert out['usage']['rgw.main']['size_kb'] > 0
+
+ # reclaim it
+ key.delete()
+ rl.log_and_clear("delete_obj", bucket_name, user1)
+
+ # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from owning user', 'succeeds, bucket unlinked'
+ (err, out) = rgwadmin(ctx, client,
+ ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name],
+ check_status=True)
+
+ # create a second user to link the bucket to
+ (err, out) = rgwadmin(ctx, client, [
+ 'user', 'create',
+ '--uid', user2,
+ '--display-name', display_name2,
+ '--access-key', access_key2,
+ '--secret', secret_key2,
+ '--max-buckets', '1',
+ ],
+ check_status=True)
+
+ # try creating an object with the first user before the bucket is relinked
+ denied = False
+ key = boto.s3.key.Key(bucket)
+
+ try:
+ key.set_contents_from_string('two')
+ except boto.exception.S3ResponseError:
+ denied = True
+
+ assert not denied
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # delete the object
+ key.delete()
+ rl.log_and_clear("delete_obj", bucket_name, user1)
+
+ # link the bucket to another user
+ (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n=bucket_name)],
+ check_status=True)
+
+ bucket_data = out['data']
+ assert bucket_data['bucket']['name'] == bucket_name
+
+ bucket_id = bucket_data['bucket']['bucket_id']
+
+ # link the bucket to another user
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name, '--bucket-id', bucket_id],
+ check_status=True)
+
+ # try to remove user, should fail (has a linked bucket)
+ (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2])
+ assert err
+
+ # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked'
+ (err, out) = rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name],
+ check_status=True)
+
+ # relink the bucket to the first user and delete the second user
+ (err, out) = rgwadmin(ctx, client,
+ ['bucket', 'link', '--uid', user1, '--bucket', bucket_name, '--bucket-id', bucket_id],
+ check_status=True)
+
+ (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2],
+ check_status=True)
+
+ # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
+
+ # upload an object
+ object_name = 'four'
+ key = boto.s3.key.Key(bucket, object_name)
+ key.set_contents_from_string(object_name)
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # fetch it too (for usage stats presently)
+ s = key.get_contents_as_string(encoding='ascii')
+ rl.log_and_clear("get_obj", bucket_name, user1)
+ assert s == object_name
+ # list bucket too (for usage stats presently)
+ keys = list(bucket.list())
+ rl.log_and_clear("list_bucket", bucket_name, user1)
+ assert len(keys) == 1
+ assert keys[0].name == object_name
+
+ # now delete it
+ (err, out) = rgwadmin(ctx, client,
+ ['object', 'rm', '--bucket', bucket_name, '--object', object_name],
+ check_status=True)
+
+ # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists no objects'
+ (err, out) = rgwadmin(ctx, client, [
+ 'bucket', 'stats', '--bucket', bucket_name],
+ check_status=True)
+ assert out['id'] == bucket_id
+ assert out['usage']['rgw.main']['num_objects'] == 0
+
+ # list log objects
+ # TESTCASE 'log-list','log','list','after activity','succeeds, lists at least one log object'
+ (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True)
+ assert len(out) > 0
+
+ for obj in out:
+ # TESTCASE 'log-show','log','show','after activity','returns expected info'
+ if obj[:4] == 'meta' or obj[:4] == 'data' or obj[:18] == 'obj_delete_at_hint':
+ continue
+
+ (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj],
+ check_status=True)
+ assert len(rgwlog) > 0
+
+ # exempt bucket_name2 from checking as it was only used for multi-region tests
+ assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0
+ assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id
+ assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2
+ for entry in rgwlog['log_entries']:
+ log.debug('checking log entry: %s', entry)
+ assert entry['bucket'] == rgwlog['bucket']
+ possible_buckets = [bucket_name + '5', bucket_name2]
+ user = entry['user']
+ assert user == user1 or user.endswith('system-user') or \
+ rgwlog['bucket'] in possible_buckets
+
+ # TESTCASE 'log-rm','log','rm','delete log objects','succeeds'
+ (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj],
+ check_status=True)
+
+ # TODO: show log by bucket+date
+
+ # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
+ (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1],
+ check_status=True)
+
+ # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
+ denied = False
+ try:
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('five')
+ except boto.exception.S3ResponseError as e:
+ denied = True
+ assert e.status == 403
+
+ assert denied
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
+ (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1],
+ check_status=True)
+
+ # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('six')
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection'
+
+ # create an object large enough to be split into multiple parts
+ test_string = 'foo'*10000000
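+ # roughly 30 MB, so rgw stores a head object plus tail objects that must
+ # later be garbage collected once the head is deleted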
+
+ big_key = boto.s3.key.Key(bucket)
+ big_key.set_contents_from_string(test_string)
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # now delete the head
+ big_key.delete()
+ rl.log_and_clear("delete_obj", bucket_name, user1)
+
+ # wait a bit to give the garbage collector time to cycle
+ time.sleep(15)
+
+ (err, out) = rgwadmin(ctx, client, ['gc', 'list'])
+
+ assert len(out) > 0
+
+ # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage'
+ (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True)
+
+ # confirm the garbage list is now empty
+ (err, out) = rgwadmin(ctx, client, ['gc', 'list'])
+
+ assert len(out) == 0
+
+ # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
+ (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1])
+ assert err
+
+ # delete should fail because ``key`` still exists
+ try:
+ bucket.delete()
+ except boto.exception.S3ResponseError as e:
+ assert e.status == 409
+ rl.log_and_clear("delete_bucket", bucket_name, user1)
+
+ key.delete()
+ rl.log_and_clear("delete_obj", bucket_name, user1)
+ bucket.delete()
+ rl.log_and_clear("delete_bucket", bucket_name, user1)
+
+ # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
+ bucket = connection.create_bucket(bucket_name)
+ rl.log_and_clear("create_bucket", bucket_name, user1)
+
+ # create an object
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('seven')
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ # should be private already but guarantee it
+ key.set_acl('private')
+ rl.log_and_clear("put_acls", bucket_name, user1)
+
+ (err, out) = rgwadmin(ctx, client,
+ ['policy', '--bucket', bucket.name, '--object', six.ensure_str(key.key)],
+ check_status=True, format='xml')
+
+ acl = get_acl(key)
+ rl.log_and_clear("get_acls", bucket_name, user1)
+
+ assert acl == out.strip('\n')
+
+ # add another grantee by making the object public read
+ key.set_acl('public-read')
+ rl.log_and_clear("put_acls", bucket_name, user1)
+
+ (err, out) = rgwadmin(ctx, client,
+ ['policy', '--bucket', bucket.name, '--object', six.ensure_str(key.key)],
+ check_status=True, format='xml')
+
+ acl = get_acl(key)
+ rl.log_and_clear("get_acls", bucket_name, user1)
+
+ assert acl == out.strip('\n')
+
+ # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
+ bucket = connection.create_bucket(bucket_name)
+ rl.log_and_clear("create_bucket", bucket_name, user1)
+ key_name = ['eight', 'nine', 'ten', 'eleven']
+ for i in range(4):
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string(key_name[i])
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ (err, out) = rgwadmin(ctx, client,
+ ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'],
+ check_status=True)
+
+ # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
+ caps='user=read'
+ (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps])
+
+ assert out['caps'][0]['perm'] == 'read'
+
+ # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
+ (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps])
+
+ assert not out['caps']
+
+ # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
+ bucket = connection.create_bucket(bucket_name)
+ rl.log_and_clear("create_bucket", bucket_name, user1)
+ key = boto.s3.key.Key(bucket)
+
+ (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1])
+ assert err
+
+ # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
+ bucket = connection.create_bucket(bucket_name)
+ rl.log_and_clear("create_bucket", bucket_name, user1)
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('twelve')
+ rl.log_and_clear("put_obj", bucket_name, user1)
+
+ time.sleep(35)
+
+ # need to wait for all usage data to get flushed, should take up to 30 seconds
+ timestamp = time.time()
+ while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes
+ (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj']) # one of the operations we did is delete_obj, should be present.
+ if get_user_successful_ops(out, user1) > 0:
+ break
+ time.sleep(1)
+
+ assert time.time() - timestamp <= (20 * 60)
+
+ # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
+ (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True)
+ assert len(out['entries']) > 0
+ assert len(out['summary']) > 0
+
+ r = acc.compare_results(out)
+ if len(r) != 0:
+ sys.stderr.write(("\n".join(r))+"\n")
+ assert len(r) == 0
+
+ user_summary = get_user_summary(out, user1)
+
+ total = user_summary['total']
+ assert total['successful_ops'] > 0
+
+ # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
+ (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1],
+ check_status=True)
+ assert len(out['entries']) > 0
+ assert len(out['summary']) > 0
+ user_summary = out['summary'][0]
+ for entry in user_summary['categories']:
+ assert entry['successful_ops'] > 0
+ assert user_summary['user'] == user1
+
+ # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
+ test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
+ for cat in test_categories:
+ (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat],
+ check_status=True)
+ assert len(out['summary']) > 0
+ user_summary = out['summary'][0]
+ assert user_summary['user'] == user1
+ assert len(user_summary['categories']) == 1
+ entry = user_summary['categories'][0]
+ assert entry['category'] == cat
+ assert entry['successful_ops'] > 0
+
+ # we should be done with ``connection`` at this point; anything that uses it
+ # must come BEFORE the usage checks above
+ rl.log_and_clear("(before-close)", '-', '-', ignore_this_entry)
+ connection.close()
+ connection = None
+
+ # the usage flush interval is 30 seconds, wait that much and then some
+ # to make sure everything has been flushed
+ time.sleep(35)
+
+ # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
+ (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1],
+ check_status=True)
+ (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1],
+ check_status=True)
+ assert len(out['entries']) == 0
+ assert len(out['summary']) == 0
+
+ (err, out) = rgwadmin(ctx, client,
+ ['user', 'rm', '--uid', user1, '--purge-data' ],
+ check_status=True)
+
+ # TESTCASE 'rm-user3','user','rm','deleted user','fails'
+ (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1])
+ assert err
+
+ # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule'
+ #
+
+ (err, out) = rgwadmin(ctx, client, ['zone', 'get','--rgw-zone','default'])
+ orig_placement_pools = len(out['placement_pools'])
+
+ # removed this test: it is not correct to assume that the zone has a default
+ # placement rule; it depends on how the zone was set up beforehand
+ #
+ # assert len(out) > 0
+ # assert len(out['placement_pools']) == 1
+
+ # default_rule = out['placement_pools'][0]
+ # assert default_rule['key'] == 'default-placement'
+
+ rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}}
+
+ out['placement_pools'].append(rule)
+
+ (err, out) = rgwadmin(ctx, client, ['zone', 'set'],
+ stdin=BytesIO(six.ensure_binary(json.dumps(out))),
+ check_status=True)
+
+ (err, out) = rgwadmin(ctx, client, ['zone', 'get'])
+ assert len(out) > 0
+ assert len(out['placement_pools']) == orig_placement_pools + 1
+
+ zonecmd = ['zone', 'placement', 'rm',
+ '--rgw-zone', 'default',
+ '--placement-id', 'new-placement']
+
+ (err, out) = rgwadmin(ctx, client, zonecmd, check_status=True)
+
+ # TESTCASE 'zonegroup-info', 'zonegroup', 'get', 'get zonegroup info', 'succeeds'
+ (err, out) = rgwadmin(ctx, client, ['zonegroup', 'get'], check_status=True)
+
+from teuthology.config import config
+from teuthology.orchestra import cluster, remote
+import argparse
+
+def main():
+ if len(sys.argv) == 3:
+ user = sys.argv[1] + "@"
+ host = sys.argv[2]
+ elif len(sys.argv) == 2:
+ user = ""
+ host = sys.argv[1]
+ else:
+ sys.stderr.write("usage: radosgw_admin.py [user] host\n")
+ exit(1)
+ client0 = remote.Remote(user + host)
+ ctx = config
+ ctx.cluster=cluster.Cluster(remotes=[(client0,
+ [ 'ceph.client.rgw.%s' % (host), ]),])
+
+ ctx.rgw = argparse.Namespace()
+ endpoints = {}
+ endpoints['ceph.client.rgw.%s' % host] = (host, 80)
+ ctx.rgw.role_endpoints = endpoints
+ ctx.rgw.realm = None
+ ctx.rgw.regions = {'region0': { 'api name': 'api1',
+ 'is master': True, 'master zone': 'r0z0',
+ 'zones': ['r0z0', 'r0z1'] }}
+ ctx.rgw.config = {'ceph.client.rgw.%s' % host: {'system user': {'name': '%s-system-user' % host}}}
+ task(ctx, None)
+ exit()
+
+if __name__ == '__main__':
+ main()
diff --git a/qa/tasks/radosgw_admin_rest.py b/qa/tasks/radosgw_admin_rest.py
new file mode 100644
index 00000000..24330ad3
--- /dev/null
+++ b/qa/tasks/radosgw_admin_rest.py
@@ -0,0 +1,721 @@
+"""
+Run a series of rgw admin commands through the rest interface.
+
+The test cases in this file have been annotated for inventory.
+To extract the inventory (in csv format) use the command:
+
+ grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
+
+"""
+import logging
+
+
+import boto.exception
+import boto.s3.connection
+import boto.s3.acl
+
+import requests
+import time
+
+from boto.connection import AWSAuthConnection
+from teuthology import misc as teuthology
+from tasks.util.rgw import get_user_summary, get_user_successful_ops, rgwadmin
+
+log = logging.getLogger(__name__)
+
+def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False):
+ """
+ Perform a radosgw-admin command through the REST admin API.
+ """
+ log.info('radosgw-admin-rest: %s %s' % (cmd, params))
+ put_cmds = ['create', 'link', 'add']
+ post_cmds = ['unlink', 'modify']
+ delete_cmds = ['trim', 'rm', 'process']
+ get_cmds = ['check', 'info', 'show', 'list']
+
+ bucket_sub_resources = ['object', 'policy', 'index']
+ user_sub_resources = ['subuser', 'key', 'caps']
+ zone_sub_resources = ['pool', 'log', 'garbage']
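+ # e.g. ['user', 'create'] maps to PUT /admin/user and
+ # ['bucket', 'info'] maps to GET /admin/bucket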
+
+ def get_cmd_method_and_handler(cmd):
+ """
+ Get the rest command and handler from information in cmd and
+ from the imported requests object.
+ """
+ if cmd[1] in put_cmds:
+ return 'PUT', requests.put
+ elif cmd[1] in delete_cmds:
+ return 'DELETE', requests.delete
+ elif cmd[1] in post_cmds:
+ return 'POST', requests.post
+ elif cmd[1] in get_cmds:
+ return 'GET', requests.get
+
+ def get_resource(cmd):
+ """
+ Get the name of the resource from information in cmd.
+ """
+ if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources:
+ if cmd[0] == 'bucket':
+ return 'bucket', ''
+ else:
+ return 'bucket', cmd[0]
+ elif cmd[0] == 'user' or cmd[0] in user_sub_resources:
+ if cmd[0] == 'user':
+ return 'user', ''
+ else:
+ return 'user', cmd[0]
+ elif cmd[0] == 'usage':
+ return 'usage', ''
+ elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources:
+ if cmd[0] == 'zone':
+ return 'zone', ''
+ else:
+ return 'zone', cmd[0]
+
+ def build_admin_request(conn, method, resource = '', headers=None, data='',
+ query_args=None, params=None):
+ """
+ Build an administrative request, adapted from the build_request()
+ method of boto.connection.
+ """
+
+ path = conn.calling_format.build_path_base('admin', resource)
+ auth_path = conn.calling_format.build_auth_path('admin', resource)
+ host = conn.calling_format.build_host(conn.server_name(), 'admin')
+ if query_args:
+ path += '?' + query_args
+ boto.log.debug('path=%s' % path)
+ auth_path += '?' + query_args
+ boto.log.debug('auth_path=%s' % auth_path)
+ return AWSAuthConnection.build_base_http_request(conn, method, path,
+ auth_path, params, headers, data, host)
+
+ method, handler = get_cmd_method_and_handler(cmd)
+ resource, query_args = get_resource(cmd)
+ request = build_admin_request(connection, method, resource,
+ query_args=query_args, headers=headers)
+
+ url = '{protocol}://{host}{path}'.format(protocol=request.protocol,
+ host=request.host, path=request.path)
+
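+ # sign the request with the admin connection's S3 credentials before handing
+ # the prepared headers to the requests handler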
+ request.authorize(connection=connection)
+ result = handler(url, params=params, headers=request.headers)
+
+ if raw:
+ log.info(' text result: %s' % result.text)
+ return result.status_code, result.text
+ elif len(result.content) == 0:
+ # many admin requests return no body, so json() throws a JSONDecodeError
+ log.info(' empty result')
+ return result.status_code, None
+ else:
+ log.info(' json result: %s' % result.json())
+ return result.status_code, result.json()
+
+
+def task(ctx, config):
+ """
+ Test radosgw-admin functionality through the RESTful interface
+ """
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task radosgw-admin-rest only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ # just use the first client...
+ client = next(iter(clients))
+
+ ##
+ admin_user = 'ada'
+ admin_display_name = 'Ms. Admin User'
+ admin_access_key = 'MH1WC2XQ1S8UISFDZC8W'
+ admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG'
+ admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write'
+
+ user1 = 'foo'
+ user2 = 'fud'
+ subuser1 = 'foo:foo1'
+ subuser2 = 'foo:foo2'
+ display_name1 = 'Foo'
+ display_name2 = 'Fud'
+ email = 'foo@foo.com'
+ access_key = '9te6NH5mcdcq0Tc5i8i1'
+ secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu'
+ access_key2 = 'p5YnriCv1nAtykxBrupQ'
+ secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh'
+ swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL'
+ swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy'
+
+ bucket_name = 'myfoo'
+
+ # legend (test cases can be easily grep-ed out)
+ # TESTCASE 'testname','object','method','operation','assertion'
+ # TESTCASE 'create-admin-user','user','create','administrative user','succeeds'
+ (err, out) = rgwadmin(ctx, client, [
+ 'user', 'create',
+ '--uid', admin_user,
+ '--display-name', admin_display_name,
+ '--access-key', admin_access_key,
+ '--secret', admin_secret_key,
+ '--max-buckets', '0',
+ '--caps', admin_caps
+ ])
+ logging.error(out)
+ logging.error(err)
+ assert not err
+
+ assert hasattr(ctx, 'rgw'), 'radosgw-admin-rest must run after the rgw task'
+ endpoint = ctx.rgw.role_endpoints.get(client)
+ assert endpoint, 'no rgw endpoint for {}'.format(client)
+
+ admin_conn = boto.s3.connection.S3Connection(
+ aws_access_key_id=admin_access_key,
+ aws_secret_access_key=admin_secret_key,
+ is_secure=True if endpoint.cert else False,
+ port=endpoint.port,
+ host=endpoint.hostname,
+ calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+ )
+
+ # TESTCASE 'info-nosuch','user','info','non-existent user','fails'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1})
+ assert ret == 404
+
+ # TESTCASE 'create-ok','user','create','w/all valid info','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['user', 'create'],
+ {'uid' : user1,
+ 'display-name' : display_name1,
+ 'email' : email,
+ 'access-key' : access_key,
+ 'secret-key' : secret_key,
+ 'max-buckets' : '4'
+ })
+
+ assert ret == 200
+
+ # TESTCASE 'list-no-user','user','list','list user keys','user list object'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 0})
+ assert ret == 200
+ assert out['count'] == 0
+ assert out['truncated'] == True
+ assert len(out['keys']) == 0
+ assert len(out['marker']) > 0
+
+ # TESTCASE 'list-user-without-marker','user','list','list user keys','user list object'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1})
+ assert ret == 200
+ assert out['count'] == 1
+ assert out['truncated'] == True
+ assert len(out['keys']) == 1
+ assert len(out['marker']) > 0
+ marker = out['marker']
+
+ # TESTCASE 'list-user-with-marker','user','list','list user keys','user list object'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1, 'marker': marker})
+ assert ret == 200
+ assert out['count'] == 1
+ assert out['truncated'] == False
+ assert len(out['keys']) == 1
+
+ # TESTCASE 'info-existing','user','info','existing user','returns correct info'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+
+ assert out['user_id'] == user1
+ assert out['email'] == email
+ assert out['display_name'] == display_name1
+ assert len(out['keys']) == 1
+ assert out['keys'][0]['access_key'] == access_key
+ assert out['keys'][0]['secret_key'] == secret_key
+ assert not out['suspended']
+ assert out['tenant'] == ''
+ assert out['max_buckets'] == 4
+ assert out['caps'] == []
+ assert out['op_mask'] == 'read, write, delete'
+ assert out['default_placement'] == ''
+ assert out['default_storage_class'] == ''
+ assert out['placement_tags'] == []
+ assert not out['bucket_quota']['enabled']
+ assert not out['bucket_quota']['check_on_raw']
+ assert out['bucket_quota']['max_size'] == -1
+ assert out['bucket_quota']['max_size_kb'] == 0
+ assert out['bucket_quota']['max_objects'] == -1
+ assert not out['user_quota']['enabled']
+ assert not out['user_quota']['check_on_raw']
+ assert out['user_quota']['max_size'] == -1
+ assert out['user_quota']['max_size_kb'] == 0
+ assert out['user_quota']['max_objects'] == -1
+ assert out['temp_url_keys'] == []
+ assert out['type'] == 'rgw'
+ assert out['mfa_ids'] == []
+ # TESTCASE 'info-existing','user','info','existing user query with wrong uid but correct access key','returns correct info'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'access-key' : access_key, 'uid': 'uid_not_exist'})
+
+ assert out['user_id'] == user1
+ assert out['email'] == email
+ assert out['display_name'] == display_name1
+ assert len(out['keys']) == 1
+ assert out['keys'][0]['access_key'] == access_key
+ assert out['keys'][0]['secret_key'] == secret_key
+ assert not out['suspended']
+ assert out['tenant'] == ''
+ assert out['max_buckets'] == 4
+ assert out['caps'] == []
+ assert out['op_mask'] == "read, write, delete"
+ assert out['default_placement'] == ''
+ assert out['default_storage_class'] == ''
+ assert out['placement_tags'] == []
+ assert not out['bucket_quota']['enabled']
+ assert not out['bucket_quota']['check_on_raw']
+ assert out['bucket_quota']['max_size'] == -1
+ assert out['bucket_quota']['max_size_kb'] == 0
+ assert out['bucket_quota']['max_objects'] == -1
+ assert not out['user_quota']['enabled']
+ assert not out['user_quota']['check_on_raw']
+ assert out['user_quota']['max_size'] == -1
+ assert out['user_quota']['max_size_kb'] == 0
+ assert out['user_quota']['max_objects'] == -1
+ assert out['temp_url_keys'] == []
+ assert out['type'] == 'rgw'
+ assert out['mfa_ids'] == []
+
+ # TESTCASE 'suspend-ok','user','suspend','active user','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True})
+ assert ret == 200
+
+ # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert out['suspended']
+ assert out['email'] == email
+
+ # TESTCASE 're-enable','user','enable','suspended user','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'})
+ assert ret == 200
+
+ # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert not out['suspended']
+
+ # TESTCASE 'add-keys','key','create','w/valid info','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['key', 'create'],
+ {'uid' : user1,
+ 'access-key' : access_key2,
+ 'secret-key' : secret_key2
+ })
+
+
+ assert ret == 200
+
+ # TESTCASE 'info-new-key','user','info','after key addition','returns all keys'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert len(out['keys']) == 2
+ assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2
+ assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2
+
+ # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['key', 'rm'],
+ {'uid' : user1,
+ 'access-key' : access_key2
+ })
+
+ assert ret == 200
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+
+ assert len(out['keys']) == 1
+ assert out['keys'][0]['access_key'] == access_key
+ assert out['keys'][0]['secret_key'] == secret_key
+
+ # TESTCASE 'add-swift-key','key','create','swift key','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['subuser', 'create'],
+ {'subuser' : subuser1,
+ 'secret-key' : swift_secret1,
+ 'key-type' : 'swift'
+ })
+
+ assert ret == 200
+
+ # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert len(out['swift_keys']) == 1
+ assert out['swift_keys'][0]['user'] == subuser1
+ assert out['swift_keys'][0]['secret_key'] == swift_secret1
+
+ # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['subuser', 'create'],
+ {'subuser' : subuser2,
+ 'secret-key' : swift_secret2,
+ 'key-type' : 'swift'
+ })
+
+ assert ret == 200
+
+ # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert len(out['swift_keys']) == 2
+ assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2
+ assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2
+
+ # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['key', 'rm'],
+ {'subuser' : subuser1,
+ 'key-type' :'swift'
+ })
+
+ assert ret == 200
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert len(out['swift_keys']) == 1
+
+ # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['subuser', 'rm'],
+ {'subuser' : subuser1
+ })
+
+ assert ret == 200
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert len(out['subusers']) == 1
+
+ # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subuser and key are removed'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['subuser', 'rm'],
+ {'subuser' : subuser2,
+ 'key-type' : 'swift',
+ 'purge-keys' : True
+ })
+
+ assert ret == 200
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert len(out['swift_keys']) == 0
+ assert len(out['subusers']) == 0
+
+ # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert len(out) == 0
+
+ # connect to rgw
+ connection = boto.s3.connection.S3Connection(
+ aws_access_key_id=access_key,
+ aws_secret_access_key=secret_key,
+ is_secure=True if endpoint.cert else False,
+ port=endpoint.port,
+ host=endpoint.hostname,
+ calling_format=boto.s3.connection.OrdinaryCallingFormat(),
+ )
+
+ # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True})
+ assert ret == 200
+ assert len(out) == 0
+
+ # create a first bucket
+ bucket = connection.create_bucket(bucket_name)
+
+ # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1})
+ assert ret == 200
+ assert len(out) == 1
+ assert out[0] == bucket_name
+
+ # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list'
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
+
+ assert ret == 200
+ assert out['owner'] == user1
+ assert out['tenant'] == ''
+ bucket_id = out['id']
+
+ # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True})
+ assert ret == 200
+ assert len(out) == 1
+ assert out[0]['id'] == bucket_id # does it return the same ID twice in a row?
+
+ # use some space
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('one')
+
+ # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
+ assert ret == 200
+ assert out['id'] == bucket_id
+ assert out['usage']['rgw.main']['num_objects'] == 1
+ assert out['usage']['rgw.main']['size_kb'] > 0
+
+ # TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails', 'bucket not found error'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'})
+ assert ret == 404
+ assert out['Code'] == 'NoSuchBucket'
+
+ # reclaim it
+ key.delete()
+
+ # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from owning user', 'succeeds, bucket unlinked'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name})
+
+ assert ret == 200
+
+ # create a second user to link the bucket to
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['user', 'create'],
+ {'uid' : user2,
+ 'display-name' : display_name2,
+ 'access-key' : access_key2,
+ 'secret-key' : secret_key2,
+ 'max-buckets' : '1',
+ })
+
+ assert ret == 200
+
+ # try creating an object with the first user before the bucket is relinked
+ denied = False
+ key = boto.s3.key.Key(bucket)
+
+ try:
+ key.set_contents_from_string('two')
+ except boto.exception.S3ResponseError:
+ denied = True
+
+ assert not denied
+
+ # delete the object
+ key.delete()
+
+ # link the bucket to another user
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['bucket', 'link'],
+ {'uid' : user2,
+ 'bucket' : bucket_name,
+ 'bucket-id' : bucket_id,
+ })
+
+ assert ret == 200
+
+ # try creating an object with the first user which should cause an error
+ key = boto.s3.key.Key(bucket)
+
+ try:
+ key.set_contents_from_string('three')
+ except boto.exception.S3ResponseError:
+ denied = True
+
+ assert denied
+
+ # relink the bucket to the first user and delete the second user
+ (ret, out) = rgwadmin_rest(admin_conn,
+ ['bucket', 'link'],
+ {'uid' : user1,
+ 'bucket' : bucket_name,
+ 'bucket-id' : bucket_id,
+ })
+ assert ret == 200
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2})
+ assert ret == 200
+
+ # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed'
+
+ # upload an object
+ object_name = 'four'
+ key = boto.s3.key.Key(bucket, object_name)
+ key.set_contents_from_string(object_name)
+
+ # now delete it
+ (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name})
+ assert ret == 200
+
+ # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists no objects'
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True})
+ assert ret == 200
+ assert out['id'] == bucket_id
+ assert out['usage']['rgw.main']['num_objects'] == 0
+
+ # create a bucket for deletion stats
+ useless_bucket = connection.create_bucket('useless_bucket')
+ useless_key = useless_bucket.new_key('useless_key')
+ useless_key.set_contents_from_string('useless string')
+
+ # delete it
+ useless_key.delete()
+ useless_bucket.delete()
+
+ # wait for the statistics to flush
+ time.sleep(60)
+
+ # need to wait for all usage data to get flushed, should take up to 30 seconds
+ timestamp = time.time()
+ while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes
+ (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'categories' : 'delete_obj'}) # last operation we did is delete obj, wait for it to flush
+
+ if get_user_successful_ops(out, user1) > 0:
+ break
+ time.sleep(1)
+
+ assert time.time() - timestamp <= (20 * 60)
+
+ # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'])
+ assert ret == 200
+ assert len(out['entries']) > 0
+ assert len(out['summary']) > 0
+ user_summary = get_user_summary(out, user1)
+ total = user_summary['total']
+ assert total['successful_ops'] > 0
+
+ # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1})
+ assert ret == 200
+ assert len(out['entries']) > 0
+ assert len(out['summary']) > 0
+ user_summary = out['summary'][0]
+ for entry in user_summary['categories']:
+ assert entry['successful_ops'] > 0
+ assert user_summary['user'] == user1
+
+ # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds'
+ test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket']
+ for cat in test_categories:
+ (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat})
+ assert ret == 200
+ assert len(out['summary']) > 0
+ user_summary = out['summary'][0]
+ assert user_summary['user'] == user1
+ assert len(user_summary['categories']) == 1
+ entry = user_summary['categories'][0]
+ assert entry['category'] == cat
+ assert entry['successful_ops'] > 0
+
+ # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed'
+ (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1})
+ assert ret == 200
+ (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1})
+ assert ret == 200
+ assert len(out['entries']) == 0
+ assert len(out['summary']) == 0
+
+ # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True})
+ assert ret == 200
+
+ # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects'
+ try:
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('five')
+ except boto.exception.S3ResponseError as e:
+ assert e.status == 403
+
+ # TESTCASE 'user-renable2','user','enable','suspended user','succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'})
+ assert ret == 200
+
+ # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects'
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('six')
+
+ # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection'
+
+ # create an object large enough to be split into multiple parts
+ test_string = 'foo'*10000000
+
+ big_key = boto.s3.key.Key(bucket)
+ big_key.set_contents_from_string(test_string)
+
+ # now delete the head
+ big_key.delete()
+
+ # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1})
+ assert ret == 409
+
+ # delete should fail because ``key`` still exists
+ try:
+ bucket.delete()
+ except boto.exception.S3ResponseError as e:
+ assert e.status == 409
+
+ key.delete()
+ bucket.delete()
+
+ # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy'
+ bucket = connection.create_bucket(bucket_name)
+
+ # create an object
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('seven')
+
+ # should be private already but guarantee it
+ key.set_acl('private')
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key})
+ assert ret == 200
+ assert len(out['acl']['grant_map']) == 1
+
+ # add another grantee by making the object public read
+ key.set_acl('public-read')
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key})
+ assert ret == 200
+ assert len(out['acl']['grant_map']) == 2
+
+ # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds'
+ bucket = connection.create_bucket(bucket_name)
+ key_name = ['eight', 'nine', 'ten', 'eleven']
+ for i in range(4):
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string(key_name[i])
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' : True})
+ assert ret == 200
+
+ # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds'
+ caps = 'usage=read'
+ (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' : user1, 'user-caps' : caps})
+ assert ret == 200
+ assert out[0]['perm'] == 'read'
+
+ # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds'
+ (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' : user1, 'user-caps' : caps})
+ assert ret == 200
+ assert not out
+
+ # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets'
+ bucket = connection.create_bucket(bucket_name)
+ key = boto.s3.key.Key(bucket)
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1})
+ assert ret == 409
+
+ # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds'
+ bucket = connection.create_bucket(bucket_name)
+ key = boto.s3.key.Key(bucket)
+ key.set_contents_from_string('twelve')
+
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True})
+ assert ret == 200
+
+ # TESTCASE 'rm-user3','user','info','deleted user','fails'
+ (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1})
+ assert ret == 404
+
diff --git a/qa/tasks/ragweed.py b/qa/tasks/ragweed.py
new file mode 100644
index 00000000..d906cdca
--- /dev/null
+++ b/qa/tasks/ragweed.py
@@ -0,0 +1,390 @@
+"""
+Run a set of s3 tests on rgw.
+"""
+from io import BytesIO
+from configobj import ConfigObj
+import base64
+import contextlib
+import logging
+import os
+import random
+import six
+import string
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.config import config as teuth_config
+from teuthology.orchestra import run
+from teuthology.orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+
+def get_ragweed_branches(config, client_conf):
+ """
+ figure out the ragweed branches according to the per-client settings
+
+ use force-branch if specified, and fall back to the branches deduced from
+ the ceph branch under test
+ """
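+ # e.g. a suite branch of 'nautilus' resolves to ['ceph-nautilus'], optionally
+ # followed by the per-client 'default-branch' as a fallback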
+ force_branch = client_conf.get('force-branch', None)
+ if force_branch:
+ return [force_branch]
+ else:
+ S3_BRANCHES = ['master', 'nautilus', 'mimic',
+ 'luminous', 'kraken', 'jewel']
+ ceph_branch = config.get('branch')
+ suite_branch = config.get('suite_branch', ceph_branch)
+ if suite_branch in S3_BRANCHES:
+ branch = client_conf.get('branch', 'ceph-' + suite_branch)
+ else:
+ branch = client_conf.get('branch', suite_branch)
+ default_branch = client_conf.get('default-branch', None)
+ if default_branch:
+ return [branch, default_branch]
+ else:
+ return [branch]
+
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Download the ragweed suite from its git repository.
+ Remove the downloaded files upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Downloading ragweed...')
+ testdir = teuthology.get_testdir(ctx)
+ for (client, cconf) in config.items():
+ ragweed_repo = ctx.config.get('ragweed_repo',
+ teuth_config.ceph_git_base_url + 'ragweed.git')
+ for branch in get_ragweed_branches(ctx.config, cconf):
+ log.info("Using branch '%s' for ragweed", branch)
+ try:
+ ctx.cluster.only(client).sh(
+ script=f'git clone -b {branch} {ragweed_repo} {testdir}/ragweed')
+ break
+ except Exception as e:
+ exc = e
+ else:
+ raise exc
+
+ sha1 = cconf.get('sha1')
+ if sha1 is not None:
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/ragweed'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'git', 'reset', '--hard', sha1,
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Removing ragweed...')
+ testdir = teuthology.get_testdir(ctx)
+ for client in config:
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm',
+ '-rf',
+ '{tdir}/ragweed'.format(tdir=testdir),
+ ],
+ )
+
+
+def _config_user(ragweed_conf, section, user):
+ """
+ Configure users for this section by stashing away keys, ids, and
+ email addresses.
+ """
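+ # only values not already present in the config are filled in; the access
+ # and secret keys are freshly randomized on every run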
+ ragweed_conf[section].setdefault('user_id', user)
+ ragweed_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+ ragweed_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+ ragweed_conf[section].setdefault('access_key', ''.join(random.choice(string.ascii_uppercase) for i in range(20)))
+ ragweed_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)).decode('ascii'))
+
+
+@contextlib.contextmanager
+def create_users(ctx, config, run_stages):
+ """
+ Create a main and an alternate s3 user.
+ """
+ assert isinstance(config, dict)
+
+ for client, properties in config['config'].items():
+ run_stages[client] = properties.get('stages', 'prepare,check').split(',')
+
+ log.info('Creating rgw users...')
+ testdir = teuthology.get_testdir(ctx)
+ users = {'user regular': 'ragweed', 'user system': 'sysuser'}
+ for client in config['clients']:
+ if 'prepare' not in run_stages[client]:
+ # should have been prepared in a previous run
+ continue
+
+ ragweed_conf = config['ragweed_conf'][client]
+ ragweed_conf.setdefault('fixtures', {})
+ ragweed_conf['rgw'].setdefault('bucket_prefix', 'test-' + client)
+ for section, user in users.items():
+ _config_user(ragweed_conf, section, '{user}.{client}'.format(user=user, client=client))
+ log.debug('Creating user {user} on {host}'.format(user=ragweed_conf[section]['user_id'], host=client))
+ if user == 'sysuser':
+ sys_str = 'true'
+ else:
+ sys_str = 'false'
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ 'user', 'create',
+ '--uid', ragweed_conf[section]['user_id'],
+ '--display-name', ragweed_conf[section]['display_name'],
+ '--access-key', ragweed_conf[section]['access_key'],
+ '--secret', ragweed_conf[section]['secret_key'],
+ '--email', ragweed_conf[section]['email'],
+ '--system', sys_str,
+ ],
+ )
+ try:
+ yield
+ finally:
+ for client in config['clients']:
+ if 'check' not in run_stages[client]:
+ # only remove user if went through the check stage
+ continue
+ for user in users.values():
+ uid = '{user}.{client}'.format(user=user, client=client)
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ 'user', 'rm',
+ '--uid', uid,
+ '--purge-data',
+ ],
+ )
+
+
+@contextlib.contextmanager
+def configure(ctx, config, run_stages):
+ """
+ Configure ragweed. This includes running the bootstrap code and
+ updating the local conf files.
+ """
+ assert isinstance(config, dict)
+ log.info('Configuring ragweed...')
+ testdir = teuthology.get_testdir(ctx)
+ for client, properties in config['clients'].items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'cd',
+ '{tdir}/ragweed'.format(tdir=testdir),
+ run.Raw('&&'),
+ './bootstrap',
+ ],
+ )
+
+ preparing = 'prepare' in run_stages[client]
+ if not preparing:
+ # should have been prepared in a previous run
+ continue
+
+ ragweed_conf = config['ragweed_conf'][client]
+ if properties is not None and 'rgw_server' in properties:
+ host = None
+ for target, roles in zip(ctx.config['targets'].keys(), ctx.config['roles']):
+ log.info('roles: ' + str(roles))
+ log.info('target: ' + str(target))
+ if properties['rgw_server'] in roles:
+ _, host = split_user(target)
+ assert host is not None, "Invalid client specified as the rgw_server"
+ ragweed_conf['rgw']['host'] = host
+ else:
+ ragweed_conf['rgw']['host'] = 'localhost'
+
+ if properties is not None and 'slow_backend' in properties:
+ ragweed_conf['fixtures']['slow backend'] = properties['slow_backend']
+
+ conf_fp = BytesIO()
+ ragweed_conf.write(conf_fp)
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client),
+ data=conf_fp.getvalue(),
+ )
+
+ log.info('Configuring boto...')
+ boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template')
+ for client, properties in config['clients'].items():
+ with open(boto_src, 'r') as f:
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ conf = f.read().format(
+ idle_timeout=config.get('idle_timeout', 30)
+ )
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/boto.cfg'.format(tdir=testdir),
+ data=conf,
+ )
+
+ try:
+ yield
+
+ finally:
+ log.info('Cleaning up boto...')
+ for client, properties in config['clients'].items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'rm',
+ '{tdir}/boto.cfg'.format(tdir=testdir),
+ ],
+ )
+
+@contextlib.contextmanager
+def run_tests(ctx, config, run_stages):
+ """
+ Run the ragweed tests after everything is set up.
+
+ :param ctx: Context passed to task
+ :param config: specific configuration information
+ """
+ assert isinstance(config, dict)
+ testdir = teuthology.get_testdir(ctx)
+ attrs = ["!fails_on_rgw"]
+ for client, client_config in config.items():
+ stages = ','.join(run_stages[client])
+ args = [
+ 'RAGWEED_CONF={tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client),
+ 'RAGWEED_STAGES={stages}'.format(stages=stages),
+ 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir),
+ '{tdir}/ragweed/virtualenv/bin/nosetests'.format(tdir=testdir),
+ '-w',
+ '{tdir}/ragweed'.format(tdir=testdir),
+ '-v',
+ '-a', ','.join(attrs),
+ ]
+ if client_config is not None and 'extra_args' in client_config:
+ args.extend(client_config['extra_args'])
+
+ ctx.cluster.only(client).run(
+ args=args,
+ label="ragweed tests against rgw"
+ )
+ yield
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the ragweed suite against rgw.
+
+ To run all tests on all clients::
+
+ tasks:
+ - ceph:
+ - rgw:
+ - ragweed:
+
+ To restrict testing to particular clients::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - ragweed: [client.0]
+
+ To run against a server on client.1 and increase the boto timeout to 10m::
+
+ tasks:
+ - ceph:
+ - rgw: [client.1]
+ - ragweed:
+ client.0:
+ rgw_server: client.1
+ idle_timeout: 600
+ stages: prepare,check
+
+ To pass extra arguments to nose (e.g. to run a certain test)::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - ragweed:
+ client.0:
+ extra_args: ['test_s3:test_object_acl_grand_public_read']
+ client.1:
+ extra_args: ['--exclude', 'test_100_continue']
+ """
+ assert hasattr(ctx, 'rgw'), 'ragweed must run after the rgw task'
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task ragweed only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ overrides = ctx.config.get('overrides', {})
+ # merge each client section, not the top level.
+ for client in config.keys():
+ if not config[client]:
+ config[client] = {}
+ teuthology.deep_merge(config[client], overrides.get('ragweed', {}))
+
+ log.debug('ragweed config is %s', config)
+
+ ragweed_conf = {}
+ for client in clients:
+ endpoint = ctx.rgw.role_endpoints.get(client)
+ assert endpoint, 'ragweed: no rgw endpoint for {}'.format(client)
+
+ ragweed_conf[client] = ConfigObj(
+ indent_type='',
+ infile={
+ 'rgw':
+ {
+ 'port' : endpoint.port,
+ 'is_secure' : endpoint.cert is not None,
+ },
+ 'fixtures' : {},
+ 'user system' : {},
+ 'user regular' : {},
+ 'rados':
+ {
+ 'ceph_conf' : '/etc/ceph/ceph.conf',
+ },
+ }
+ )
+
+ run_stages = {}
+
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: create_users(ctx=ctx, config=dict(
+ clients=clients,
+ ragweed_conf=ragweed_conf,
+ config=config,
+ ),
+ run_stages=run_stages),
+ lambda: configure(ctx=ctx, config=dict(
+ clients=config,
+ ragweed_conf=ragweed_conf,
+ ),
+ run_stages=run_stages),
+ lambda: run_tests(ctx=ctx, config=config, run_stages=run_stages),
+ ):
+ pass
+ yield
diff --git a/qa/tasks/rbd.py b/qa/tasks/rbd.py
new file mode 100644
index 00000000..b1183fb8
--- /dev/null
+++ b/qa/tasks/rbd.py
@@ -0,0 +1,628 @@
+"""
+Rbd testing task
+"""
+import contextlib
+import logging
+import os
+import tempfile
+import sys
+
+from io import BytesIO
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.parallel import parallel
+from teuthology.task.common_fs_utils import generic_mkfs
+from teuthology.task.common_fs_utils import generic_mount
+from teuthology.task.common_fs_utils import default_image_name
+
+import six
+
+# V1 images are unsupported but still required for testing purposes
+os.environ["RBD_FORCE_ALLOW_V1"] = "1"
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def create_image(ctx, config):
+ """
+ Create an rbd image.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - rbd.create_image:
+ client.0:
+ image_name: testimage
+ image_size: 100
+ image_format: 1
+ client.1:
+
+ Image size is expressed as a number of megabytes; default value
+ is 10240.
+
+ Image format value must be either 1 or 2; default value is 1.
+
+ """
+ assert isinstance(config, dict) or isinstance(config, list), \
+ "task create_image only supports a list or dictionary for configuration"
+
+ if isinstance(config, dict):
+ images = config.items()
+ else:
+ images = [(role, None) for role in config]
+
+ testdir = teuthology.get_testdir(ctx)
+ for role, properties in images:
+ if properties is None:
+ properties = {}
+ name = properties.get('image_name', default_image_name(role))
+ size = properties.get('image_size', 10240)
+ fmt = properties.get('image_format', 1)
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ log.info('Creating image {name} with size {size}'.format(name=name,
+ size=size))
+ args = [
+ 'adjust-ulimits',
+            'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rbd',
+ '-p', 'rbd',
+ 'create',
+ '--size', str(size),
+ name,
+ ]
+        # omit the format option if using the default (format 1),
+        # since old versions of rbd don't support it
+ if int(fmt) != 1:
+ args += ['--image-format', str(fmt)]
+ remote.run(args=args)
+ try:
+ yield
+ finally:
+ log.info('Deleting rbd images...')
+ for role, properties in images:
+ if properties is None:
+ properties = {}
+ name = properties.get('image_name', default_image_name(role))
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ remote.run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rbd',
+ '-p', 'rbd',
+ 'rm',
+ name,
+ ],
+ )
+
+@contextlib.contextmanager
+def clone_image(ctx, config):
+ """
+    Clones a parent image.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - rbd.clone_image:
+ client.0:
+ parent_name: testimage
+ image_name: cloneimage
+ """
+ assert isinstance(config, dict) or isinstance(config, list), \
+ "task clone_image only supports a list or dictionary for configuration"
+
+ if isinstance(config, dict):
+ images = config.items()
+ else:
+ images = [(role, None) for role in config]
+
+ testdir = teuthology.get_testdir(ctx)
+ for role, properties in images:
+ if properties is None:
+ properties = {}
+
+ name = properties.get('image_name', default_image_name(role))
+ parent_name = properties.get('parent_name')
+ assert parent_name is not None, \
+ "parent_name is required"
+ parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name)
+
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ log.info('Clone image {parent} to {child}'.format(parent=parent_name,
+ child=name))
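+        # rbd needs a protected snapshot of the parent before it can clone:
+        # snap create -> snap protect -> clone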
+ for cmd in [('snap', 'create', parent_spec),
+ ('snap', 'protect', parent_spec),
+ ('clone', parent_spec, name)]:
+ args = [
+ 'adjust-ulimits',
+                'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rbd', '-p', 'rbd'
+ ]
+ args.extend(cmd)
+ remote.run(args=args)
+
+ try:
+ yield
+ finally:
+ log.info('Deleting rbd clones...')
+ for role, properties in images:
+ if properties is None:
+ properties = {}
+ name = properties.get('image_name', default_image_name(role))
+ parent_name = properties.get('parent_name')
+ parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name)
+
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+
+ for cmd in [('rm', name),
+ ('snap', 'unprotect', parent_spec),
+ ('snap', 'rm', parent_spec)]:
+ args = [
+ 'adjust-ulimits',
+                    'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rbd', '-p', 'rbd'
+ ]
+ args.extend(cmd)
+ remote.run(args=args)
+
+@contextlib.contextmanager
+def modprobe(ctx, config):
+ """
+    Load the rbd kernel module.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - rbd.create_image: [client.0]
+ - rbd.modprobe: [client.0]
+ """
+ log.info('Loading rbd kernel module...')
+ for role in config:
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ remote.run(
+ args=[
+ 'sudo',
+ 'modprobe',
+ 'rbd',
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Unloading rbd kernel module...')
+ for role in config:
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ remote.run(
+ args=[
+ 'sudo',
+ 'modprobe',
+ '-r',
+ 'rbd',
+ # force errors to be ignored; necessary if more
+ # than one device was created, which may mean
+ # the module isn't quite ready to go the first
+ # time through.
+ run.Raw('||'),
+ 'true',
+ ],
+ )
+
+@contextlib.contextmanager
+def dev_create(ctx, config):
+ """
+ Map block devices to rbd images.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - rbd.create_image: [client.0]
+ - rbd.modprobe: [client.0]
+ - rbd.dev_create:
+ client.0: testimage.client.0
+ """
+ assert isinstance(config, dict) or isinstance(config, list), \
+ "task dev_create only supports a list or dictionary for configuration"
+
+ if isinstance(config, dict):
+ role_images = config.items()
+ else:
+ role_images = [(role, None) for role in config]
+
+ log.info('Creating rbd block devices...')
+
+ testdir = teuthology.get_testdir(ctx)
+
+ for role, image in role_images:
+ if image is None:
+ image = default_image_name(role)
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+
+ remote.run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rbd',
+ '--user', role.rsplit('.')[-1],
+ '-p', 'rbd',
+ 'map',
+ image,
+ run.Raw('&&'),
+ # wait for the symlink to be created by udev
+ 'while', 'test', '!', '-e', '/dev/rbd/rbd/{image}'.format(image=image), run.Raw(';'), 'do',
+ 'sleep', '1', run.Raw(';'),
+ 'done',
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Unmapping rbd devices...')
+ for role, image in role_images:
+ if image is None:
+ image = default_image_name(role)
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ remote.run(
+ args=[
+ 'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir),
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rbd',
+ '-p', 'rbd',
+ 'unmap',
+ '/dev/rbd/rbd/{imgname}'.format(imgname=image),
+ run.Raw('&&'),
+ # wait for the symlink to be deleted by udev
+ 'while', 'test', '-e', '/dev/rbd/rbd/{image}'.format(image=image),
+ run.Raw(';'),
+ 'do',
+ 'sleep', '1', run.Raw(';'),
+ 'done',
+ ],
+ )
+
+
+def rbd_devname_rtn(ctx, image):
+ return '/dev/rbd/rbd/{image}'.format(image=image)
+
+def canonical_path(ctx, role, path):
+ """
+ Determine the canonical path for a given path on the host
+ representing the given role. A canonical path contains no
+ . or .. components, and includes no symbolic links.
+ """
+ version_fp = BytesIO()
+ ctx.cluster.only(role).run(
+ args=[ 'readlink', '-f', path ],
+ stdout=version_fp,
+ )
+ canonical_path = six.ensure_str(version_fp.getvalue()).rstrip('\n')
+ version_fp.close()
+ return canonical_path
+
+@contextlib.contextmanager
+def run_xfstests(ctx, config):
+ """
+ Run xfstests over specified devices.
+
+ Warning: both the test and scratch devices specified will be
+ overwritten. Normally xfstests modifies (but does not destroy)
+ the test device, but for now the run script used here re-makes
+ both filesystems.
+
+ Note: Only one instance of xfstests can run on a single host at
+ a time, although this is not enforced.
+
+ This task in its current form needs some improvement. For
+ example, it assumes all roles provided in the config are
+ clients, and that the config provided is a list of key/value
+ pairs. For now please use the xfstests() interface, below.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - rbd.run_xfstests:
+ client.0:
+ count: 2
+ test_dev: 'test_dev'
+ scratch_dev: 'scratch_dev'
+ fs_type: 'xfs'
+ tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015'
+ exclude:
+ - generic/42
+ randomize: true
+ """
+ with parallel() as p:
+ for role, properties in config.items():
+ p.spawn(run_xfstests_one_client, ctx, role, properties)
+ exc_info = None
+ while True:
+ try:
+ p.next()
+ except StopIteration:
+ break
+ except:
+ exc_info = sys.exc_info()
+ if exc_info:
+ six.reraise(exc_info[0], exc_info[1], exc_info[2])
+ yield
+
+def run_xfstests_one_client(ctx, role, properties):
+ """
+ Spawned routine to handle xfs tests for a single client
+ """
+ testdir = teuthology.get_testdir(ctx)
+ try:
+ count = properties.get('count')
+ test_dev = properties.get('test_dev')
+ assert test_dev is not None, \
+ "task run_xfstests requires test_dev to be defined"
+ test_dev = canonical_path(ctx, role, test_dev)
+
+ scratch_dev = properties.get('scratch_dev')
+ assert scratch_dev is not None, \
+ "task run_xfstests requires scratch_dev to be defined"
+ scratch_dev = canonical_path(ctx, role, scratch_dev)
+
+ fs_type = properties.get('fs_type')
+ tests = properties.get('tests')
+        exclude_list = properties.get('exclude', [])
+ randomize = properties.get('randomize')
+
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+
+ # Fetch the test script
+ test_root = teuthology.get_testdir(ctx)
+ test_script = 'run_xfstests.sh'
+ test_path = os.path.join(test_root, test_script)
+
+ xfstests_url = properties.get('xfstests_url')
+ assert xfstests_url is not None, \
+ "task run_xfstests requires xfstests_url to be defined"
+
+ xfstests_krbd_url = xfstests_url + '/' + test_script
+
+ log.info('Fetching {script} for {role} from {url}'.format(
+ script=test_script,
+ role=role,
+ url=xfstests_krbd_url))
+
+ args = [ 'wget', '-O', test_path, '--', xfstests_krbd_url ]
+ remote.run(args=args)
+
+ log.info('Running xfstests on {role}:'.format(role=role))
+ log.info(' iteration count: {count}:'.format(count=count))
+ log.info(' test device: {dev}'.format(dev=test_dev))
+ log.info(' scratch device: {dev}'.format(dev=scratch_dev))
+ log.info(' using fs_type: {fs_type}'.format(fs_type=fs_type))
+ log.info(' tests to run: {tests}'.format(tests=tests))
+ log.info(' exclude list: {}'.format(' '.join(exclude_list)))
+ log.info(' randomize: {randomize}'.format(randomize=randomize))
+
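+        # the exclude list is written to a local temp file and copied to the
+        # same path on the remote so run_xfstests.sh can consume it via -x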
+ if exclude_list:
+ with tempfile.NamedTemporaryFile(mode='w', prefix='exclude') as exclude_file:
+ for test in exclude_list:
+ exclude_file.write("{}\n".format(test))
+ exclude_file.flush()
+ remote.put_file(exclude_file.name, exclude_file.name)
+
+ # Note that the device paths are interpreted using
+ # readlink -f <path> in order to get their canonical
+ # pathname (so it matches what the kernel remembers).
+ args = [
+ '/usr/bin/sudo',
+ 'TESTDIR={tdir}'.format(tdir=testdir),
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ '/bin/bash',
+ test_path,
+ '-c', str(count),
+ '-f', fs_type,
+ '-t', test_dev,
+ '-s', scratch_dev,
+ ]
+ if exclude_list:
+ args.extend(['-x', exclude_file.name])
+ if randomize:
+ args.append('-r')
+ if tests:
+ args.extend(['--', tests])
+ remote.run(args=args, logger=log.getChild(role))
+ finally:
+ log.info('Removing {script} on {role}'.format(script=test_script,
+ role=role))
+ remote.run(args=['rm', '-f', test_path])
+
+@contextlib.contextmanager
+def xfstests(ctx, config):
+ """
+ Run xfstests over rbd devices. This interface sets up all
+ required configuration automatically if not otherwise specified.
+ Note that only one instance of xfstests can run on a single host
+ at a time. By default, the set of tests specified is run once.
+ If a (non-zero) count value is supplied, the complete set of
+ tests will be run that number of times.
+
+ For example::
+
+ tasks:
+ - ceph:
+ # Image sizes are in MB
+ - rbd.xfstests:
+ client.0:
+ count: 3
+ test_image: 'test_image'
+ test_size: 250
+ test_format: 2
+ scratch_image: 'scratch_image'
+ scratch_size: 250
+ scratch_format: 1
+ fs_type: 'xfs'
+ tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015'
+ exclude:
+ - generic/42
+ randomize: true
+ xfstests_url: 'https://raw.github.com/ceph/ceph-ci/wip-55555/qa'
+ """
+ if config is None:
+ config = { 'all': None }
+ assert isinstance(config, dict) or isinstance(config, list), \
+ "task xfstests only supports a list or dictionary for configuration"
+ if isinstance(config, dict):
+ config = teuthology.replace_all_with_clients(ctx.cluster, config)
+ runs = config.items()
+ else:
+ runs = [(role, None) for role in config]
+
+ running_xfstests = {}
+ for role, properties in runs:
+ assert role.startswith('client.'), \
+ "task xfstests can only run on client nodes"
+ for host, roles_for_host in ctx.cluster.remotes.items():
+ if role in roles_for_host:
+ assert host not in running_xfstests, \
+ "task xfstests allows only one instance at a time per host"
+ running_xfstests[host] = True
+
+ images_config = {}
+ scratch_config = {}
+ modprobe_config = {}
+ image_map_config = {}
+ scratch_map_config = {}
+ xfstests_config = {}
+ for role, properties in runs:
+ if properties is None:
+ properties = {}
+
+ test_image = properties.get('test_image', 'test_image.{role}'.format(role=role))
+ test_size = properties.get('test_size', 10000) # 10G
+ test_fmt = properties.get('test_format', 1)
+ scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role))
+ scratch_size = properties.get('scratch_size', 10000) # 10G
+ scratch_fmt = properties.get('scratch_format', 1)
+
+ images_config[role] = dict(
+ image_name=test_image,
+ image_size=test_size,
+ image_format=test_fmt,
+ )
+
+ scratch_config[role] = dict(
+ image_name=scratch_image,
+ image_size=scratch_size,
+ image_format=scratch_fmt,
+ )
+
+ xfstests_branch = properties.get('xfstests_branch', 'master')
+ xfstests_url = properties.get('xfstests_url', 'https://raw.github.com/ceph/ceph/{branch}/qa'.format(branch=xfstests_branch))
+
+ xfstests_config[role] = dict(
+ count=properties.get('count', 1),
+ test_dev='/dev/rbd/rbd/{image}'.format(image=test_image),
+ scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image),
+ fs_type=properties.get('fs_type', 'xfs'),
+ randomize=properties.get('randomize', False),
+ tests=properties.get('tests'),
+ exclude=properties.get('exclude', []),
+ xfstests_url=xfstests_url,
+ )
+
+ log.info('Setting up xfstests using RBD images:')
+ log.info(' test ({size} MB): {image}'.format(size=test_size,
+ image=test_image))
+ log.info(' scratch ({size} MB): {image}'.format(size=scratch_size,
+ image=scratch_image))
+ modprobe_config[role] = None
+ image_map_config[role] = test_image
+ scratch_map_config[role] = scratch_image
+
+ with contextutil.nested(
+ lambda: create_image(ctx=ctx, config=images_config),
+ lambda: create_image(ctx=ctx, config=scratch_config),
+ lambda: modprobe(ctx=ctx, config=modprobe_config),
+ lambda: dev_create(ctx=ctx, config=image_map_config),
+ lambda: dev_create(ctx=ctx, config=scratch_map_config),
+ lambda: run_xfstests(ctx=ctx, config=xfstests_config),
+ ):
+ yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Create and mount an rbd image.
+
+ For example, you can specify which clients to run on::
+
+ tasks:
+ - ceph:
+ - rbd: [client.0, client.1]
+
+ There are a few image options::
+
+ tasks:
+ - ceph:
+ - rbd:
+ client.0: # uses defaults
+ client.1:
+ image_name: foo
+ image_size: 2048
+ image_format: 2
+ fs_type: xfs
+
+ To use default options on all clients::
+
+ tasks:
+ - ceph:
+ - rbd:
+ all:
+
+ To create 20GiB images and format them with xfs on all clients::
+
+ tasks:
+ - ceph:
+ - rbd:
+ all:
+ image_size: 20480
+ fs_type: xfs
+ """
+ if config is None:
+ config = { 'all': None }
+ norm_config = config
+ if isinstance(config, dict):
+ norm_config = teuthology.replace_all_with_clients(ctx.cluster, config)
+ if isinstance(norm_config, dict):
+ role_images = {}
+ for role, properties in norm_config.items():
+ if properties is None:
+ properties = {}
+ role_images[role] = properties.get('image_name')
+ else:
+ role_images = norm_config
+
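+    # role_images maps each client role to the image it should map and mount
+    # (None means use the default image name for that role)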
+ log.debug('rbd config is: %s', norm_config)
+
+ with contextutil.nested(
+ lambda: create_image(ctx=ctx, config=norm_config),
+ lambda: modprobe(ctx=ctx, config=norm_config),
+ lambda: dev_create(ctx=ctx, config=role_images),
+ lambda: generic_mkfs(ctx=ctx, config=norm_config,
+ devname_rtn=rbd_devname_rtn),
+ lambda: generic_mount(ctx=ctx, config=role_images,
+ devname_rtn=rbd_devname_rtn),
+ ):
+ yield
diff --git a/qa/tasks/rbd_fio.py b/qa/tasks/rbd_fio.py
new file mode 100644
index 00000000..4f321284
--- /dev/null
+++ b/qa/tasks/rbd_fio.py
@@ -0,0 +1,224 @@
+"""
+ Long-running fio tests on rbd-mapped devices for the formats/features
+ provided in the config. Many fio parameters can be configured so that this
+ task can be used along with thrash/power-cut tests and exercise IO on the
+ full disk for all formats/features.
+ - This test should not be run on a VM due to its heavy resource usage.
+
+"""
+import contextlib
+import json
+import logging
+import os
+
+from teuthology.parallel import parallel
+from teuthology import misc as teuthology
+from tempfile import NamedTemporaryFile
+from teuthology.orchestra import run
+from teuthology.packaging import install_package, remove_package
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ client.0:
+ fio-io-size: 100g or 80% or 100m
+ fio-version: 2.2.9
+ formats: [2]
+ features: [[layering],[striping],[layering,exclusive-lock,object-map]]
+       test-clone-io: 1   # remove this option to skip creating an rbd clone and running io on it
+ io-engine: "sync or rbd or any io-engine"
+ rw: randrw
+ client.1:
+ fio-io-size: 100g
+ fio-version: 2.2.9
+ rw: read
+       image_size: 20480
+
+or
+ all:
+ fio-io-size: 400g
+ rw: randrw
+ formats: [2]
+ features: [[layering],[striping]]
+ io-engine: libaio
+
+    Create rbd images + devices and exercise IO for the formats/features
+    provided in the config file. The config can be given per client, or one
+    config can be used for all clients; fio jobs are run in parallel for the
+    clients provided.
+
+ """
+ if config.get('all'):
+ client_config = config['all']
+ clients = ctx.cluster.only(teuthology.is_type('client'))
+ rbd_test_dir = teuthology.get_testdir(ctx) + "/rbd_fio_test"
+ for remote,role in clients.remotes.items():
+ if 'client_config' in locals():
+ with parallel() as p:
+ p.spawn(run_fio, remote, client_config, rbd_test_dir)
+ else:
+ for client_config in config:
+ if client_config in role:
+ with parallel() as p:
+ p.spawn(run_fio, remote, config[client_config], rbd_test_dir)
+
+ yield
+
+
+def get_ioengine_package_name(ioengine, remote):
+ system_type = teuthology.get_system_type(remote)
+ if ioengine == 'rbd':
+ return 'librbd1-devel' if system_type == 'rpm' else 'librbd-dev'
+ elif ioengine == 'libaio':
+ return 'libaio-devel' if system_type == 'rpm' else 'libaio-dev'
+ else:
+ return None
+
+
+def run_rbd_map(remote, image, iodepth):
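+    # Map the image via krbd with queue_depth set to the larger of fio's
+    # iodepth and the krbd default (128), and raise the block layer's
+    # nr_requests to match so the device does not throttle the workload.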
+ iodepth = max(iodepth, 128) # RBD_QUEUE_DEPTH_DEFAULT
+ dev = remote.sh(['sudo', 'rbd', 'device', 'map', '-o',
+ 'queue_depth={}'.format(iodepth), image]).rstrip('\n')
+ teuthology.sudo_write_file(
+ remote,
+ '/sys/block/{}/queue/nr_requests'.format(os.path.basename(dev)),
+ str(iodepth))
+ return dev
+
+
+def run_fio(remote, config, rbd_test_dir):
+ """
+    Create a fio config file with options based on the above config, fetch
+    fio from GitHub, build it, and run it against the generated config file.
+ """
+ fio_config=NamedTemporaryFile(mode='w', prefix='fio_rbd_', dir='/tmp/', delete=False)
+ fio_config.write('[global]\n')
+    ioengine = config.get('io-engine', 'sync')
+    fio_config.write('ioengine={ioe}\n'.format(ioe=ioengine))
+ if config.get('bs'):
+ bs=config['bs']
+ fio_config.write('bs={bs}\n'.format(bs=bs))
+ else:
+ fio_config.write('bs=4k\n')
+ iodepth = config.get('io-depth', 2)
+ fio_config.write('iodepth={iod}\n'.format(iod=iodepth))
+ if config.get('fio-io-size'):
+ size=config['fio-io-size']
+ fio_config.write('size={size}\n'.format(size=size))
+ else:
+ fio_config.write('size=100m\n')
+
+ fio_config.write('time_based\n')
+ if config.get('runtime'):
+ runtime=config['runtime']
+ fio_config.write('runtime={runtime}\n'.format(runtime=runtime))
+ else:
+ fio_config.write('runtime=1800\n')
+ fio_config.write('allow_file_create=0\n')
+ image_size=10240
+ if config.get('image_size'):
+ image_size=config['image_size']
+
+ formats=[1,2]
+ features=[['layering'],['striping'],['exclusive-lock','object-map']]
+ fio_version='2.21'
+ if config.get('formats'):
+ formats=config['formats']
+ if config.get('features'):
+ features=config['features']
+ if config.get('fio-version'):
+ fio_version=config['fio-version']
+
+ # handle package required for ioengine, if any
+ sn=remote.shortname
+ ioengine_pkg = get_ioengine_package_name(ioengine, remote)
+ if ioengine_pkg:
+ install_package(ioengine_pkg, remote)
+
+ fio_config.write('norandommap\n')
+ if ioengine == 'rbd':
+ fio_config.write('clientname=admin\n')
+ fio_config.write('pool=rbd\n')
+ fio_config.write('invalidate=0\n')
+ elif ioengine == 'libaio':
+ fio_config.write('direct=1\n')
+ for frmt in formats:
+ for feature in features:
+ log.info("Creating rbd images on {sn}".format(sn=sn))
+ feature_name = '-'.join(feature)
+ rbd_name = 'i{i}f{f}{sn}'.format(i=frmt,f=feature_name,sn=sn)
+ rbd_snap_name = 'i{i}f{f}{sn}@i{i}f{f}{sn}Snap'.format(i=frmt,f=feature_name,sn=sn)
+ rbd_clone_name = 'i{i}f{f}{sn}Clone'.format(i=frmt,f=feature_name,sn=sn)
+ create_args=['rbd', 'create',
+ '--size', '{size}'.format(size=image_size),
+ '--image', rbd_name,
+ '--image-format', '{f}'.format(f=frmt)]
+            # add each requested image feature as a separate --image-feature
+            # argument
+            for feat in feature:
+                create_args.extend(['--image-feature', feat])
+ remote.run(args=create_args)
+ remote.run(args=['rbd', 'info', rbd_name])
+ if ioengine != 'rbd':
+ rbd_dev = run_rbd_map(remote, rbd_name, iodepth)
+ if config.get('test-clone-io'):
+ log.info("Testing clones using fio")
+ remote.run(args=['rbd', 'snap', 'create', rbd_snap_name])
+ remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name])
+ remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name])
+ rbd_clone_dev = run_rbd_map(remote, rbd_clone_name, iodepth)
+ fio_config.write('[{rbd_dev}]\n'.format(rbd_dev=rbd_dev))
+                rw = config.get('rw', 'randrw')
+                fio_config.write('rw={rw}\n'.format(rw=rw))
+ fio_config.write('filename={rbd_dev}\n'.format(rbd_dev=rbd_dev))
+ if config.get('test-clone-io'):
+ fio_config.write('[{rbd_clone_dev}]\n'.format(rbd_clone_dev=rbd_clone_dev))
+ fio_config.write('rw={rw}\n'.format(rw=rw))
+ fio_config.write('filename={rbd_clone_dev}\n'.format(rbd_clone_dev=rbd_clone_dev))
+ else:
+ if config.get('test-clone-io'):
+ log.info("Testing clones using fio")
+ remote.run(args=['rbd', 'snap', 'create', rbd_snap_name])
+ remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name])
+ remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name])
+ fio_config.write('[{img_name}]\n'.format(img_name=rbd_name))
+                rw = config.get('rw', 'randrw')
+                fio_config.write('rw={rw}\n'.format(rw=rw))
+ fio_config.write('rbdname={img_name}\n'.format(img_name=rbd_name))
+ if config.get('test-clone-io'):
+ fio_config.write('[{clone_img_name}]\n'.format(clone_img_name=rbd_clone_name))
+ fio_config.write('rw={rw}\n'.format(rw=rw))
+ fio_config.write('rbdname={clone_img_name}\n'.format(clone_img_name=rbd_clone_name))
+
+
+ fio_config.close()
+ remote.put_file(fio_config.name,fio_config.name)
+ try:
+ log.info("Running rbd feature - fio test on {sn}".format(sn=sn))
+ fio = "https://github.com/axboe/fio/archive/fio-" + fio_version + ".tar.gz"
+ remote.run(args=['mkdir', run.Raw(rbd_test_dir),])
+ remote.run(args=['cd' , run.Raw(rbd_test_dir),
+ run.Raw(';'), 'wget', fio, run.Raw(';'), run.Raw('tar -xvf fio*tar.gz'), run.Raw(';'),
+ run.Raw('cd fio-fio*'), run.Raw(';'), './configure', run.Raw(';'), 'make'])
+ remote.run(args=['ceph', '-s'])
+ remote.run(args=[run.Raw('{tdir}/fio-fio-{v}/fio --showcmd {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))])
+ remote.run(args=['sudo', run.Raw('{tdir}/fio-fio-{v}/fio {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))])
+ remote.run(args=['ceph', '-s'])
+ finally:
+ out = remote.sh('rbd device list --format=json')
+ mapped_images = json.loads(out)
+ if mapped_images:
+ log.info("Unmapping rbd images on {sn}".format(sn=sn))
+ for image in mapped_images:
+ remote.run(args=['sudo', 'rbd', 'device', 'unmap',
+ str(image['device'])])
+ log.info("Cleaning up fio install")
+ remote.run(args=['rm','-rf', run.Raw(rbd_test_dir)])
+ if ioengine_pkg:
+ remove_package(ioengine_pkg, remote)
diff --git a/qa/tasks/rbd_fsx.py b/qa/tasks/rbd_fsx.py
new file mode 100644
index 00000000..396d8fed
--- /dev/null
+++ b/qa/tasks/rbd_fsx.py
@@ -0,0 +1,114 @@
+"""
+Run fsx on an rbd image
+"""
+import contextlib
+import logging
+
+from teuthology.exceptions import ConfigError
+from teuthology.parallel import parallel
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run fsx on an rbd image.
+
+ Currently this requires running as client.admin
+ to create a pool.
+
+ Specify which clients to run on as a list::
+
+ tasks:
+ ceph:
+ rbd_fsx:
+ clients: [client.0, client.1]
+
+    You can optionally change some properties of fsx::
+
+ tasks:
+ ceph:
+ rbd_fsx:
+ clients: <list of clients>
+ seed: <random seed number, or 0 to use the time>
+ ops: <number of operations to do>
+ size: <maximum image size in bytes>
+ valgrind: [--tool=<valgrind tool>]
+ """
+ log.info('starting rbd_fsx...')
+ with parallel() as p:
+ for role in config['clients']:
+ p.spawn(_run_one_client, ctx, config, role)
+ yield
+
+def _run_one_client(ctx, config, role):
+ """Spawned task that runs the client"""
+ krbd = config.get('krbd', False)
+ nbd = config.get('nbd', False)
+ testdir = teuthology.get_testdir(ctx)
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+
+ args = []
+ if krbd or nbd:
+ args.append('sudo') # rbd(-nbd) map/unmap need privileges
+ args.extend([
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir)
+ ])
+
+ overrides = ctx.config.get('overrides', {})
+ teuthology.deep_merge(config, overrides.get('rbd_fsx', {}))
+
+ if config.get('valgrind'):
+ args = teuthology.get_valgrind_args(
+ testdir,
+ 'fsx_{id}'.format(id=role),
+ args,
+ config.get('valgrind')
+ )
+
+ cluster_name, type_, client_id = teuthology.split_role(role)
+ if type_ != 'client':
+ msg = 'client role ({0}) must be a client'.format(role)
+ raise ConfigError(msg)
+
+ args.extend([
+ 'ceph_test_librbd_fsx',
+ '--cluster', cluster_name,
+ '--id', client_id,
+ '-d', # debug output for all operations
+ '-W', '-R', # mmap doesn't work with rbd
+ '-p', str(config.get('progress_interval', 100)), # show progress
+ '-P', '{tdir}/archive'.format(tdir=testdir),
+ '-r', str(config.get('readbdy',1)),
+ '-w', str(config.get('writebdy',1)),
+ '-t', str(config.get('truncbdy',1)),
+ '-h', str(config.get('holebdy',1)),
+ '-l', str(config.get('size', 250000000)),
+ '-S', str(config.get('seed', 0)),
+ '-N', str(config.get('ops', 1000)),
+ ])
+ if krbd:
+ args.append('-K') # -K enables krbd mode
+ if nbd:
+ args.append('-M') # -M enables nbd mode
+ if config.get('direct_io', False):
+ args.append('-Z') # -Z use direct IO
+ if not config.get('randomized_striping', True):
+ args.append('-U') # -U disables randomized striping
+ if not config.get('punch_holes', True):
+ args.append('-H') # -H disables discard ops
+ if config.get('deep_copy', False):
+ args.append('-g') # -g deep copy instead of clone
+ if config.get('journal_replay', False):
+ args.append('-j') # -j replay all IO events from journal
+ if config.get('keep_images', False):
+ args.append('-k') # -k keep images on success
+ args.extend([
+ config.get('pool_name', 'pool_{pool}'.format(pool=role)),
+ 'image_{image}'.format(image=role),
+ ])
+
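+    # With the default config and role 'client.0' the tail of the command
+    # looks roughly like (illustrative):
+    #   ceph_test_librbd_fsx --cluster ceph --id 0 -d -W -R -p 100 \
+    #     -P <testdir>/archive -r 1 -w 1 -t 1 -h 1 -l 250000000 -S 0 -N 1000 \
+    #     pool_client.0 image_client.0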
+ remote.run(args=args)
diff --git a/qa/tasks/rbd_mirror.py b/qa/tasks/rbd_mirror.py
new file mode 100644
index 00000000..5d6d1b2b
--- /dev/null
+++ b/qa/tasks/rbd_mirror.py
@@ -0,0 +1,119 @@
+"""
+Task for running rbd mirroring daemons and configuring mirroring
+"""
+
+import logging
+
+from teuthology.orchestra import run
+from teuthology import misc
+from teuthology.exceptions import ConfigError
+from teuthology.task import Task
+from tasks.util import get_remote_for_role
+
+log = logging.getLogger(__name__)
+
+
+class RBDMirror(Task):
+ """
+ Run an rbd-mirror daemon to sync rbd images between clusters.
+
+ This requires two clients (one from each cluster) on the same host
+ to connect with. The pool configuration should be adjusted by later
+ test scripts to include the remote client and cluster name. This task
+ just needs to know how to connect to the local cluster.
+
+ For example:
+
+ roles:
+ - [primary.mon.a, primary.osd.0, primary.osd.1, primary.osd.2]
+ - [secondary.mon.a, secondary.osd.0, secondary.osd.1, secondary.osd.2]
+ - [primary.client.mirror, secondary.client.mirror]
+ tasks:
+ - ceph:
+ cluster: primary
+ - ceph:
+ cluster: secondary
+ - rbd-mirror:
+ client: primary.client.mirror
+
+ To mirror back to the primary cluster as well, add another
+ rbd_mirror instance:
+
+ - rbd-mirror:
+ client: secondary.client.mirror
+
+ Possible options for this task are:
+
+ client: role - ceph client to connect as
+ valgrind: [--tool=<valgrind tool>] - none by default
+ coverage: bool - whether this run may be collecting coverage data
+ thrash: bool - whether this run may be thrashed
+ """
+ def __init__(self, ctx, config):
+ super(RBDMirror, self).__init__(ctx, config)
+ self.log = log
+
+ def setup(self):
+ super(RBDMirror, self).setup()
+ try:
+ self.client = self.config['client']
+ except KeyError:
+ raise ConfigError('rbd-mirror requires a client to connect with')
+
+ self.cluster_name, type_, self.client_id = misc.split_role(self.client)
+
+ if type_ != 'client':
+ msg = 'client role ({0}) must be a client'.format(self.client)
+ raise ConfigError(msg)
+
+ self.remote = get_remote_for_role(self.ctx, self.client)
+
+ def begin(self):
+ super(RBDMirror, self).begin()
+ testdir = misc.get_testdir(self.ctx)
+ daemon_signal = 'kill'
+ if 'coverage' in self.config or 'valgrind' in self.config or \
+ self.config.get('thrash', False):
+ daemon_signal = 'term'
+
+ args = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'daemon-helper',
+ daemon_signal,
+ ]
+
+ if 'valgrind' in self.config:
+ args = misc.get_valgrind_args(
+ testdir,
+ 'rbd-mirror-{id}'.format(id=self.client),
+ args,
+ self.config.get('valgrind')
+ )
+
+ args.extend([
+ 'rbd-mirror', '--foreground',
+ '--cluster',
+ self.cluster_name,
+ '--id',
+ self.client_id,
+ ])
+
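+        # the daemon is launched roughly as (illustrative):
+        #   adjust-ulimits ceph-coverage <testdir>/archive/coverage \
+        #     daemon-helper kill rbd-mirror --foreground \
+        #     --cluster <cluster> --id <id>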
+ self.ctx.daemons.add_daemon(
+ self.remote, 'rbd-mirror', self.client,
+ cluster=self.cluster_name,
+ args=args,
+ logger=self.log.getChild(self.client),
+ stdin=run.PIPE,
+ wait=False,
+ )
+
+ def end(self):
+ mirror_daemon = self.ctx.daemons.get_daemon('rbd-mirror',
+ self.client,
+ self.cluster_name)
+ mirror_daemon.stop()
+ super(RBDMirror, self).end()
+
+task = RBDMirror
diff --git a/qa/tasks/rbd_mirror_thrash.py b/qa/tasks/rbd_mirror_thrash.py
new file mode 100644
index 00000000..67e1c332
--- /dev/null
+++ b/qa/tasks/rbd_mirror_thrash.py
@@ -0,0 +1,214 @@
+"""
+Task for thrashing rbd-mirror daemons
+"""
+
+import contextlib
+import logging
+import random
+import signal
+import socket
+import time
+
+from gevent import sleep
+from gevent.greenlet import Greenlet
+from gevent.event import Event
+
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+class RBDMirrorThrasher(Greenlet):
+ """
+ RBDMirrorThrasher::
+
+ The RBDMirrorThrasher thrashes rbd-mirror daemons during execution of other
+ tasks (workunits, etc).
+
+ The config is optional. Many of the config parameters are a maximum value
+ to use when selecting a random value from a range. The config is a dict
+ containing some or all of:
+
+ cluster: [default: ceph] cluster to thrash
+
+ max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per
+      cluster that will be thrashed at any given time.
+
+ min_thrash_delay: [default: 60] minimum number of seconds to delay before
+ thrashing again.
+
+ max_thrash_delay: [default: 120] maximum number of seconds to delay before
+ thrashing again.
+
+ max_revive_delay: [default: 10] maximum number of seconds to delay before
+ bringing back a thrashed rbd-mirror daemon.
+
+    randomize: [default: true] enables randomization and uses the max/min values
+
+ seed: [no default] seed the random number generator
+
+ Examples::
+
+ The following example disables randomization, and uses the max delay
+ values:
+
+ tasks:
+ - ceph:
+ - rbd_mirror_thrash:
+ randomize: False
+ max_thrash_delay: 10
+ """
+
+ def __init__(self, ctx, config, cluster, daemons):
+ Greenlet.__init__(self)
+
+ self.ctx = ctx
+ self.config = config
+ self.cluster = cluster
+ self.daemons = daemons
+
+ self.e = None
+ self.logger = log
+ self.name = 'thrasher.rbd_mirror.[{cluster}]'.format(cluster = cluster)
+ self.stopping = Event()
+
+ self.randomize = bool(self.config.get('randomize', True))
+ self.max_thrash = int(self.config.get('max_thrash', 1))
+ self.min_thrash_delay = float(self.config.get('min_thrash_delay', 60.0))
+ self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0))
+ self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
+
+ def _run(self):
+ try:
+ self.do_thrash()
+ except Exception as e:
+ self.e = e
+ self.logger.exception("exception:")
+
+ def log(self, x):
+ """Write data to logger assigned to this RBDMirrorThrasher"""
+ self.logger.info(x)
+
+ def stop(self):
+ self.stopping.set()
+
+ def do_thrash(self):
+ """
+ Perform the random thrashing action
+ """
+
+ self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster))
+ stats = {
+ "kill": 0,
+ }
+
+ while not self.stopping.is_set():
+ delay = self.max_thrash_delay
+ if self.randomize:
+ delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay)
+
+ if delay > 0.0:
+ self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
+ self.stopping.wait(delay)
+ if self.stopping.is_set():
+ continue
+
+ killed_daemons = []
+
+ weight = 1.0 / len(self.daemons)
+ count = 0
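+            # each daemon is killed with probability 1/len(daemons) per pass,
+            # so on average one daemon is thrashed per cycle, capped by
+            # max_thrash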
+ for daemon in self.daemons:
+ skip = random.uniform(0.0, 1.0)
+ if weight <= skip:
+ self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
+ label=daemon.id_, skip=skip, weight=weight))
+ continue
+
+ self.log('kill {label}'.format(label=daemon.id_))
+ try:
+ daemon.signal(signal.SIGTERM)
+ except socket.error:
+ pass
+ killed_daemons.append(daemon)
+ stats['kill'] += 1
+
+ # if we've reached max_thrash, we're done
+ count += 1
+ if count >= self.max_thrash:
+ break
+
+ if killed_daemons:
+ # wait for a while before restarting
+ delay = self.max_revive_delay
+ if self.randomize:
+ delay = random.randrange(0.0, self.max_revive_delay)
+
+ self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay))
+ sleep(delay)
+
+ for daemon in killed_daemons:
+ self.log('waiting for {label}'.format(label=daemon.id_))
+ try:
+ run.wait([daemon.proc], timeout=600)
+ except CommandFailedError:
+ pass
+ except:
+ self.log('Failed to stop {label}'.format(label=daemon.id_))
+
+ try:
+ # try to capture a core dump
+ daemon.signal(signal.SIGABRT)
+ except socket.error:
+ pass
+ raise
+ finally:
+ daemon.reset()
+
+ for daemon in killed_daemons:
+ self.log('reviving {label}'.format(label=daemon.id_))
+ daemon.start()
+
+ for stat in stats:
+ self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat]))
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Stress test the rbd-mirror by thrashing while another task/workunit
+ is running.
+
+ Please refer to RBDMirrorThrasher class for further information on the
+ available options.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'rbd_mirror_thrash task only accepts a dict for configuration'
+
+ cluster = config.get('cluster', 'ceph')
+ daemons = list(ctx.daemons.iter_daemons_of_role('rbd-mirror', cluster))
+ assert len(daemons) > 0, \
+ 'rbd_mirror_thrash task requires at least 1 rbd-mirror daemon'
+
+ # choose random seed
+ if 'seed' in config:
+ seed = int(config['seed'])
+ else:
+ seed = int(time.time())
+ log.info('rbd_mirror_thrash using random seed: {seed}'.format(seed=seed))
+ random.seed(seed)
+
+ thrasher = RBDMirrorThrasher(ctx, config, cluster, daemons)
+ thrasher.start()
+
+ try:
+ log.debug('Yielding')
+ yield
+ finally:
+ log.info('joining rbd_mirror_thrash')
+ thrasher.stop()
+ if thrasher.e:
+ raise RuntimeError('error during thrashing')
+ thrasher.join()
+ log.info('done joining')
diff --git a/qa/tasks/rebuild_mondb.py b/qa/tasks/rebuild_mondb.py
new file mode 100644
index 00000000..008e312e
--- /dev/null
+++ b/qa/tasks/rebuild_mondb.py
@@ -0,0 +1,224 @@
+"""
+Test if we can recover the mon leveldb from the OSDs when all mon leveldbs
+are corrupted
+"""
+
+import logging
+import os.path
+import shutil
+import tempfile
+
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+
+def _push_directory(path, remote, remote_dir):
+ """
+ local_temp_path=`mktemp`
+ tar czf $local_temp_path $path
+ ssh remote mkdir -p remote_dir
+ remote_temp_path=`mktemp`
+ scp $local_temp_path $remote_temp_path
+ rm $local_temp_path
+ tar xzf $remote_temp_path -C $remote_dir
+ ssh remote:$remote_temp_path
+ """
+ fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
+ prefix='rebuild_mondb-')
+ os.close(fd)
+ cmd = ' '.join(['tar', 'cz',
+ '-f', local_temp_path,
+ '-C', path,
+ '--', '.'])
+ teuthology.sh(cmd)
+ _, fname = os.path.split(local_temp_path)
+ fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
+ prefix='rebuild_mondb-')
+ os.close(fd)
+ remote.put_file(local_temp_path, remote_temp_path)
+ os.remove(local_temp_path)
+ remote.run(args=['sudo',
+ 'tar', 'xz',
+ '-C', remote_dir,
+ '-f', remote_temp_path])
+ remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])
+
+
+def _nuke_mons(manager, mons, mon_id):
+ assert mons
+ is_mon = teuthology.is_type('mon')
+ for remote, roles in mons.remotes.items():
+ for role in roles:
+ if not is_mon(role):
+ continue
+ cluster, _, m = teuthology.split_role(role)
+ log.info('killing {cluster}:mon.{mon}'.format(
+ cluster=cluster,
+ mon=m))
+ manager.kill_mon(m)
+ mon_data = os.path.join('/var/lib/ceph/mon/',
+ '{0}-{1}'.format(cluster, m))
+ if m == mon_id:
+ # so we will only need to recreate the store.db for the
+                # first mon; this is easier than running mkfs on it and then
+                # replacing its store.db with the recovered one
+ store_dir = os.path.join(mon_data, 'store.db')
+ remote.run(args=['sudo', 'rm', '-r', store_dir])
+ else:
+ remote.run(args=['sudo', 'rm', '-r', mon_data])
+
+
+def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
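+    # Rough outline: for every OSD, push the mon store built so far to the
+    # OSD host, fold that OSD's maps into it with ceph-objectstore-tool
+    # --op update-mon-db, and pull the store back.  The accumulated store is
+    # then pushed to the surviving mon and rebuilt with ceph-monstore-tool.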
+ local_mstore = tempfile.mkdtemp()
+
+ # collect the maps from all OSDs
+ is_osd = teuthology.is_type('osd')
+ osds = ctx.cluster.only(is_osd)
+ assert osds
+ for osd, roles in osds.remotes.items():
+ for role in roles:
+ if not is_osd(role):
+ continue
+ cluster, _, osd_id = teuthology.split_role(role)
+ assert cluster_name == cluster
+ log.info('collecting maps from {cluster}:osd.{osd}'.format(
+ cluster=cluster,
+ osd=osd_id))
+ # push leveldb to OSD
+ osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
+ osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])
+
+ _push_directory(local_mstore, osd, osd_mstore)
+ log.info('rm -rf {0}'.format(local_mstore))
+ shutil.rmtree(local_mstore)
+ # update leveldb with OSD data
+ options = '--no-mon-config --op update-mon-db --mon-store-path {0}'
+ log.info('cot {0}'.format(osd_mstore))
+ manager.objectstore_tool(pool=None,
+ options=options.format(osd_mstore),
+ args='',
+ osd=osd_id,
+ do_revive=False)
+ # pull the updated mon db
+ log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
+ local_mstore = tempfile.mkdtemp()
+ teuthology.pull_directory(osd, osd_mstore, local_mstore)
+ log.info('rm -rf osd:{0}'.format(osd_mstore))
+ osd.run(args=['sudo', 'rm', '-fr', osd_mstore])
+
+ # recover the first_mon with re-built mon db
+ # pull from recovered leveldb from client
+ mon_store_dir = os.path.join('/var/lib/ceph/mon',
+ '{0}-{1}'.format(cluster_name, mon_id))
+ _push_directory(local_mstore, mon, mon_store_dir)
+ mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
+ shutil.rmtree(local_mstore)
+
+ # fill up the caps in the keyring file
+ mon.run(args=['sudo',
+ 'ceph-authtool', keyring_path,
+ '-n', 'mon.',
+ '--cap', 'mon', 'allow *'])
+ mon.run(args=['sudo',
+ 'ceph-authtool', keyring_path,
+ '-n', 'client.admin',
+ '--cap', 'mon', 'allow *',
+ '--cap', 'osd', 'allow *',
+ '--cap', 'mds', 'allow *',
+ '--cap', 'mgr', 'allow *'])
+ mon.run(args=['sudo', '-u', 'ceph',
+ 'CEPH_ARGS=--no-mon-config',
+ 'ceph-monstore-tool', mon_store_dir,
+ 'rebuild', '--',
+ '--keyring', keyring_path,
+ '--monmap', '/tmp/monmap',
+ ])
+
+
+def _revive_mons(manager, mons, recovered, keyring_path):
+ # revive monitors
+ # the initial monmap is in the ceph.conf, so we are good.
+ n_mons = 0
+ is_mon = teuthology.is_type('mon')
+ for remote, roles in mons.remotes.items():
+ for role in roles:
+ if not is_mon(role):
+ continue
+ cluster, _, m = teuthology.split_role(role)
+ if recovered != m:
+ log.info('running mkfs on {cluster}:mon.{mon}'.format(
+ cluster=cluster,
+ mon=m))
+ remote.run(
+ args=[
+ 'sudo',
+ 'ceph-mon',
+ '--cluster', cluster,
+ '--mkfs',
+ '-i', m,
+ '--keyring', keyring_path,
+ '--monmap', '/tmp/monmap'])
+ log.info('reviving mon.{0}'.format(m))
+ manager.revive_mon(m)
+ n_mons += 1
+ manager.wait_for_mon_quorum_size(n_mons, timeout=30)
+
+
+def _revive_mgrs(ctx, manager):
+ is_mgr = teuthology.is_type('mgr')
+ mgrs = ctx.cluster.only(is_mgr)
+ for _, roles in mgrs.remotes.items():
+ for role in roles:
+ if not is_mgr(role):
+ continue
+ _, _, mgr_id = teuthology.split_role(role)
+ log.info('reviving mgr.{0}'.format(mgr_id))
+ manager.revive_mgr(mgr_id)
+
+
+def _revive_osds(ctx, manager):
+ is_osd = teuthology.is_type('osd')
+ osds = ctx.cluster.only(is_osd)
+ for _, roles in osds.remotes.items():
+ for role in roles:
+ if not is_osd(role):
+ continue
+ _, _, osd_id = teuthology.split_role(role)
+ log.info('reviving osd.{0}'.format(osd_id))
+ manager.revive_osd(osd_id)
+
+
+def task(ctx, config):
+ """
+ Test monitor recovery from OSD
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'task only accepts a dict for configuration'
+
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ # stash a monmap for later
+ mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap'])
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'))
+
+ mons = ctx.cluster.only(teuthology.is_type('mon'))
+ # note down the first cluster_name and mon_id
+ # we will recover it later on
+ cluster_name, _, mon_id = teuthology.split_role(first_mon)
+ _nuke_mons(manager, mons, mon_id)
+ default_keyring = '/etc/ceph/{cluster}.keyring'.format(
+ cluster=cluster_name)
+ keyring_path = config.get('keyring_path', default_keyring)
+ _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
+ _revive_mons(manager, mons, mon_id, keyring_path)
+ _revive_mgrs(ctx, manager)
+ _revive_osds(ctx, manager)
diff --git a/qa/tasks/reg11184.py b/qa/tasks/reg11184.py
new file mode 100644
index 00000000..86cfbf39
--- /dev/null
+++ b/qa/tasks/reg11184.py
@@ -0,0 +1,242 @@
+"""
+Special regression test for tracker #11184
+
+Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid))
+
+This is accomplished by moving a pg that wasn't part of a split and still
+includes divergent priors.
+"""
+import logging
+import time
+
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+from tasks.util.rados import rados
+import os
+
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+ """
+ Test handling of divergent entries during export / import
+ to regression test tracker #11184
+
+ overrides:
+ ceph:
+ conf:
+ osd:
+ debug osd: 5
+
+ Requires 3 osds on a single test node.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'divergent_priors task only accepts a dict for configuration'
+
+ manager = ctx.managers['ceph']
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ osds = [0, 1, 2]
+ manager.flush_pg_stats(osds)
+ manager.raw_cluster_cmd('osd', 'set', 'noout')
+ manager.raw_cluster_cmd('osd', 'set', 'noin')
+ manager.raw_cluster_cmd('osd', 'set', 'nodown')
+ manager.wait_for_clean()
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+ dummyfile2 = '/etc/resolv.conf'
+ testdir = teuthology.get_testdir(ctx)
+
+ # create 1 pg pool
+ log.info('creating foo')
+ manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
+ manager.raw_cluster_cmd(
+ 'osd', 'pool', 'application', 'enable',
+ 'foo', 'rados', run.Raw('||'), 'true')
+
+    # Remove extra pool to simplify log output
+ manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it')
+
+ for i in osds:
+ manager.set_config(i, osd_min_pg_log_entries=10)
+ manager.set_config(i, osd_max_pg_log_entries=10)
+ manager.set_config(i, osd_pg_log_trim_min=5)
+
+ # determine primary
+ divergent = manager.get_pg_primary('foo', 0)
+ log.info("primary and soon to be divergent is %d", divergent)
+ non_divergent = list(osds)
+ non_divergent.remove(divergent)
+
+ log.info('writing initial objects')
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ # write 100 objects
+ for i in range(100):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])
+
+ manager.wait_for_clean()
+
+ # blackhole non_divergent
+ log.info("blackholing osds %s", str(non_divergent))
+ for i in non_divergent:
+ manager.set_config(i, objectstore_blackhole=1)
+
+ DIVERGENT_WRITE = 5
+ DIVERGENT_REMOVE = 5
+ # Write some soon to be divergent
+ log.info('writing divergent objects')
+ for i in range(DIVERGENT_WRITE):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
+ dummyfile2], wait=False)
+ # Remove some soon to be divergent
+ log.info('remove divergent objects')
+ for i in range(DIVERGENT_REMOVE):
+ rados(ctx, mon, ['-p', 'foo', 'rm',
+ 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
+ time.sleep(10)
+ mon.run(
+ args=['killall', '-9', 'rados'],
+ wait=True,
+ check_status=False)
+
+ # kill all the osds but leave divergent in
+ log.info('killing all the osds')
+ for i in osds:
+ manager.kill_osd(i)
+ for i in osds:
+ manager.mark_down_osd(i)
+ for i in non_divergent:
+ manager.mark_out_osd(i)
+
+ # bring up non-divergent
+ log.info("bringing up non_divergent %s", str(non_divergent))
+ for i in non_divergent:
+ manager.revive_osd(i)
+ for i in non_divergent:
+ manager.mark_in_osd(i)
+
+ # write 1 non-divergent object (ensure that old divergent one is divergent)
+ objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
+ log.info('writing non-divergent object ' + objname)
+ rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])
+
+ manager.wait_for_recovery()
+
+ # ensure no recovery of up osds first
+ log.info('delay recovery')
+ for i in non_divergent:
+ manager.wait_run_admin_socket(
+ 'osd', i, ['set_recovery_delay', '100000'])
+
+ # bring in our divergent friend
+ log.info("revive divergent %d", divergent)
+ manager.raw_cluster_cmd('osd', 'set', 'noup')
+ manager.revive_osd(divergent)
+
+ log.info('delay recovery divergent')
+ manager.wait_run_admin_socket(
+ 'osd', divergent, ['set_recovery_delay', '100000'])
+
+ manager.raw_cluster_cmd('osd', 'unset', 'noup')
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+
+ log.info('wait for peering')
+ rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])
+
+ # At this point the divergent_priors should have been detected
+
+ log.info("killing divergent %d", divergent)
+ manager.kill_osd(divergent)
+
+ # Split pgs for pool foo
+ manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2')
+ time.sleep(5)
+
+ manager.raw_cluster_cmd('pg','dump')
+
+ # Export a pg
+ (exp_remote,) = ctx.\
+ cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
+ FSPATH = manager.get_filepath()
+ JPATH = os.path.join(FSPATH, "journal")
+ prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
+ "--data-path {fpath} --journal-path {jpath} "
+ "--log-file="
+ "/var/log/ceph/objectstore_tool.$$.log ".
+ format(fpath=FSPATH, jpath=JPATH))
+ pid = os.getpid()
+ expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
+ cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}").
+ format(id=divergent, file=expfile))
+ try:
+ exp_remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ assert e.exitstatus == 0
+
+ # Kill one of non-divergent OSDs
+ log.info('killing osd.%d' % non_divergent[0])
+ manager.kill_osd(non_divergent[0])
+ manager.mark_down_osd(non_divergent[0])
+ # manager.mark_out_osd(non_divergent[0])
+
+ # An empty collection for pg 2.0 might need to be cleaned up
+ cmd = ((prefix + "--force --op remove --pgid 2.0").
+ format(id=non_divergent[0]))
+ exp_remote.sh(cmd, wait=True, check_status=False)
+
+ cmd = ((prefix + "--op import --file {file}").
+ format(id=non_divergent[0], file=expfile))
+ try:
+ exp_remote.sh(cmd, wait=True)
+ except CommandFailedError as e:
+ assert e.exitstatus == 0
+
+ # bring in our divergent friend and other node
+ log.info("revive divergent %d", divergent)
+ manager.revive_osd(divergent)
+ manager.mark_in_osd(divergent)
+ log.info("revive %d", non_divergent[0])
+ manager.revive_osd(non_divergent[0])
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+
+ log.info('delay recovery divergent')
+ manager.set_config(divergent, osd_recovery_delay_start=100000)
+ log.info('mark divergent in')
+ manager.mark_in_osd(divergent)
+
+ log.info('wait for peering')
+ rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])
+
+ log.info("killing divergent %d", divergent)
+ manager.kill_osd(divergent)
+ log.info("reviving divergent %d", divergent)
+ manager.revive_osd(divergent)
+ time.sleep(3)
+
+ log.info('allowing recovery')
+ # Set osd_recovery_delay_start back to 0 and kick the queue
+ for i in osds:
+ manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
+ 'kick_recovery_wq', ' 0')
+
+ log.info('reading divergent objects')
+ for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
+ exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
+ '/tmp/existing'])
+ assert exit_status == 0
+
+ (remote,) = ctx.\
+ cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
+ cmd = 'rm {file}'.format(file=expfile)
+ remote.run(args=cmd, wait=True)
+ log.info("success")
diff --git a/qa/tasks/rep_lost_unfound_delete.py b/qa/tasks/rep_lost_unfound_delete.py
new file mode 100644
index 00000000..d422a33b
--- /dev/null
+++ b/qa/tasks/rep_lost_unfound_delete.py
@@ -0,0 +1,178 @@
+"""
+Lost_unfound
+"""
+import logging
+import time
+
+from tasks import ceph_manager
+from tasks.util.rados import rados
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test handling of lost objects.
+
+    A pretty rigid cluster is brought up and tested by this task.
+ """
+ POOL = 'unfounddel_pool'
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'lost_unfound task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+ manager.flush_pg_stats([0, 1, 2])
+ manager.wait_for_clean()
+
+ manager.create_pool(POOL)
+
+ # something that is always there
+ dummyfile = '/etc/fstab'
+
+ # take an osd out until the very end
+ manager.kill_osd(2)
+ manager.mark_down_osd(2)
+ manager.mark_out_osd(2)
+
+ # kludge to make sure they get a map
+ rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile])
+
+ manager.flush_pg_stats([0, 1])
+ manager.wait_for_recovery()
+
+ # create old objects
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f])
+
+ # delay recovery, and make the pg log very long (to prevent backfill)
+ manager.raw_cluster_cmd(
+ 'tell', 'osd.1',
+ 'injectargs',
+ '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000'
+ )
+
+ manager.kill_osd(0)
+ manager.mark_down_osd(0)
+
+ for f in range(1, 10):
+ rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile])
+ rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile])
+
+ # bring osd.0 back up, let it peer, but don't replicate the new
+ # objects...
+ log.info('osd.0 command_args is %s' % 'foo')
+ log.info(ctx.daemons.get_daemon('osd', 0).command_args)
+ ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
+ '--osd-recovery-delay-start', '1000'
+ ])
+ manager.revive_osd(0)
+ manager.mark_in_osd(0)
+ manager.wait_till_osd_is_up(0)
+
+ manager.flush_pg_stats([0, 1])
+ manager.wait_till_active()
+
+ # take out osd.1 and the only copy of those objects.
+ manager.kill_osd(1)
+ manager.mark_down_osd(1)
+ manager.mark_out_osd(1)
+ manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+
+    # bring up osd.2 so that things would otherwise, in theory, recover fully
+ manager.revive_osd(2)
+ manager.mark_in_osd(2)
+ manager.wait_till_osd_is_up(2)
+
+ manager.flush_pg_stats([0, 2])
+ manager.wait_till_active()
+ manager.flush_pg_stats([0, 2])
+
+ # verify that there are unfound objects
+ unfound = manager.get_num_unfound_objects()
+ log.info("there are %d unfound objects" % unfound)
+ assert unfound
+
+ testdir = teuthology.get_testdir(ctx)
+ procs = []
+ if config.get('parallel_bench', True):
+ procs.append(mon.run(
+ args=[
+ "/bin/sh", "-c",
+ " ".join(['adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage',
+ 'rados',
+ '--no-log-to-stderr',
+ '--name', 'client.admin',
+ '-b', str(4<<10),
+ '-p' , POOL,
+ '-t', '20',
+ 'bench', '240', 'write',
+ ]).format(tdir=testdir),
+ ],
+ logger=log.getChild('radosbench.{id}'.format(id='client.admin')),
+ stdin=run.PIPE,
+ wait=False
+ ))
+ time.sleep(10)
+
+ # mark stuff lost
+ pgs = manager.get_pg_stats()
+ for pg in pgs:
+ if pg['stat_sum']['num_objects_unfound'] > 0:
+ primary = 'osd.%d' % pg['acting'][0]
+
+ # verify that i can list them direct from the osd
+ log.info('listing missing/lost in %s state %s', pg['pgid'],
+ pg['state'])
+ m = manager.list_pg_unfound(pg['pgid'])
+ #log.info('%s' % m)
+ assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound']
+ num_unfound = 0
+ for o in m['objects']:
+ if len(o['locations']) == 0:
+ num_unfound += 1
+ assert m['num_unfound'] == num_unfound
+
+ log.info("reverting unfound in %s on %s", pg['pgid'], primary)
+ manager.raw_cluster_cmd('pg', pg['pgid'],
+ 'mark_unfound_lost', 'delete')
+ else:
+ log.info("no unfound in %s", pg['pgid'])
+
+ manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+ manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+ manager.flush_pg_stats([0, 2])
+ manager.wait_for_recovery()
+
+ # verify result
+ for f in range(1, 10):
+ err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-'])
+ assert err
+ err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-'])
+ assert err
+ err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-'])
+ assert err
+
+ # see if osd.1 can cope
+ manager.mark_in_osd(1)
+ manager.revive_osd(1)
+ manager.wait_till_osd_is_up(1)
+ manager.wait_for_clean()
+ run.wait(procs)
+
diff --git a/qa/tasks/repair_test.py b/qa/tasks/repair_test.py
new file mode 100644
index 00000000..973273bb
--- /dev/null
+++ b/qa/tasks/repair_test.py
@@ -0,0 +1,309 @@
+"""
+Test pool repairing after objects are damaged.
+"""
+import logging
+import time
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+
+def choose_primary(manager, pool, num):
+ """
+ Return primary to test on.
+ """
+ log.info("Choosing primary")
+ return manager.get_pg_primary(pool, num)
+
+
+def choose_replica(manager, pool, num):
+ """
+ Return replica to test on.
+ """
+ log.info("Choosing replica")
+ return manager.get_pg_replica(pool, num)
+
+
+def trunc(manager, osd, pool, obj):
+ """
+ truncate an object
+ """
+ log.info("truncating object")
+ return manager.osd_admin_socket(
+ osd,
+ ['truncobj', pool, obj, '1'])
+
+
+def dataerr(manager, osd, pool, obj):
+ """
+ cause an error in the data
+ """
+ log.info("injecting data err on object")
+ return manager.osd_admin_socket(
+ osd,
+ ['injectdataerr', pool, obj])
+
+
+def mdataerr(manager, osd, pool, obj):
+ """
+ cause an error in the mdata
+ """
+ log.info("injecting mdata err on object")
+ return manager.osd_admin_socket(
+ osd,
+ ['injectmdataerr', pool, obj])
+
+
+def omaperr(manager, osd, pool, obj):
+ """
+ Cause an omap error.
+ """
+ log.info("injecting omap err on object")
+ return manager.osd_admin_socket(osd, ['setomapval', pool, obj,
+ 'badkey', 'badval'])
+
+
+def repair_test_1(manager, corrupter, chooser, scrub_type):
+ """
+ Creates an object in the pool, corrupts it,
+ scrubs it, and verifies that the pool is inconsistent. It then repairs
+ the pool, rescrubs it, and verifies that the pool is consistent
+
+ :param corrupter: error generating function (truncate, data-error, or
+ meta-data error, for example).
+ :param chooser: osd type chooser (primary or replica)
+ :param scrub_type: regular scrub or deep-scrub
+ """
+ pool = "repair_pool_1"
+ manager.wait_for_clean()
+ with manager.pool(pool, 1):
+
+ log.info("starting repair test type 1")
+ victim_osd = chooser(manager, pool, 0)
+
+ # create object
+ log.info("doing put")
+ manager.do_put(pool, 'repair_test_obj', '/etc/hosts')
+
+ # corrupt object
+ log.info("corrupting object")
+ corrupter(manager, victim_osd, pool, 'repair_test_obj')
+
+ # verify inconsistent
+ log.info("scrubbing")
+ manager.do_pg_scrub(pool, 0, scrub_type)
+
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)
+
+ # repair
+ log.info("repairing")
+ manager.do_pg_scrub(pool, 0, "repair")
+
+ log.info("re-scrubbing")
+ manager.do_pg_scrub(pool, 0, scrub_type)
+
+ # verify consistent
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)
+ log.info("done")
+
+
+def repair_test_2(ctx, manager, config, chooser):
+ """
+ First creates a set of objects and
+ sets the omap value. It then corrupts an object, does both a scrub
+ and a deep-scrub, and then corrupts more objects. After that, it
+ repairs the pool and makes sure that the pool is consistent some
+ time after a deep-scrub.
+
+ :param chooser: primary or replica selection routine.
+ """
+ pool = "repair_pool_2"
+ manager.wait_for_clean()
+ with manager.pool(pool, 1):
+ log.info("starting repair test type 2")
+ victim_osd = chooser(manager, pool, 0)
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ # create object
+ log.info("doing put and setomapval")
+ manager.do_put(pool, 'file1', '/etc/hosts')
+ manager.do_rados(mon, ['-p', pool, 'setomapval', 'file1',
+ 'key', 'val'])
+ manager.do_put(pool, 'file2', '/etc/hosts')
+ manager.do_put(pool, 'file3', '/etc/hosts')
+ manager.do_put(pool, 'file4', '/etc/hosts')
+ manager.do_put(pool, 'file5', '/etc/hosts')
+ manager.do_rados(mon, ['-p', pool, 'setomapval', 'file5',
+ 'key', 'val'])
+ manager.do_put(pool, 'file6', '/etc/hosts')
+
+ # corrupt object
+ log.info("corrupting object")
+ omaperr(manager, victim_osd, pool, 'file1')
+
+ # verify inconsistent
+ log.info("scrubbing")
+ manager.do_pg_scrub(pool, 0, 'deep-scrub')
+
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)
+
+ # Regression test for bug #4778, should still
+ # be inconsistent after scrub
+ manager.do_pg_scrub(pool, 0, 'scrub')
+
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)
+
+ # Additional corruptions including 2 types for file1
+ log.info("corrupting more objects")
+ dataerr(manager, victim_osd, pool, 'file1')
+ mdataerr(manager, victim_osd, pool, 'file2')
+ trunc(manager, victim_osd, pool, 'file3')
+ omaperr(manager, victim_osd, pool, 'file6')
+
+ # see still inconsistent
+ log.info("scrubbing")
+ manager.do_pg_scrub(pool, 0, 'deep-scrub')
+
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)
+
+ # repair
+ log.info("repairing")
+ manager.do_pg_scrub(pool, 0, "repair")
+
+ # Let repair clear inconsistent flag
+ time.sleep(10)
+
+ # verify consistent
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)
+
+ # In the future repair might determine state of
+ # inconsistency itself, verify with a deep-scrub
+ log.info("scrubbing")
+ manager.do_pg_scrub(pool, 0, 'deep-scrub')
+
+ # verify consistent
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)
+
+ log.info("done")
+
+
+def hinfoerr(manager, victim, pool, obj):
+ """
+ cause an error in the hinfo_key
+ """
+ log.info("remove the hinfo_key")
+ manager.objectstore_tool(pool,
+ options='',
+ args='rm-attr hinfo_key',
+ object_name=obj,
+ osd=victim)
+
+
+def repair_test_erasure_code(manager, corrupter, victim, scrub_type):
+ """
+ Creates an object in the pool, corrupts it,
+ scrubs it, and verifies that the pool is inconsistent. It then repairs
+ the pool, rescrubs it, and verifies that the pool is consistent
+
+ :param corrupter: error generating function.
+ :param victim: osd to corrupt ('primary' means the pg's primary osd)
+ :param scrub_type: regular scrub or deep-scrub
+ """
+ pool = "repair_pool_3"
+ manager.wait_for_clean()
+ with manager.pool(pool_name=pool, pg_num=1,
+ erasure_code_profile_name='default'):
+
+ log.info("starting repair test for erasure code")
+
+ # create object
+ log.info("doing put")
+ manager.do_put(pool, 'repair_test_obj', '/etc/hosts')
+
+ # corrupt object
+ log.info("corrupting object")
+ corrupter(manager, victim, pool, 'repair_test_obj')
+
+ # verify inconsistent
+ log.info("scrubbing")
+ manager.do_pg_scrub(pool, 0, scrub_type)
+
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)
+
+ # repair
+ log.info("repairing")
+ manager.do_pg_scrub(pool, 0, "repair")
+
+ log.info("re-scrubbing")
+ manager.do_pg_scrub(pool, 0, scrub_type)
+
+ # verify consistent
+ manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)
+ log.info("done")
+
+
+def task(ctx, config):
+ """
+ Test [deep] repair in several situations:
+ Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]
+
+ The config should be as follows:
+
+ Must include the log-whitelist below
+ Must enable filestore_debug_inject_read_err config
+
+ example:
+
+ tasks:
+ - chef:
+ - install:
+ - ceph:
+ log-whitelist:
+ - 'candidate had a stat error'
+ - 'candidate had a read error'
+ - 'deep-scrub 0 missing, 1 inconsistent objects'
+ - 'deep-scrub 0 missing, 4 inconsistent objects'
+ - 'deep-scrub [0-9]+ errors'
+ - '!= omap_digest'
+ - '!= data_digest'
+ - 'repair 0 missing, 1 inconsistent objects'
+ - 'repair 0 missing, 4 inconsistent objects'
+ - 'repair [0-9]+ errors, [0-9]+ fixed'
+ - 'scrub 0 missing, 1 inconsistent objects'
+ - 'scrub [0-9]+ errors'
+ - 'size 1 != size'
+ - 'attr name mismatch'
+ - 'Regular scrub request, deep-scrub details will be lost'
+ - 'candidate size [0-9]+ info size [0-9]+ mismatch'
+ conf:
+ osd:
+ filestore debug inject read err: true
+ - repair_test:
+
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'repair_test task only accepts a dict for config'
+
+ manager = ctx.managers['ceph']
+ manager.wait_for_all_osds_up()
+
+ manager.raw_cluster_cmd('osd', 'set', 'noscrub')
+ manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub')
+
+ repair_test_1(manager, mdataerr, choose_primary, "scrub")
+ repair_test_1(manager, mdataerr, choose_replica, "scrub")
+ repair_test_1(manager, dataerr, choose_primary, "deep-scrub")
+ repair_test_1(manager, dataerr, choose_replica, "deep-scrub")
+ repair_test_1(manager, trunc, choose_primary, "scrub")
+ repair_test_1(manager, trunc, choose_replica, "scrub")
+ repair_test_2(ctx, manager, config, choose_primary)
+ repair_test_2(ctx, manager, config, choose_replica)
+
+ repair_test_erasure_code(manager, hinfoerr, 'primary', "deep-scrub")
+
+ manager.raw_cluster_cmd('osd', 'unset', 'noscrub')
+ manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub')
diff --git a/qa/tasks/resolve_stuck_peering.py b/qa/tasks/resolve_stuck_peering.py
new file mode 100644
index 00000000..d140544c
--- /dev/null
+++ b/qa/tasks/resolve_stuck_peering.py
@@ -0,0 +1,112 @@
+"""
+Resolve stuck peering
+"""
+import logging
+import time
+
+from teuthology import misc as teuthology
+from tasks.util.rados import rados
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Test resolving stuck peering.
+
+ requires 3 osds on a single test node
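+
+ A hedged usage sketch (the task takes no options of its own):
+
+ tasks:
+ - ceph:
+ - resolve_stuck_peering: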
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'Resolve stuck peering only accepts a dict for config'
+
+ manager = ctx.managers['ceph']
+
+ while len(manager.get_osd_status()['up']) < 3:
+ time.sleep(10)
+
+
+ manager.wait_for_clean()
+
+ dummyfile = '/etc/fstab'
+ dummyfile1 = '/etc/resolv.conf'
+
+ #create 1 PG pool
+ pool='foo'
+ log.info('creating pool foo')
+ manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1')
+
+ #set min_size of the pool to 1
+ #so that we can continue with I/O
+ #when 2 osds are down
+ manager.set_pool_property(pool, "min_size", 1)
+
+ osds = [0, 1, 2]
+
+ primary = manager.get_pg_primary('foo', 0)
+ log.info("primary osd is %d", primary)
+
+ others = list(osds)
+ others.remove(primary)
+
+ log.info('writing initial objects')
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+ #create few objects
+ for i in range(100):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])
+
+ manager.wait_for_clean()
+
+ #kill other osds except primary
+ log.info('killing other osds except primary')
+ for i in others:
+ manager.kill_osd(i)
+ for i in others:
+ manager.mark_down_osd(i)
+
+
+ for i in range(100):
+ rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1])
+
+ #kill primary osd
+ manager.kill_osd(primary)
+ manager.mark_down_osd(primary)
+
+ #revive other 2 osds
+ for i in others:
+ manager.revive_osd(i)
+
+ #make sure that pg is down
+ #Assuming pg number for single pg pool will start from 0
+ pgnum=0
+ pgstr = manager.get_pgid(pool, pgnum)
+ stats = manager.get_single_pg_stats(pgstr)
+ print(stats['state'])
+
+ timeout=60
+ start=time.time()
+
+ while 'down' not in stats['state']:
+ assert time.time() - start < timeout, \
+ 'failed to reach down state before timeout expired'
+ stats = manager.get_single_pg_stats(pgstr)
+
+ #mark primary as lost
+ manager.raw_cluster_cmd('osd', 'lost', '%d' % primary,\
+ '--yes-i-really-mean-it')
+
+
+ #expect the pg status to be active+undersized+degraded
+ #pg should recover and become active+clean within timeout
+ stats = manager.get_single_pg_stats(pgstr)
+ print(stats['state'])
+
+ timeout=10
+ start=time.time()
+
+ while manager.get_num_down():
+ assert time.time() - start < timeout, \
+ 'failed to recover before timeout expired'
+
+ manager.revive_osd(primary)
diff --git a/qa/tasks/restart.py b/qa/tasks/restart.py
new file mode 100644
index 00000000..52b685c9
--- /dev/null
+++ b/qa/tasks/restart.py
@@ -0,0 +1,163 @@
+"""
+Daemon restart
+"""
+import logging
+import pipes
+
+from teuthology import misc as teuthology
+from teuthology.orchestra import run as tor
+
+from teuthology.orchestra import run
+log = logging.getLogger(__name__)
+
+def restart_daemon(ctx, config, role, id_, *args):
+ """
+ Handle restart (including the execution of the command parameters passed)
+ """
+ log.info('Restarting {r}.{i} daemon...'.format(r=role, i=id_))
+ daemon = ctx.daemons.get_daemon(role, id_)
+ log.debug('Waiting for exit of {r}.{i} daemon...'.format(r=role, i=id_))
+ try:
+ daemon.wait_for_exit()
+ except tor.CommandFailedError as e:
+ log.debug('Command Failed: {e}'.format(e=e))
+ if len(args) > 0:
+ confargs = ['--{k}={v}'.format(k=k, v=v) for k,v in zip(args[0::2], args[1::2])]
+ log.debug('Doing restart of {r}.{i} daemon with args: {a}...'.format(r=role, i=id_, a=confargs))
+ daemon.restart_with_args(confargs)
+ else:
+ log.debug('Doing restart of {r}.{i} daemon...'.format(r=role, i=id_))
+ daemon.restart()
+
+def get_tests(ctx, config, role, remote, testdir):
+ """Download restart tests"""
+ srcdir = '{tdir}/restart.{role}'.format(tdir=testdir, role=role)
+
+ refspec = config.get('branch')
+ if refspec is None:
+ refspec = config.get('sha1')
+ if refspec is None:
+ refspec = config.get('tag')
+ if refspec is None:
+ refspec = 'HEAD'
+ log.info('Pulling restart qa/workunits from ref %s', refspec)
+
+ remote.run(
+ logger=log.getChild(role),
+ args=[
+ 'mkdir', '--', srcdir,
+ run.Raw('&&'),
+ 'git',
+ 'archive',
+ '--remote=git://git.ceph.com/ceph.git',
+ '%s:qa/workunits' % refspec,
+ run.Raw('|'),
+ 'tar',
+ '-C', srcdir,
+ '-x',
+ '-f-',
+ run.Raw('&&'),
+ 'cd', '--', srcdir,
+ run.Raw('&&'),
+ 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
+ run.Raw('&&'),
+ 'find', '-executable', '-type', 'f', '-printf', r'%P\0',
+ run.Raw('>{tdir}/restarts.list'.format(tdir=testdir)),
+ ],
+ )
+ restarts = sorted(teuthology.get_file(
+ remote,
+ '{tdir}/restarts.list'.format(tdir=testdir)).split('\0'))
+ return (srcdir, restarts)
+
+def task(ctx, config):
+ """
+ Execute commands and allow daemon restart with config options.
+ Each process executed can output to stdout restart commands of the form:
+ restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
+ This will restart the daemon <role>.<id> with the specified config values once
+ by modifying the conf file with those values, and then replacing the old conf file
+ once the daemon is restarted.
+ This task does not kill a running daemon, it assumes the daemon will abort on an
+ assert specified in the config.
+
+ tasks:
+ - install:
+ - ceph:
+ - restart:
+ exec:
+ client.0:
+ - test_backtraces.py
+
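+
+ A minimal sketch of a script this task could drive (illustrative only,
+ not part of this change; 'osd_recovery_delay_start 10' is just an example
+ override). It speaks the stdout/stdin protocol described above:
+
+ #!/usr/bin/env python
+ import sys
+ # ... do something here that makes osd.0 hit an assert and exit ...
+ # then ask the task to restart it with an extra config override
+ print("restart osd 0 osd_recovery_delay_start 10")
+ sys.stdout.flush()
+ sys.stdin.readline()  # the task answers 'restarted' once the daemon is back
+ print("done")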
+ """
+ assert isinstance(config, dict), "task kill got invalid config"
+
+ testdir = teuthology.get_testdir(ctx)
+
+ try:
+ assert 'exec' in config, "config requires exec key with <role>: <command> entries"
+ for role, task in config['exec'].items():
+ log.info('restart for role {r}'.format(r=role))
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ srcdir, restarts = get_tests(ctx, config, role, remote, testdir)
+ log.info('Running command on role %s host %s', role, remote.name)
+ spec = '{spec}'.format(spec=task[0])
+ log.info('Restarts list: %s', restarts)
+ log.info('Spec is %s', spec)
+ to_run = [w for w in restarts if w == task or w.find(spec) != -1]
+ log.info('To run: %s', to_run)
+ for c in to_run:
+ log.info('Running restart script %s...', c)
+ args = [
+ run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
+ ]
+ env = config.get('env')
+ if env is not None:
+ for var, val in env.items():
+ quoted_val = pipes.quote(val)
+ env_arg = '{var}={val}'.format(var=var, val=quoted_val)
+ args.append(run.Raw(env_arg))
+ args.extend([
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ '{srcdir}/{c}'.format(
+ srcdir=srcdir,
+ c=c,
+ ),
+ ])
+ proc = remote.run(
+ args=args,
+ stdout=tor.PIPE,
+ stdin=tor.PIPE,
+ stderr=log,
+ wait=False,
+ )
+ log.info('waiting for a command from script')
+ while True:
+ l = proc.stdout.readline()
+ if not l or l == '':
+ break
+ log.debug('script command: {c}'.format(c=l))
+ ll = l.strip()
+ cmd = ll.split(' ')
+ if cmd[0] == "done":
+ break
+ assert cmd[0] == 'restart', "script sent invalid command request to restart task"
+ # cmd should be: restart <role> <id> <conf_key1> <conf_value1> <conf_key2> <conf_value2>
+ # or to clear, just: restart <role> <id>
+ restart_daemon(ctx, config, cmd[1], cmd[2], *cmd[3:])
+ proc.stdin.writelines(['restarted\n'])
+ proc.stdin.flush()
+ try:
+ proc.wait()
+ except tor.CommandFailedError:
+ raise Exception('restart task got non-zero exit status from script: {s}'.format(s=c))
+ finally:
+ log.info('Finishing %s on %s...', task, role)
+ remote.run(
+ logger=log.getChild(role),
+ args=[
+ 'rm', '-rf', '--', '{tdir}/restarts.list'.format(tdir=testdir), srcdir,
+ ],
+ )
diff --git a/qa/tasks/rgw.py b/qa/tasks/rgw.py
new file mode 100644
index 00000000..e747426c
--- /dev/null
+++ b/qa/tasks/rgw.py
@@ -0,0 +1,357 @@
+"""
+rgw routines
+"""
+import argparse
+import contextlib
+import logging
+
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.exceptions import ConfigError
+from tasks.util import get_remote_for_role
+from tasks.util.rgw import rgwadmin, wait_for_radosgw
+from tasks.util.rados import (create_ec_pool,
+ create_replicated_pool,
+ create_cache_pool)
+
+log = logging.getLogger(__name__)
+
+class RGWEndpoint:
+ def __init__(self, hostname=None, port=None, cert=None, dns_name=None, website_dns_name=None):
+ self.hostname = hostname
+ self.port = port
+ self.cert = cert
+ self.dns_name = dns_name
+ self.website_dns_name = website_dns_name
+
+ def url(self):
+ proto = 'https' if self.cert else 'http'
+ return '{proto}://{hostname}:{port}/'.format(proto=proto, hostname=self.hostname, port=self.port)
+
+@contextlib.contextmanager
+def start_rgw(ctx, config, clients):
+ """
+ Start rgw on remote sites.
+ """
+ log.info('Starting rgw...')
+ testdir = teuthology.get_testdir(ctx)
+ for client in clients:
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ client_with_id = daemon_type + '.' + client_id
+ client_with_cluster = cluster_name + '.' + client_with_id
+
+ client_config = config.get(client)
+ if client_config is None:
+ client_config = {}
+ log.info("rgw %s config is %s", client, client_config)
+ cmd_prefix = [
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'daemon-helper',
+ 'term',
+ ]
+
+ rgw_cmd = ['radosgw']
+
+ log.info("Using %s as radosgw frontend", ctx.rgw.frontend)
+
+ endpoint = ctx.rgw.role_endpoints[client]
+ frontends = ctx.rgw.frontend
+ frontend_prefix = client_config.get('frontend_prefix', None)
+ if frontend_prefix:
+ frontends += ' prefix={pfx}'.format(pfx=frontend_prefix)
+
+ if endpoint.cert:
+ # add the ssl certificate path
+ frontends += ' ssl_certificate={}'.format(endpoint.cert.certificate)
+ if ctx.rgw.frontend == 'civetweb':
+ frontends += ' port={}s'.format(endpoint.port)
+ else:
+ frontends += ' ssl_port={}'.format(endpoint.port)
+ else:
+ frontends += ' port={}'.format(endpoint.port)
+
+ rgw_cmd.extend([
+ '--rgw-frontends', frontends,
+ '-n', client_with_id,
+ '--cluster', cluster_name,
+ '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster),
+ '--log-file',
+ '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster),
+ '--rgw_ops_log_socket_path',
+ '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir,
+ client_with_cluster=client_with_cluster)
+ ])
+
+ keystone_role = client_config.get('use-keystone-role', None)
+ if keystone_role is not None:
+ if not ctx.keystone:
+ raise ConfigError('rgw must run after the keystone task')
+ url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(host=endpoint.hostname,
+ port=endpoint.port)
+ ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url)
+
+ keystone_host, keystone_port = \
+ ctx.keystone.public_endpoints[keystone_role]
+ rgw_cmd.extend([
+ '--rgw_keystone_url',
+ 'http://{khost}:{kport}'.format(khost=keystone_host,
+ kport=keystone_port),
+ ])
+
+
+ if client_config.get('dns-name') is not None:
+ rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name])
+ if client_config.get('dns-s3website-name') is not None:
+ rgw_cmd.extend(['--rgw-dns-s3website-name', endpoint.website_dns_name])
+
+ rgw_cmd.extend([
+ '--foreground',
+ run.Raw('|'),
+ 'sudo',
+ 'tee',
+ '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(tdir=testdir,
+ client_with_cluster=client_with_cluster),
+ run.Raw('2>&1'),
+ ])
+
+ if client_config.get('valgrind'):
+ cmd_prefix = teuthology.get_valgrind_args(
+ testdir,
+ client_with_cluster,
+ cmd_prefix,
+ client_config.get('valgrind')
+ )
+
+ run_cmd = list(cmd_prefix)
+ run_cmd.extend(rgw_cmd)
+
+ ctx.daemons.add_daemon(
+ remote, 'rgw', client_with_id,
+ cluster=cluster_name,
+ args=run_cmd,
+ logger=log.getChild(client),
+ stdin=run.PIPE,
+ wait=False,
+ )
+
+ # XXX: add_daemon() doesn't let us wait until radosgw finishes startup
+ for client in clients:
+ endpoint = ctx.rgw.role_endpoints[client]
+ url = endpoint.url()
+ log.info('Polling {client} until it starts accepting connections on {url}'.format(client=client, url=url))
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ wait_for_radosgw(url, remote)
+
+ try:
+ yield
+ finally:
+ for client in clients:
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ client_with_id = daemon_type + '.' + client_id
+ client_with_cluster = cluster_name + '.' + client_with_id
+ ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop()
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm',
+ '-f',
+ '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir,
+ client=client_with_cluster),
+ ],
+ )
+
+def assign_endpoints(ctx, config, default_cert):
+ role_endpoints = {}
+ for role, client_config in config.items():
+ client_config = client_config or {}
+ remote = get_remote_for_role(ctx, role)
+
+ cert = client_config.get('ssl certificate', default_cert)
+ if cert:
+ # find the certificate created by the ssl task
+ if not hasattr(ctx, 'ssl_certificates'):
+ raise ConfigError('rgw: no ssl task found for option "ssl certificate"')
+ ssl_certificate = ctx.ssl_certificates.get(cert, None)
+ if not ssl_certificate:
+ raise ConfigError('rgw: missing ssl certificate "{}"'.format(cert))
+ else:
+ ssl_certificate = None
+
+ port = client_config.get('port', 443 if ssl_certificate else 80)
+
+ # if dns-name is given, use it as the hostname (or as a prefix)
+ dns_name = client_config.get('dns-name', '')
+ if len(dns_name) == 0 or dns_name.endswith('.'):
+ dns_name += remote.hostname
+
+ website_dns_name = client_config.get('dns-s3website-name')
+ if website_dns_name is not None and (len(website_dns_name) == 0 or website_dns_name.endswith('.')):
+ website_dns_name += remote.hostname
+
+ role_endpoints[role] = RGWEndpoint(remote.hostname, port, ssl_certificate, dns_name, website_dns_name)
+
+ return role_endpoints
+
+@contextlib.contextmanager
+def create_pools(ctx, clients):
+ """Create replicated or erasure coded data pools for rgw."""
+
+ log.info('Creating data pools')
+ for client in clients:
+ log.debug("Obtaining remote for client {}".format(client))
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ data_pool = 'default.rgw.buckets.data'
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+
+ if ctx.rgw.ec_data_pool:
+ create_ec_pool(remote, data_pool, client, ctx.rgw.data_pool_pg_size,
+ ctx.rgw.erasure_code_profile, cluster_name, 'rgw')
+ else:
+ create_replicated_pool(remote, data_pool, ctx.rgw.data_pool_pg_size, cluster_name, 'rgw')
+
+ index_pool = 'default.rgw.buckets.index'
+ create_replicated_pool(remote, index_pool, ctx.rgw.index_pool_pg_size, cluster_name, 'rgw')
+
+ if ctx.rgw.cache_pools:
+ create_cache_pool(remote, data_pool, data_pool + '.cache', 64,
+ 64*1024*1024, cluster_name)
+ log.debug('Pools created')
+ yield
+
+@contextlib.contextmanager
+def configure_compression(ctx, clients, compression):
+ """ set a compression type in the default zone placement """
+ log.info('Configuring compression type = %s', compression)
+ for client in clients:
+ # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete().
+ # issue a 'radosgw-admin user list' command to trigger this
+ rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True)
+
+ rgwadmin(ctx, client,
+ cmd=['zone', 'placement', 'modify', '--rgw-zone', 'default',
+ '--placement-id', 'default-placement',
+ '--compression', compression],
+ check_status=True)
+ yield
+
+@contextlib.contextmanager
+def configure_storage_classes(ctx, clients, storage_classes):
+ """ set a compression type in the default zone placement """
+
+ sc = [s.strip() for s in storage_classes.split(',')]
+
+ for client in clients:
+ # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete().
+ # issue a 'radosgw-admin user list' command to trigger this
+ rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True)
+
+ for storage_class in sc:
+ log.info('Configuring storage class type = %s', storage_class)
+ rgwadmin(ctx, client,
+ cmd=['zonegroup', 'placement', 'add',
+ '--rgw-zone', 'default',
+ '--placement-id', 'default-placement',
+ '--storage-class', storage_class],
+ check_status=True)
+ rgwadmin(ctx, client,
+ cmd=['zone', 'placement', 'add',
+ '--rgw-zone', 'default',
+ '--placement-id', 'default-placement',
+ '--storage-class', storage_class,
+ '--data-pool', 'default.rgw.buckets.data.' + storage_class.lower()],
+ check_status=True)
+ yield
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ For example, to run rgw on all clients::
+
+ tasks:
+ - ceph:
+ - rgw:
+
+ To only run on certain clients::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0, client.3]
+
+ or
+
+ tasks:
+ - ceph:
+ - rgw:
+ client.0:
+ client.3:
+
+ To run radosgw through valgrind:
+
+ tasks:
+ - ceph:
+ - rgw:
+ client.0:
+ valgrind: [--tool=memcheck]
+ client.3:
+ valgrind: [--tool=memcheck]
+
+ To configure data or index pool pg_size:
+
+ overrides:
+ rgw:
+ data_pool_pg_size: 256
+ index_pool_pg_size: 128
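+
+ Other options this task reads can be set the same way (illustrative
+ values only; 'zlib' and the storage class names are examples, not
+ defaults):
+
+ overrides:
+ rgw:
+ ec-data-pool: true
+ compression type: zlib
+ storage classes: LUKEWARM, FROZEN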
+ """
+ if config is None:
+ config = dict(('client.{id}'.format(id=id_), None)
+ for id_ in teuthology.all_roles_of_type(
+ ctx.cluster, 'client'))
+ elif isinstance(config, list):
+ config = dict((name, None) for name in config)
+
+ clients = config.keys() # http://tracker.ceph.com/issues/20417
+
+ overrides = ctx.config.get('overrides', {})
+ teuthology.deep_merge(config, overrides.get('rgw', {}))
+
+ ctx.rgw = argparse.Namespace()
+
+ ctx.rgw.ec_data_pool = bool(config.pop('ec-data-pool', False))
+ ctx.rgw.erasure_code_profile = config.pop('erasure_code_profile', {})
+ ctx.rgw.cache_pools = bool(config.pop('cache-pools', False))
+ ctx.rgw.frontend = config.pop('frontend', 'civetweb')
+ ctx.rgw.compression_type = config.pop('compression type', None)
+ ctx.rgw.storage_classes = config.pop('storage classes', None)
+ default_cert = config.pop('ssl certificate', None)
+ ctx.rgw.data_pool_pg_size = config.pop('data_pool_pg_size', 64)
+ ctx.rgw.index_pool_pg_size = config.pop('index_pool_pg_size', 64)
+ ctx.rgw.config = config
+
+ log.debug("config is {}".format(config))
+ log.debug("client list is {}".format(clients))
+
+ ctx.rgw.role_endpoints = assign_endpoints(ctx, config, default_cert)
+
+ subtasks = [
+ lambda: create_pools(ctx=ctx, clients=clients),
+ ]
+ if ctx.rgw.compression_type:
+ subtasks.extend([
+ lambda: configure_compression(ctx=ctx, clients=clients,
+ compression=ctx.rgw.compression_type),
+ ])
+ if ctx.rgw.storage_classes:
+ subtasks.extend([
+ lambda: configure_storage_classes(ctx=ctx, clients=clients,
+ storage_classes=ctx.rgw.storage_classes),
+ ])
+ subtasks.extend([
+ lambda: start_rgw(ctx=ctx, config=config, clients=clients),
+ ])
+
+ with contextutil.nested(*subtasks):
+ yield
diff --git a/qa/tasks/rgw_logsocket.py b/qa/tasks/rgw_logsocket.py
new file mode 100644
index 00000000..d76e59d7
--- /dev/null
+++ b/qa/tasks/rgw_logsocket.py
@@ -0,0 +1,165 @@
+"""
+rgw s3tests logging wrappers
+"""
+from io import BytesIO
+from configobj import ConfigObj
+import contextlib
+import logging
+from tasks import s3tests
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Run s3tests download function
+ """
+ return s3tests.download(ctx, config)
+
+def _config_user(s3tests_conf, section, user):
+ """
+ Run s3tests user config function
+ """
+ return s3tests._config_user(s3tests_conf, section, user)
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+ """
+ Run s3tests user create function
+ """
+ return s3tests.create_users(ctx, config)
+
+@contextlib.contextmanager
+def configure(ctx, config):
+ """
+ Run s3tests user configure function
+ """
+ return s3tests.configure(ctx, config)
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+ """
+ Run remote netcat tests
+ """
+ assert isinstance(config, dict)
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ client_config['extra_args'] = [
+ 's3tests.functional.test_s3:test_bucket_list_return_data',
+ ]
+# args = [
+# 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
+# '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir),
+# '-w',
+# '{tdir}/s3-tests'.format(tdir=testdir),
+# '-v',
+# 's3tests.functional.test_s3:test_bucket_list_return_data',
+# ]
+# if client_config is not None and 'extra_args' in client_config:
+# args.extend(client_config['extra_args'])
+#
+# ctx.cluster.only(client).run(
+# args=args,
+# )
+
+ s3tests.run_tests(ctx, config)
+
+ netcat_out = BytesIO()
+
+ for client, client_config in config.items():
+ ctx.cluster.only(client).run(
+ args = [
+ 'netcat',
+ '-w', '5',
+ '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir),
+ ],
+ stdout = netcat_out,
+ )
+
+ out = netcat_out.getvalue()
+
+ assert len(out) > 100
+
+ log.info('Received', out)
+
+ yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run a subset of the s3-tests suite against rgw and verify that the opslog socket returns data
+
+ Must restrict testing to a particular client::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3tests: [client.0]
+
+ To pass extra arguments to nose (e.g. to run a certain test)::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3tests:
+ client.0:
+ extra_args: ['test_s3:test_object_acl_grand_public_read']
+ client.1:
+ extra_args: ['--exclude', 'test_100_continue']
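+
+ This wrapper is invoked under its own task name, for example (hedged
+ sketch; per-client options follow the same form as the s3tests examples
+ above):
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - rgw-logsocket: [client.0]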
+ """
+ assert hasattr(ctx, 'rgw'), 'rgw-logsocket must run after the rgw task'
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task rgw-logsocket only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ overrides = ctx.config.get('overrides', {})
+ # merge each client section, not the top level.
+ for (client, cconf) in config.items():
+ teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {}))
+
+ log.debug('config is %s', config)
+
+ s3tests_conf = {}
+ for client in clients:
+ endpoint = ctx.rgw.role_endpoints.get(client)
+ assert endpoint, 'rgw-logsocket: no rgw endpoint for {}'.format(client)
+
+ s3tests_conf[client] = ConfigObj(
+ indent_type='',
+ infile={
+ 'DEFAULT':
+ {
+ 'port' : endpoint.port,
+ 'is_secure' : endpoint.cert is not None,
+ },
+ 'fixtures' : {},
+ 's3 main' : {},
+ 's3 alt' : {},
+ }
+ )
+
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: create_users(ctx=ctx, config=dict(
+ clients=clients,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: configure(ctx=ctx, config=dict(
+ clients=config,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: run_tests(ctx=ctx, config=config),
+ ):
+ yield
diff --git a/qa/tasks/rgw_multi b/qa/tasks/rgw_multi
new file mode 120000
index 00000000..abfc703b
--- /dev/null
+++ b/qa/tasks/rgw_multi
@@ -0,0 +1 @@
+../../src/test/rgw/rgw_multi \ No newline at end of file
diff --git a/qa/tasks/rgw_multisite.py b/qa/tasks/rgw_multisite.py
new file mode 100644
index 00000000..266d0fb6
--- /dev/null
+++ b/qa/tasks/rgw_multisite.py
@@ -0,0 +1,436 @@
+"""
+rgw multisite configuration routines
+"""
+import argparse
+import logging
+import random
+import string
+from copy import deepcopy
+from tasks.util.rgw import rgwadmin, wait_for_radosgw
+from tasks.util.rados import create_ec_pool, create_replicated_pool
+from tasks.rgw_multi import multisite
+from tasks.rgw_multi.zone_rados import RadosZone as RadosZone
+from tasks.rgw_multi.zone_ps import PSZone as PSZone
+
+from teuthology.orchestra import run
+from teuthology import misc
+from teuthology.exceptions import ConfigError
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+
+class RGWMultisite(Task):
+ """
+ Performs rgw multisite configuration to match the given realm definition.
+
+ - rgw-multisite:
+ realm:
+ name: test-realm
+ is_default: true
+
+ List one or more zonegroup definitions. These are provided as json
+ input to `radosgw-admin zonegroup set`, with the exception of these keys:
+
+ * 'is_master' is passed on the command line as --master
+ * 'is_default' is passed on the command line as --default
+ * 'is_pubsub' is used to create a zone with tier-type=pubsub
+ * 'endpoints' given as client names are replaced with actual endpoints
+
+ zonegroups:
+ - name: test-zonegroup
+ api_name: test-api
+ is_master: true
+ is_default: true
+ endpoints: [c1.client.0]
+
+ List each of the zones to be created in this zonegroup.
+
+ zones:
+ - name: test-zone1
+ is_master: true
+ is_default: true
+ endpoints: [c1.client.0]
+ - name: test-zone2
+ is_default: true
+ endpoints: [c2.client.0]
+
+ A complete example:
+
+ tasks:
+ - install:
+ - ceph: {cluster: c1}
+ - ceph: {cluster: c2}
+ - rgw:
+ c1.client.0:
+ c2.client.0:
+ - rgw-multisite:
+ realm:
+ name: test-realm
+ is_default: true
+ zonegroups:
+ - name: test-zonegroup
+ is_master: true
+ is_default: true
+ zones:
+ - name: test-zone1
+ is_master: true
+ is_default: true
+ endpoints: [c1.client.0]
+ - name: test-zone2
+ is_default: true
+ endpoints: [c2.client.0]
+ - name: test-zone3
+ is_pubsub: true
+ endpoints: [c1.client.1]
+
+ """
+ def __init__(self, ctx, config):
+ super(RGWMultisite, self).__init__(ctx, config)
+
+ def setup(self):
+ super(RGWMultisite, self).setup()
+
+ overrides = self.ctx.config.get('overrides', {})
+ misc.deep_merge(self.config, overrides.get('rgw-multisite', {}))
+
+ if not self.ctx.rgw:
+ raise ConfigError('rgw-multisite must run after the rgw task')
+ role_endpoints = self.ctx.rgw.role_endpoints
+
+ # construct Clusters and Gateways for each client in the rgw task
+ clusters, gateways = extract_clusters_and_gateways(self.ctx,
+ role_endpoints)
+
+ # get the master zone and zonegroup configuration
+ mz, mzg = extract_master_zone_zonegroup(self.config['zonegroups'])
+ cluster1 = cluster_for_zone(clusters, mz)
+
+ # create the realm and period on the master zone's cluster
+ log.info('creating realm..')
+ realm = create_realm(cluster1, self.config['realm'])
+ period = realm.current_period
+
+ creds = gen_credentials()
+
+ # create the master zonegroup and its master zone
+ log.info('creating master zonegroup..')
+ master_zonegroup = create_zonegroup(cluster1, gateways, period,
+ deepcopy(mzg))
+ period.master_zonegroup = master_zonegroup
+
+ log.info('creating master zone..')
+ master_zone = create_zone(self.ctx, cluster1, gateways, creds,
+ master_zonegroup, deepcopy(mz))
+ master_zonegroup.master_zone = master_zone
+
+ period.update(master_zone, commit=True)
+ restart_zone_gateways(master_zone) # restart with --rgw-zone
+
+ # create the admin user on the master zone
+ log.info('creating admin user..')
+ user_args = ['--display-name', 'Realm Admin', '--system']
+ user_args += creds.credential_args()
+ admin_user = multisite.User('realm-admin')
+ admin_user.create(master_zone, user_args)
+
+ # process 'zonegroups'
+ for zg_config in self.config['zonegroups']:
+ zones_config = zg_config.pop('zones')
+
+ zonegroup = None
+ for zone_config in zones_config:
+ # get the cluster for this zone
+ cluster = cluster_for_zone(clusters, zone_config)
+
+ if cluster != cluster1: # already created on master cluster
+ log.info('pulling realm configuration to %s', cluster.name)
+ realm.pull(cluster, master_zone.gateways[0], creds)
+
+ # use the first zone's cluster to create the zonegroup
+ if not zonegroup:
+ if zg_config['name'] == master_zonegroup.name:
+ zonegroup = master_zonegroup
+ else:
+ log.info('creating zonegroup..')
+ zonegroup = create_zonegroup(cluster, gateways,
+ period, zg_config)
+
+ if zone_config['name'] == master_zone.name:
+ # master zone was already created
+ zone = master_zone
+ else:
+ # create the zone and commit the period
+ log.info('creating zone..')
+ zone = create_zone(self.ctx, cluster, gateways, creds,
+ zonegroup, zone_config)
+ period.update(zone, commit=True)
+
+ restart_zone_gateways(zone) # restart with --rgw-zone
+
+ # attach configuration to the ctx for other tasks
+ self.ctx.rgw_multisite = argparse.Namespace()
+ self.ctx.rgw_multisite.clusters = clusters
+ self.ctx.rgw_multisite.gateways = gateways
+ self.ctx.rgw_multisite.realm = realm
+ self.ctx.rgw_multisite.admin_user = admin_user
+
+ log.info('rgw multisite configuration completed')
+
+ def end(self):
+ del self.ctx.rgw_multisite
+
+class Cluster(multisite.Cluster):
+ """ Issues 'radosgw-admin' commands with the rgwadmin() helper """
+ def __init__(self, ctx, name, client):
+ super(Cluster, self).__init__()
+ self.ctx = ctx
+ self.name = name
+ self.client = client
+
+ def admin(self, args = None, **kwargs):
+ """ radosgw-admin command """
+ args = args or []
+ args += ['--cluster', self.name]
+ args += ['--debug-rgw', str(kwargs.pop('debug_rgw', 0))]
+ args += ['--debug-ms', str(kwargs.pop('debug_ms', 0))]
+ if kwargs.pop('read_only', False):
+ args += ['--rgw-cache-enabled', 'false']
+ kwargs['decode'] = False
+ check_retcode = kwargs.pop('check_retcode', True)
+ r, s = rgwadmin(self.ctx, self.client, args, **kwargs)
+ if check_retcode:
+ assert r == 0
+ return s, r
+
+class Gateway(multisite.Gateway):
+ """ Controls a radosgw instance using its daemon """
+ def __init__(self, role, remote, daemon, *args, **kwargs):
+ super(Gateway, self).__init__(*args, **kwargs)
+ self.role = role
+ self.remote = remote
+ self.daemon = daemon
+
+ def set_zone(self, zone):
+ """ set the zone and add its args to the daemon's command line """
+ assert self.zone is None, 'zone can only be set once'
+ self.zone = zone
+ # daemon.restart_with_args() would be perfect for this, except that
+ # radosgw args likely include a pipe and redirect. zone arguments at
+ # the end won't actually apply to radosgw
+ args = self.daemon.command_kwargs.get('args', [])
+ try:
+ # insert zone args before the first |
+ pipe = args.index(run.Raw('|'))
+ args = args[0:pipe] + zone.zone_args() + args[pipe:]
+ except ValueError:
+ args += zone.zone_args()
+ self.daemon.command_kwargs['args'] = args
+
+ def start(self, args = None):
+ """ (re)start the daemon """
+ self.daemon.restart()
+ # wait until startup completes
+ wait_for_radosgw(self.endpoint(), self.remote)
+
+ def stop(self):
+ """ stop the daemon """
+ self.daemon.stop()
+
+def extract_clusters_and_gateways(ctx, role_endpoints):
+ """ create cluster and gateway instances for all of the radosgw roles """
+ clusters = {}
+ gateways = {}
+ for role, endpoint in role_endpoints.items():
+ cluster_name, daemon_type, client_id = misc.split_role(role)
+ # find or create the cluster by name
+ cluster = clusters.get(cluster_name)
+ if not cluster:
+ clusters[cluster_name] = cluster = Cluster(ctx, cluster_name, role)
+ # create a gateway for this daemon
+ client_with_id = daemon_type + '.' + client_id # match format from rgw.py
+ daemon = ctx.daemons.get_daemon('rgw', client_with_id, cluster_name)
+ if not daemon:
+ raise ConfigError('no daemon for role=%s cluster=%s type=rgw id=%s' % \
+ (role, cluster_name, client_id))
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ gateways[role] = Gateway(role, remote, daemon, endpoint.hostname,
+ endpoint.port, cluster)
+ return clusters, gateways
+
+def create_realm(cluster, config):
+ """ create a realm from configuration and initialize its first period """
+ realm = multisite.Realm(config['name'])
+ args = []
+ if config.get('is_default', False):
+ args += ['--default']
+ realm.create(cluster, args)
+ realm.current_period = multisite.Period(realm)
+ return realm
+
+def extract_user_credentials(config):
+ """ extract keys from configuration """
+ return multisite.Credentials(config['access_key'], config['secret_key'])
+
+def extract_master_zone(zonegroup_config):
+ """ find and return the master zone definition """
+ master = None
+ for zone in zonegroup_config['zones']:
+ if not zone.get('is_master', False):
+ continue
+ if master:
+ raise ConfigError('zones %s and %s cannot both set \'is_master\'' % \
+ (master['name'], zone['name']))
+ master = zone
+ # continue the loop so we can detect duplicates
+ if not master:
+ raise ConfigError('one zone must set \'is_master\' in zonegroup %s' % \
+ zonegroup_config['name'])
+ return master
+
+def extract_master_zone_zonegroup(zonegroups_config):
+ """ find and return the master zone and zonegroup definitions """
+ master_zone, master_zonegroup = (None, None)
+ for zonegroup in zonegroups_config:
+ # verify that all zonegroups have a master zone set, even if they
+ # aren't in the master zonegroup
+ zone = extract_master_zone(zonegroup)
+ if not zonegroup.get('is_master', False):
+ continue
+ if master_zonegroup:
+ raise ConfigError('zonegroups %s and %s cannot both set \'is_master\'' % \
+ (master_zonegroup['name'], zonegroup['name']))
+ master_zonegroup = zonegroup
+ master_zone = zone
+ # continue the loop so we can detect duplicates
+ if not master_zonegroup:
+ raise ConfigError('one zonegroup must set \'is_master\'')
+ return master_zone, master_zonegroup
+
+def extract_zone_cluster_name(zone_config):
+ """ return the cluster (must be common to all zone endpoints) """
+ cluster_name = None
+ endpoints = zone_config.get('endpoints')
+ if not endpoints:
+ raise ConfigError('zone %s missing \'endpoints\' list' % \
+ zone_config['name'])
+ for role in endpoints:
+ name, _, _ = misc.split_role(role)
+ if not cluster_name:
+ cluster_name = name
+ elif cluster_name != name:
+ raise ConfigError('all zone %s endpoints must be in the same cluster' % \
+ zone_config['name'])
+ return cluster_name
+
+def cluster_for_zone(clusters, zone_config):
+ """ return the cluster entry for the given zone """
+ name = extract_zone_cluster_name(zone_config)
+ try:
+ return clusters[name]
+ except KeyError:
+ raise ConfigError('no cluster %s found' % name)
+
+def gen_access_key():
+ return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(16))
+
+def gen_secret():
+ return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(32))
+
+def gen_credentials():
+ return multisite.Credentials(gen_access_key(), gen_secret())
+
+def extract_gateway_endpoints(gateways, endpoints_config):
+ """ return a list of gateway endpoints associated with the given roles """
+ endpoints = []
+ for role in endpoints_config:
+ try:
+ # replace role names with their gateway's endpoint
+ endpoints.append(gateways[role].endpoint())
+ except KeyError:
+ raise ConfigError('no radosgw endpoint found for role %s' % role)
+ return endpoints
+
+def is_default_arg(config):
+ return ['--default'] if config.pop('is_default', False) else []
+
+def is_master_arg(config):
+ return ['--master'] if config.pop('is_master', False) else []
+
+def create_zonegroup(cluster, gateways, period, config):
+ """ pass the zonegroup configuration to `zonegroup set` """
+ config.pop('zones', None) # remove 'zones' from input to `zonegroup set`
+ endpoints = config.get('endpoints')
+ if endpoints:
+ # replace client names with their gateway endpoints
+ config['endpoints'] = extract_gateway_endpoints(gateways, endpoints)
+ zonegroup = multisite.ZoneGroup(config['name'], period)
+ # `zonegroup set` needs --default on command line, and 'is_master' in json
+ args = is_default_arg(config)
+ zonegroup.set(cluster, config, args)
+ period.zonegroups.append(zonegroup)
+ return zonegroup
+
+def create_zone(ctx, cluster, gateways, creds, zonegroup, config):
+ """ create a zone with the given configuration """
+ zone = multisite.Zone(config['name'], zonegroup, cluster)
+ if config.pop('is_pubsub', False):
+ zone = PSZone(config['name'], zonegroup, cluster)
+ else:
+ zone = RadosZone(config['name'], zonegroup, cluster)
+
+ # collect Gateways for the zone's endpoints
+ endpoints = config.get('endpoints')
+ if not endpoints:
+ raise ConfigError('no \'endpoints\' for zone %s' % config['name'])
+ zone.gateways = [gateways[role] for role in endpoints]
+ for gateway in zone.gateways:
+ gateway.set_zone(zone)
+
+ # format the gateway endpoints
+ endpoints = [g.endpoint() for g in zone.gateways]
+
+ args = is_default_arg(config)
+ args += is_master_arg(config)
+ args += creds.credential_args()
+ if len(endpoints):
+ args += ['--endpoints', ','.join(endpoints)]
+ zone.create(cluster, args)
+ zonegroup.zones.append(zone)
+
+ create_zone_pools(ctx, zone)
+ if ctx.rgw.compression_type:
+ configure_zone_compression(zone, ctx.rgw.compression_type)
+
+ zonegroup.zones_by_type.setdefault(zone.tier_type(), []).append(zone)
+
+ if zone.is_read_only():
+ zonegroup.ro_zones.append(zone)
+ else:
+ zonegroup.rw_zones.append(zone)
+
+ return zone
+
+def create_zone_pools(ctx, zone):
+ """ Create the data_pool for each placement type """
+ gateway = zone.gateways[0]
+ cluster = zone.cluster
+ for pool_config in zone.data.get('placement_pools', []):
+ pool_name = pool_config['val']['storage_classes']['STANDARD']['data_pool']
+ if ctx.rgw.ec_data_pool:
+ create_ec_pool(gateway.remote, pool_name, zone.name, 64,
+ ctx.rgw.erasure_code_profile, cluster.name, 'rgw')
+ else:
+ create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw')
+
+def configure_zone_compression(zone, compression):
+ """ Set compression type in the zone's default-placement """
+ zone.json_command(zone.cluster, 'placement', ['modify',
+ '--placement-id', 'default-placement',
+ '--compression', compression
+ ])
+
+def restart_zone_gateways(zone):
+ zone.stop()
+ zone.start()
+
+task = RGWMultisite
diff --git a/qa/tasks/rgw_multisite_tests.py b/qa/tasks/rgw_multisite_tests.py
new file mode 100644
index 00000000..53aedf79
--- /dev/null
+++ b/qa/tasks/rgw_multisite_tests.py
@@ -0,0 +1,99 @@
+"""
+rgw multisite testing
+"""
+import logging
+import nose.core
+import nose.config
+
+from teuthology.exceptions import ConfigError
+from teuthology.task import Task
+from teuthology import misc
+
+from tasks.rgw_multi import multisite, tests, tests_ps
+
+log = logging.getLogger(__name__)
+
+
+class RGWMultisiteTests(Task):
+ """
+ Runs the rgw_multi tests against a multisite configuration created by the
+ rgw-multisite task. Tests are run with nose, using any additional 'args'
+ provided. Overrides for tests.Config can be set in 'config'.
+
+ - rgw-multisite-tests:
+ args:
+ - tasks.rgw_multi.tests:test_object_sync
+ config:
+ reconfigure_delay: 60
+
+ """
+ def __init__(self, ctx, config):
+ super(RGWMultisiteTests, self).__init__(ctx, config)
+
+ def setup(self):
+ super(RGWMultisiteTests, self).setup()
+
+ overrides = self.ctx.config.get('overrides', {})
+ misc.deep_merge(self.config, overrides.get('rgw-multisite-tests', {}))
+
+ if not self.ctx.rgw_multisite:
+ raise ConfigError('rgw-multisite-tests must run after the rgw-multisite task')
+ realm = self.ctx.rgw_multisite.realm
+ master_zone = realm.meta_master_zone()
+
+ # create the test user
+ log.info('creating test user..')
+ user = multisite.User('rgw-multisite-test-user')
+ user.create(master_zone, ['--display-name', 'Multisite Test User',
+ '--gen-access-key', '--gen-secret'])
+
+ config = self.config.get('config', {})
+ tests.init_multi(realm, user, tests.Config(**config))
+ tests.realm_meta_checkpoint(realm)
+
+ def begin(self):
+ # extra arguments for nose can be passed as a string or list
+ extra_args = self.config.get('args', [])
+ if not isinstance(extra_args, list):
+ extra_args = [extra_args]
+ argv = [__name__] + extra_args
+
+ log.info("running rgw multisite tests on '%s' with args=%r",
+ tests.__name__, extra_args)
+
+ # run nose tests in the rgw_multi.tests module
+ conf = nose.config.Config(stream=get_log_stream(), verbosity=2)
+ error_msg = ''
+ result = nose.run(defaultTest=tests.__name__, argv=argv, config=conf)
+ if not result:
+ error_msg += 'rgw multisite, '
+ result = nose.run(defaultTest=tests_ps.__name__, argv=argv, config=conf)
+ if not result:
+ error_msg += 'rgw multisite pubsub, '
+ if error_msg:
+ raise RuntimeError(error_msg + 'test failures')
+
+
+def get_log_stream():
+ """ return a log stream for nose output """
+ # XXX: this is a workaround for IOErrors when nose writes to stderr,
+ # copied from vstart_runner.py
+ class LogStream(object):
+ def __init__(self):
+ self.buffer = ""
+
+ def write(self, data):
+ self.buffer += data
+ if "\n" in self.buffer:
+ lines = self.buffer.split("\n")
+ for line in lines[:-1]:
+ log.info(line)
+ self.buffer = lines[-1]
+
+ def flush(self):
+ pass
+
+ return LogStream()
+
+
+task = RGWMultisiteTests
diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py
new file mode 100644
index 00000000..239be7cb
--- /dev/null
+++ b/qa/tasks/s3a_hadoop.py
@@ -0,0 +1,289 @@
+import contextlib
+import logging
+from teuthology import misc
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run Hadoop S3A tests using Ceph
+ usage:
+ tasks:
+ - ceph-ansible:
+ - s3a-hadoop:
+ maven-version: '3.6.3' (default)
+ hadoop-version: '2.9.2'
+ bucket-name: 's3atest' (default)
+ access-key: 'anykey' (uses a default value)
+ secret-key: 'secretkey' (uses a default value)
+ role: client.0
+ """
+ if config is None:
+ config = {}
+
+ assert isinstance(config, dict), \
+ "task only supports a dictionary for configuration"
+
+ assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task'
+
+ overrides = ctx.config.get('overrides', {})
+ misc.deep_merge(config, overrides.get('s3a-hadoop', {}))
+ testdir = misc.get_testdir(ctx)
+
+ role = config.get('role')
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ endpoint = ctx.rgw.role_endpoints.get(role)
+ assert endpoint, 's3tests: no rgw endpoint for {}'.format(role)
+
+ # get versions
+ maven_major = config.get('maven-major', 'maven-3')
+ maven_version = config.get('maven-version', '3.6.3')
+ hadoop_ver = config.get('hadoop-version', '2.9.2')
+ bucket_name = config.get('bucket-name', 's3atest')
+ access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F')
+ secret_key = config.get(
+ 'secret-key',
+ 'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb')
+
+ # set versions for cloning the repo
+ apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format(
+ maven_version=maven_version)
+ maven_link = 'http://apache.mirrors.lucidnetworks.net/maven/' + \
+ '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + apache_maven
+ hadoop_git = 'https://github.com/apache/hadoop'
+ hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver)
+ install_prereq(remote)
+ remote.run(
+ args=[
+ 'cd',
+ testdir,
+ run.Raw('&&'),
+ 'wget',
+ maven_link,
+ run.Raw('&&'),
+ 'tar',
+ '-xvf',
+ apache_maven,
+ run.Raw('&&'),
+ 'git',
+ 'clone',
+ run.Raw(hadoop_git),
+ run.Raw('&&'),
+ 'cd',
+ 'hadoop',
+ run.Raw('&&'),
+ 'git',
+ 'checkout',
+ '-b',
+ run.Raw(hadoop_rel)
+ ]
+ )
+ configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
+ setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir)
+ if hadoop_ver.startswith('2.8'):
+ # test all ITtests but skip AWS test using public bucket landsat-pds
+ # which is not available from within this test
+ test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \
+ -Dfs.s3a.scale.test.timeout=1200 \
+ -Dfs.s3a.scale.test.huge.filesize=256M verify'
+ else:
+ test_options = 'test -Dtest=S3a*,TestS3A*'
+ try:
+ run_s3atest(remote, maven_version, testdir, test_options)
+ yield
+ finally:
+ log.info("Done s3a testing, Cleaning up")
+ for fil in ['apache*', 'hadoop*', 'venv*', 'create*']:
+ remote.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))])
+
+
+def install_prereq(client):
+ """
+ Install pre requisites for RHEL and CentOS
+ TBD: Ubuntu
+ """
+ if client.os.name == 'rhel' or client.os.name == 'centos':
+ client.run(
+ args=[
+ 'sudo',
+ 'yum',
+ 'install',
+ '-y',
+ 'protobuf-c.x86_64',
+ 'java',
+ 'java-1.8.0-openjdk-devel',
+ 'dnsmasq'
+ ]
+ )
+
+
+def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir):
+ """
+ Create a user with the given access_key and secret_key, then use boto
+ to create the bucket used by the s3a tests
+ """
+ client.run(
+ args=[
+ 'sudo',
+ 'radosgw-admin',
+ 'user',
+ 'create',
+ run.Raw('--uid'),
+ 's3a',
+ run.Raw('--display-name=s3a cephtests'),
+ run.Raw('--access-key={access_key}'.format(access_key=access_key)),
+ run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)),
+ run.Raw('--email=s3a@ceph.com'),
+ ]
+ )
+ client.run(
+ args=[
+ 'virtualenv',
+ '{testdir}/venv'.format(testdir=testdir),
+ run.Raw('&&'),
+ run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)),
+ 'install',
+ 'boto'
+ ]
+ )
+ create_bucket = """
+#!/usr/bin/env python
+import boto
+import boto.s3.connection
+access_key = '{access_key}'
+secret_key = '{secret_key}'
+
+conn = boto.connect_s3(
+ aws_access_key_id = access_key,
+ aws_secret_access_key = secret_key,
+ host = '{dns_name}',
+ is_secure=False,
+ calling_format = boto.s3.connection.OrdinaryCallingFormat(),
+ )
+bucket = conn.create_bucket('{bucket_name}')
+for bucket in conn.get_all_buckets():
+ print(bucket.name + "\t" + bucket.creation_date)
+""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name)
+ py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir)
+ misc.sudo_write_file(
+ remote=client,
+ path=py_bucket_file,
+ data=create_bucket,
+ perms='0744',
+ )
+ client.run(
+ args=[
+ 'cat',
+ '{testdir}/create_bucket.py'.format(testdir=testdir),
+ ]
+ )
+ client.run(
+ args=[
+ '{testdir}/venv/bin/python'.format(testdir=testdir),
+ '{testdir}/create_bucket.py'.format(testdir=testdir),
+ ]
+ )
+
+
+def run_s3atest(client, maven_version, testdir, test_options):
+ """
+ Finally run the s3a test
+ """
+ aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir)
+ run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version)
+ # Remove AWS CredentialsProvider tests as it hits public bucket from AWS
+ # better solution is to create the public bucket on local server and test
+ rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java'
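+    # Illustrative composed shell command (assuming maven_version='3.6.3' and
+    # the non-2.8 test options):
+    #   cd {testdir}/hadoop/hadoop-tools/hadoop-aws/ && \
+    #   rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java && \
+    #   {testdir}/apache-maven-3.6.3/bin/mvn test -Dtest=S3a*,TestS3A*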
+ client.run(
+ args=[
+ 'cd',
+ run.Raw(aws_testdir),
+ run.Raw('&&'),
+ run.Raw(rm_test),
+ run.Raw('&&'),
+ run.Raw(run_test),
+ run.Raw(test_options)
+ ]
+ )
+
+
+def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir):
+ """
+    Use the template to configure the s3a test, filling in the access_key,
+    secret_key and other details required for the test.
+ """
+ config_template = """<configuration>
+<property>
+<name>fs.s3a.endpoint</name>
+<value>{name}</value>
+</property>
+
+<property>
+<name>fs.contract.test.fs.s3a</name>
+<value>s3a://{bucket_name}/</value>
+</property>
+
+<property>
+<name>fs.s3a.connection.ssl.enabled</name>
+<value>false</value>
+</property>
+
+<property>
+<name>test.fs.s3n.name</name>
+<value>s3n://{bucket_name}/</value>
+</property>
+
+<property>
+<name>test.fs.s3a.name</name>
+<value>s3a://{bucket_name}/</value>
+</property>
+
+<property>
+<name>test.fs.s3.name</name>
+<value>s3://{bucket_name}/</value>
+</property>
+
+<property>
+<name>fs.s3.awsAccessKeyId</name>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3.awsSecretAccessKey</name>
+<value>{secret_key}</value>
+</property>
+
+<property>
+<name>fs.s3n.awsAccessKeyId</name>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3n.awsSecretAccessKey</name>
+<value>{secret_key}</value>
+</property>
+
+<property>
+<name>fs.s3a.access.key</name>
+<description>AWS access key ID. Omit for Role-based authentication.</description>
+<value>{access_key}</value>
+</property>
+
+<property>
+<name>fs.s3a.secret.key</name>
+<description>AWS secret key. Omit for Role-based authentication.</description>
+<value>{secret_key}</value>
+</property>
+</configuration>
+""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key)
+ config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml'
+ misc.write_file(
+ remote=client,
+ path=config_path,
+ data=config_template,
+ )
+ # output for debug
+ client.run(args=['cat', config_path])
diff --git a/qa/tasks/s3readwrite.py b/qa/tasks/s3readwrite.py
new file mode 100644
index 00000000..d9e7234c
--- /dev/null
+++ b/qa/tasks/s3readwrite.py
@@ -0,0 +1,353 @@
+"""
+Run rgw s3 readwrite tests
+"""
+import base64
+import contextlib
+import logging
+import os
+import random
+import string
+import yaml
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.config import config as teuth_config
+from teuthology.orchestra import run
+from teuthology.orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Download the s3 tests from the git builder.
+    Remove the downloaded s3-tests directory upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Downloading s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for (client, client_config) in config.items():
+ s3tests_branch = client_config.get('force-branch', None)
+ if not s3tests_branch:
+ raise ValueError(
+ "Could not determine what branch to use for s3-tests. Please add 'force-branch: {s3-tests branch name}' to the .yaml config for this s3readwrite task.")
+
+ log.info("Using branch '%s' for s3tests", s3tests_branch)
+ sha1 = client_config.get('sha1')
+ git_remote = client_config.get('git_remote', teuth_config.ceph_git_base_url)
+ ctx.cluster.only(client).run(
+ args=[
+ 'git', 'clone',
+ '-b', s3tests_branch,
+ git_remote + 's3-tests.git',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ ],
+ )
+ if sha1 is not None:
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/s3-tests'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'git', 'reset', '--hard', sha1,
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Removing s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for client in config:
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm',
+ '-rf',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ ],
+ )
+
+
+def _config_user(s3tests_conf, section, user):
+ """
+ Configure users for this section by stashing away keys, ids, and
+ email addresses.
+ """
+ s3tests_conf[section].setdefault('user_id', user)
+ s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+ s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+ s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.ascii_uppercase) for i in range(20)))
+ s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)).decode('ascii'))
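+    # Illustrative outcome: _config_user(conf, 's3', 'foo.client.0') leaves any
+    # values already present untouched and otherwise fills in user_id, email,
+    # display_name, a random 20-character uppercase access_key and a
+    # base64-encoded secret_key.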
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+ """
+ Create a default s3 user.
+ """
+ assert isinstance(config, dict)
+ log.info('Creating rgw users...')
+ testdir = teuthology.get_testdir(ctx)
+ users = {'s3': 'foo'}
+ cached_client_user_names = dict()
+ for client in config['clients']:
+ cached_client_user_names[client] = dict()
+ s3tests_conf = config['s3tests_conf'][client]
+ s3tests_conf.setdefault('readwrite', {})
+ s3tests_conf['readwrite'].setdefault('bucket', 'rwtest-' + client + '-{random}-')
+ s3tests_conf['readwrite'].setdefault('readers', 10)
+ s3tests_conf['readwrite'].setdefault('writers', 3)
+ s3tests_conf['readwrite'].setdefault('duration', 300)
+ s3tests_conf['readwrite'].setdefault('files', {})
+ rwconf = s3tests_conf['readwrite']
+ rwconf['files'].setdefault('num', 10)
+ rwconf['files'].setdefault('size', 2000)
+ rwconf['files'].setdefault('stddev', 500)
+ for section, user in users.items():
+ _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
+ log.debug('creating user {user} on {client}'.format(user=s3tests_conf[section]['user_id'],
+ client=client))
+
+ # stash the 'delete_user' flag along with user name for easier cleanup
+ delete_this_user = True
+ if 'delete_user' in s3tests_conf['s3']:
+ delete_this_user = s3tests_conf['s3']['delete_user']
+ log.debug('delete_user set to {flag} for {client}'.format(flag=delete_this_user, client=client))
+ cached_client_user_names[client][section+user] = (s3tests_conf[section]['user_id'], delete_this_user)
+
+ # skip actual user creation if the create_user flag is set to false for this client
+ if 'create_user' in s3tests_conf['s3'] and s3tests_conf['s3']['create_user'] == False:
+ log.debug('create_user set to False, skipping user creation for {client}'.format(client=client))
+ continue
+ else:
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ 'user', 'create',
+ '--uid', s3tests_conf[section]['user_id'],
+ '--display-name', s3tests_conf[section]['display_name'],
+ '--access-key', s3tests_conf[section]['access_key'],
+ '--secret', s3tests_conf[section]['secret_key'],
+ '--email', s3tests_conf[section]['email'],
+ ],
+ )
+ try:
+ yield
+ finally:
+ for client in config['clients']:
+ for section, user in users.items():
+ #uid = '{user}.{client}'.format(user=user, client=client)
+ real_uid, delete_this_user = cached_client_user_names[client][section+user]
+ if delete_this_user:
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ 'user', 'rm',
+ '--uid', real_uid,
+ '--purge-data',
+ ],
+ )
+ else:
+ log.debug('skipping delete for user {uid} on {client}'.format(uid=real_uid, client=client))
+
+@contextlib.contextmanager
+def configure(ctx, config):
+ """
+ Configure the s3-tests. This includes the running of the
+ bootstrap code and the updating of local conf files.
+ """
+ assert isinstance(config, dict)
+ log.info('Configuring s3-readwrite-tests...')
+ for client, properties in config['clients'].items():
+ s3tests_conf = config['s3tests_conf'][client]
+ if properties is not None and 'rgw_server' in properties:
+ host = None
+ for target, roles in zip(ctx.config['targets'].keys(), ctx.config['roles']):
+ log.info('roles: ' + str(roles))
+ log.info('target: ' + str(target))
+ if properties['rgw_server'] in roles:
+ _, host = split_user(target)
+ assert host is not None, "Invalid client specified as the rgw_server"
+ s3tests_conf['s3']['host'] = host
+ else:
+ s3tests_conf['s3']['host'] = 'localhost'
+
+ def_conf = s3tests_conf['DEFAULT']
+ s3tests_conf['s3'].setdefault('port', def_conf['port'])
+ s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure'])
+
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'cd',
+ '{tdir}/s3-tests'.format(tdir=teuthology.get_testdir(ctx)),
+ run.Raw('&&'),
+ './bootstrap',
+ ],
+ )
+ conf = dict(
+ s3=s3tests_conf['s3'],
+ readwrite=s3tests_conf['readwrite'],
+ )
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=teuthology.get_testdir(ctx), client=client),
+ data=yaml.safe_dump(conf, default_flow_style=False),
+ )
+ yield
+
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+ """
+ Run the s3readwrite tests after everything is set up.
+
+ :param ctx: Context passed to task
+ :param config: specific configuration information
+ """
+ assert isinstance(config, dict)
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ conf = teuthology.get_file(remote, '{tdir}/archive/s3readwrite.{client}.config.yaml'.format(tdir=testdir, client=client))
+ args = [
+ '{tdir}/s3-tests/virtualenv/bin/s3tests-test-readwrite'.format(tdir=testdir),
+ ]
+ if client_config is not None and 'extra_args' in client_config:
+ args.extend(client_config['extra_args'])
+
+ ctx.cluster.only(client).run(
+ args=args,
+ stdin=conf,
+ )
+ yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the s3tests-test-readwrite suite against rgw.
+
+ To run all tests on all clients::
+
+ tasks:
+ - ceph:
+ - rgw:
+ - s3readwrite:
+ force-branch: ceph-nautilus
+
+ To restrict testing to particular clients::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3readwrite: [client.0]
+
+ To run against a server on client.1::
+
+ tasks:
+ - ceph:
+ - rgw: [client.1]
+ - s3readwrite:
+ client.0:
+ force-branch: ceph-nautilus
+ rgw_server: client.1
+
+    To pass extra test arguments::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3readwrite:
+ client.0:
+ force-branch: ceph-nautilus
+ readwrite:
+ bucket: mybucket
+ readers: 10
+ writers: 3
+ duration: 600
+ files:
+ num: 10
+ size: 2000
+ stddev: 500
+ client.1:
+ ...
+
+    To override s3 configuration::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3readwrite:
+ client.0:
+ force-branch: ceph-nautilus
+ s3:
+ user_id: myuserid
+ display_name: myname
+ email: my@email
+ access_key: myaccesskey
+ secret_key: mysecretkey
+
+ """
+ assert hasattr(ctx, 'rgw'), 's3readwrite must run after the rgw task'
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task s3readwrite only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ overrides = ctx.config.get('overrides', {})
+ # merge each client section, not the top level.
+ for client in config.keys():
+ if not config[client]:
+ config[client] = {}
+ teuthology.deep_merge(config[client], overrides.get('s3readwrite', {}))
+
+ log.debug('in s3readwrite, config is %s', config)
+
+ s3tests_conf = {}
+ for client in clients:
+ if config[client] is None:
+ config[client] = {}
+ config[client].setdefault('s3', {})
+ config[client].setdefault('readwrite', {})
+ endpoint = ctx.rgw.role_endpoints.get(client)
+ assert endpoint, 's3readwrite: no rgw endpoint for {}'.format(client)
+
+ s3tests_conf[client] = ({
+ 'DEFAULT':
+ {
+ 'port' : endpoint.port,
+ 'is_secure' : endpoint.cert is not None,
+ },
+ 'readwrite' : config[client]['readwrite'],
+ 's3' : config[client]['s3'],
+ })
+
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: create_users(ctx=ctx, config=dict(
+ clients=clients,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: configure(ctx=ctx, config=dict(
+ clients=config,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: run_tests(ctx=ctx, config=config),
+ ):
+ pass
+ yield
diff --git a/qa/tasks/s3roundtrip.py b/qa/tasks/s3roundtrip.py
new file mode 100644
index 00000000..cf6a9e86
--- /dev/null
+++ b/qa/tasks/s3roundtrip.py
@@ -0,0 +1,326 @@
+"""
+Run rgw roundtrip message tests
+"""
+import base64
+import contextlib
+import logging
+import os
+import random
+import string
+import yaml
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.config import config as teuth_config
+from teuthology.orchestra import run
+from teuthology.orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Download the s3 tests from the git builder.
+    Remove the downloaded s3-tests directory upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Downloading s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for (client, client_config) in config.items():
+ s3tests_branch = client_config.get('force-branch', None)
+ if not s3tests_branch:
+ raise ValueError(
+ "Could not determine what branch to use for s3-tests. Please add 'force-branch: {s3-tests branch name}' to the .yaml config for this s3roundtrip task.")
+
+ log.info("Using branch '%s' for s3tests", s3tests_branch)
+ sha1 = client_config.get('sha1')
+ git_remote = client_config.get('git_remote', teuth_config.ceph_git_base_url)
+ ctx.cluster.only(client).run(
+ args=[
+ 'git', 'clone',
+ '-b', s3tests_branch,
+ git_remote + 's3-tests.git',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ ],
+ )
+ if sha1 is not None:
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/s3-tests'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'git', 'reset', '--hard', sha1,
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Removing s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for client in config:
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm',
+ '-rf',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ ],
+ )
+
+
+def _config_user(s3tests_conf, section, user):
+ """
+ Configure users for this section by stashing away keys, ids, and
+ email addresses.
+ """
+ s3tests_conf[section].setdefault('user_id', user)
+ s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+ s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+ s3tests_conf[section].setdefault('access_key', ''.join(random.choice(string.ascii_uppercase) for i in range(20)))
+ s3tests_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)).decode('ascii'))
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+ """
+ Create a default s3 user.
+ """
+ assert isinstance(config, dict)
+ log.info('Creating rgw users...')
+ testdir = teuthology.get_testdir(ctx)
+ users = {'s3': 'foo'}
+ for client in config['clients']:
+ s3tests_conf = config['s3tests_conf'][client]
+ s3tests_conf.setdefault('roundtrip', {})
+ s3tests_conf['roundtrip'].setdefault('bucket', 'rttest-' + client + '-{random}-')
+ s3tests_conf['roundtrip'].setdefault('readers', 10)
+ s3tests_conf['roundtrip'].setdefault('writers', 3)
+ s3tests_conf['roundtrip'].setdefault('duration', 300)
+ s3tests_conf['roundtrip'].setdefault('files', {})
+ rtconf = s3tests_conf['roundtrip']
+ rtconf['files'].setdefault('num', 10)
+ rtconf['files'].setdefault('size', 2000)
+ rtconf['files'].setdefault('stddev', 500)
+ for section, user in [('s3', 'foo')]:
+ _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ 'user', 'create',
+ '--uid', s3tests_conf[section]['user_id'],
+ '--display-name', s3tests_conf[section]['display_name'],
+ '--access-key', s3tests_conf[section]['access_key'],
+ '--secret', s3tests_conf[section]['secret_key'],
+ '--email', s3tests_conf[section]['email'],
+ ],
+ )
+ try:
+ yield
+ finally:
+ for client in config['clients']:
+ for user in users.values():
+ uid = '{user}.{client}'.format(user=user, client=client)
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ 'user', 'rm',
+ '--uid', uid,
+ '--purge-data',
+ ],
+ )
+
+@contextlib.contextmanager
+def configure(ctx, config):
+ """
+ Configure the s3-tests. This includes the running of the
+ bootstrap code and the updating of local conf files.
+ """
+ assert isinstance(config, dict)
+ log.info('Configuring s3-roundtrip-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for client, properties in config['clients'].items():
+ s3tests_conf = config['s3tests_conf'][client]
+ if properties is not None and 'rgw_server' in properties:
+ host = None
+ for target, roles in zip(ctx.config['targets'].keys(), ctx.config['roles']):
+ log.info('roles: ' + str(roles))
+ log.info('target: ' + str(target))
+ if properties['rgw_server'] in roles:
+ _, host = split_user(target)
+ assert host is not None, "Invalid client specified as the rgw_server"
+ s3tests_conf['s3']['host'] = host
+ else:
+ s3tests_conf['s3']['host'] = 'localhost'
+
+ def_conf = s3tests_conf['DEFAULT']
+ s3tests_conf['s3'].setdefault('port', def_conf['port'])
+ s3tests_conf['s3'].setdefault('is_secure', def_conf['is_secure'])
+
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'cd',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ run.Raw('&&'),
+ './bootstrap',
+ ],
+ )
+ conf = dict(
+ s3=s3tests_conf['s3'],
+ roundtrip=s3tests_conf['roundtrip'],
+ )
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client),
+ data=yaml.safe_dump(conf, default_flow_style=False))
+ yield
+
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+ """
+ Run the s3 roundtrip after everything is set up.
+
+ :param ctx: Context passed to task
+ :param config: specific configuration information
+ """
+ assert isinstance(config, dict)
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ conf = teuthology.get_file(remote, '{tdir}/archive/s3roundtrip.{client}.config.yaml'.format(tdir=testdir, client=client))
+ args = [
+ '{tdir}/s3-tests/virtualenv/bin/s3tests-test-roundtrip'.format(tdir=testdir),
+ ]
+ if client_config is not None and 'extra_args' in client_config:
+ args.extend(client_config['extra_args'])
+
+ ctx.cluster.only(client).run(
+ args=args,
+ stdin=conf,
+ )
+ yield
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the s3tests-test-roundtrip suite against rgw.
+
+ To run all tests on all clients::
+
+ tasks:
+ - ceph:
+ - rgw:
+ - s3roundtrip:
+ force-branch: ceph-nautilus
+
+ To restrict testing to particular clients::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3roundtrip:
+ client.0:
+ force-branch: ceph-nautilus
+
+ To run against a server on client.1::
+
+ tasks:
+ - ceph:
+ - rgw: [client.1]
+ - s3roundtrip:
+ client.0:
+ force-branch: ceph-nautilus
+ rgw_server: client.1
+
+    To pass extra test arguments::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3roundtrip:
+ client.0:
+ force-branch: ceph-nautilus
+ roundtrip:
+ bucket: mybucket
+ readers: 10
+ writers: 3
+ duration: 600
+ files:
+ num: 10
+ size: 2000
+ stddev: 500
+ client.1:
+ ...
+
+    To override s3 configuration::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3roundtrip:
+            client.0:
+              force-branch: ceph-nautilus
+              s3:
+ user_id: myuserid
+ display_name: myname
+ email: my@email
+ access_key: myaccesskey
+ secret_key: mysecretkey
+
+ """
+ assert hasattr(ctx, 'rgw'), 's3roundtrip must run after the rgw task'
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task s3roundtrip only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ s3tests_conf = {}
+ for client in clients:
+ if config[client] is None:
+ config[client] = {}
+ config[client].setdefault('s3', {})
+ config[client].setdefault('roundtrip', {})
+
+ endpoint = ctx.rgw.role_endpoints.get(client)
+ assert endpoint, 's3roundtrip: no rgw endpoint for {}'.format(client)
+
+ s3tests_conf[client] = ({
+ 'DEFAULT':
+ {
+ 'port' : endpoint.port,
+ 'is_secure' : endpoint.cert is not None,
+ },
+ 'roundtrip' : config[client]['roundtrip'],
+ 's3' : config[client]['s3'],
+ })
+
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: create_users(ctx=ctx, config=dict(
+ clients=clients,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: configure(ctx=ctx, config=dict(
+ clients=config,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: run_tests(ctx=ctx, config=config),
+ ):
+ pass
+ yield
diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py
new file mode 100644
index 00000000..1b88ec74
--- /dev/null
+++ b/qa/tasks/s3tests.py
@@ -0,0 +1,424 @@
+"""
+Run a set of s3 tests on rgw.
+"""
+from io import BytesIO
+from configobj import ConfigObj
+import base64
+import contextlib
+import logging
+import os
+import random
+import six
+import string
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.config import config as teuth_config
+from teuthology.orchestra import run
+from teuthology.orchestra.connection import split_user
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Download the s3 tests from the git builder.
+    Remove the downloaded s3-tests directory upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Downloading s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for (client, client_config) in config.items():
+ s3tests_branch = client_config.get('force-branch', None)
+ if not s3tests_branch:
+ raise ValueError(
+ "Could not determine what branch to use for s3-tests. Please add 'force-branch: {s3-tests branch name}' to the .yaml config for this s3tests task.")
+
+ log.info("Using branch '%s' for s3tests", s3tests_branch)
+ sha1 = client_config.get('sha1')
+ git_remote = client_config.get('git_remote', teuth_config.ceph_git_base_url)
+ ctx.cluster.only(client).run(
+ args=[
+ 'git', 'clone',
+ '-b', s3tests_branch,
+ git_remote + 's3-tests.git',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ ],
+ )
+ if sha1 is not None:
+ ctx.cluster.only(client).run(
+ args=[
+ 'cd', '{tdir}/s3-tests'.format(tdir=testdir),
+ run.Raw('&&'),
+ 'git', 'reset', '--hard', sha1,
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Removing s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for client in config:
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm',
+ '-rf',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ ],
+ )
+
+
+def _config_user(s3tests_conf, section, user):
+ """
+ Configure users for this section by stashing away keys, ids, and
+ email addresses.
+ """
+ s3tests_conf[section].setdefault('user_id', user)
+ s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user))
+ s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user))
+ s3tests_conf[section].setdefault('access_key',
+ ''.join(random.choice(string.ascii_uppercase) for i in range(20)))
+ s3tests_conf[section].setdefault('secret_key',
+ six.ensure_str(base64.b64encode(os.urandom(40))))
+ s3tests_conf[section].setdefault('totp_serial',
+ ''.join(random.choice(string.digits) for i in range(10)))
+ s3tests_conf[section].setdefault('totp_seed',
+ six.ensure_str(base64.b32encode(os.urandom(40))))
+ s3tests_conf[section].setdefault('totp_seconds', '5')
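+    # Note: the totp_* values above are consumed by the "radosgw-admin mfa
+    # create" call in create_users() below; the seed is base32-encoded to match
+    # the --totp-seed-type=base32 flag used there.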
+
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+ """
+ Create a main and an alternate s3 user.
+ """
+ assert isinstance(config, dict)
+ log.info('Creating rgw users...')
+ testdir = teuthology.get_testdir(ctx)
+ users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser'}
+ for client in config['clients']:
+ s3tests_conf = config['s3tests_conf'][client]
+ s3tests_conf.setdefault('fixtures', {})
+ s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-')
+ for section, user in users.items():
+ _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client))
+ log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client))
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ client_with_id = daemon_type + '.' + client_id
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client_with_id,
+ 'user', 'create',
+ '--uid', s3tests_conf[section]['user_id'],
+ '--display-name', s3tests_conf[section]['display_name'],
+ '--access-key', s3tests_conf[section]['access_key'],
+ '--secret', s3tests_conf[section]['secret_key'],
+ '--email', s3tests_conf[section]['email'],
+ '--cluster', cluster_name,
+ ],
+ )
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client_with_id,
+ 'mfa', 'create',
+ '--uid', s3tests_conf[section]['user_id'],
+ '--totp-serial', s3tests_conf[section]['totp_serial'],
+ '--totp-seed', s3tests_conf[section]['totp_seed'],
+ '--totp-seconds', s3tests_conf[section]['totp_seconds'],
+ '--totp-window', '8',
+ '--totp-seed-type', 'base32',
+ '--cluster', cluster_name,
+ ],
+ )
+ try:
+ yield
+ finally:
+ for client in config['clients']:
+ for user in users.values():
+ uid = '{user}.{client}'.format(user=user, client=client)
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ client_with_id = daemon_type + '.' + client_id
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client_with_id,
+ 'user', 'rm',
+ '--uid', uid,
+ '--purge-data',
+ '--cluster', cluster_name,
+ ],
+ )
+
+
+@contextlib.contextmanager
+def configure(ctx, config):
+ """
+ Configure the s3-tests. This includes the running of the
+ bootstrap code and the updating of local conf files.
+ """
+ assert isinstance(config, dict)
+ log.info('Configuring s3-tests...')
+ testdir = teuthology.get_testdir(ctx)
+ for client, properties in config['clients'].items():
+ s3tests_conf = config['s3tests_conf'][client]
+ if properties is not None and 'rgw_server' in properties:
+ host = None
+ for target, roles in zip(ctx.config['targets'].keys(), ctx.config['roles']):
+ log.info('roles: ' + str(roles))
+ log.info('target: ' + str(target))
+ if properties['rgw_server'] in roles:
+ _, host = split_user(target)
+ assert host is not None, "Invalid client specified as the rgw_server"
+ s3tests_conf['DEFAULT']['host'] = host
+ else:
+ s3tests_conf['DEFAULT']['host'] = 'localhost'
+
+ if properties is not None and 'slow_backend' in properties:
+ s3tests_conf['fixtures']['slow backend'] = properties['slow_backend']
+
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'cd',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ run.Raw('&&'),
+ './bootstrap',
+ ],
+ )
+ conf_fp = BytesIO()
+ s3tests_conf.write(conf_fp)
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
+ data=conf_fp.getvalue(),
+ )
+
+ log.info('Configuring boto...')
+ boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template')
+ for client, properties in config['clients'].items():
+ with open(boto_src, 'rb') as f:
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ conf = six.ensure_str(f.read()).format(
+ idle_timeout=config.get('idle_timeout', 30)
+ )
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/boto.cfg'.format(tdir=testdir),
+ data=six.ensure_binary(conf),
+ )
+
+ try:
+ yield
+
+ finally:
+ log.info('Cleaning up boto...')
+ for client, properties in config['clients'].items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'rm',
+ '{tdir}/boto.cfg'.format(tdir=testdir),
+ ],
+ )
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+ """
+ Run the s3tests after everything is set up.
+
+ :param ctx: Context passed to task
+ :param config: specific configuration information
+ """
+ assert isinstance(config, dict)
+ testdir = teuthology.get_testdir(ctx)
+ # civetweb > 1.8 && beast parsers are strict on rfc2616
+ attrs = ["!fails_on_rgw", "!lifecycle_expiration", "!fails_strict_rfc2616"]
+ for client, client_config in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ args = [
+ 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client),
+ 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir)
+ ]
+ # the 'requests' library comes with its own ca bundle to verify ssl
+ # certificates - override that to use the system's ca bundle, which
+ # is where the ssl task installed this certificate
+ if remote.os.package_type == 'deb':
+ args += ['REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt']
+ else:
+ args += ['REQUESTS_CA_BUNDLE=/etc/pki/tls/certs/ca-bundle.crt']
+ args += [
+ '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir),
+ '-w',
+ '{tdir}/s3-tests'.format(tdir=testdir),
+ '-v',
+ '-a', ','.join(attrs),
+ ]
+ if client_config is not None and 'extra_args' in client_config:
+ args.extend(client_config['extra_args'])
+
+ remote.run(
+ args=args,
+ label="s3 tests against rgw"
+ )
+ yield
+
+@contextlib.contextmanager
+def scan_for_leaked_encryption_keys(ctx, config):
+ """
+ Scan radosgw logs for the encryption keys used by s3tests to
+ verify that we're not leaking secrets.
+
+ :param ctx: Context passed to task
+ :param config: specific configuration information
+ """
+ assert isinstance(config, dict)
+
+ try:
+ yield
+ finally:
+ # x-amz-server-side-encryption-customer-key
+ s3test_customer_key = 'pO3upElrwuEXSoFwCfnZPdSsmt/xWeFa0N9KgDijwVs='
+
+ log.debug('Scanning radosgw logs for leaked encryption keys...')
+ procs = list()
+ for client, client_config in config.items():
+ if not client_config.get('scan_for_encryption_keys', True):
+ continue
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ client_with_cluster = '.'.join((cluster_name, daemon_type, client_id))
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ proc = remote.run(
+ args=[
+ 'grep',
+ '--binary-files=text',
+ s3test_customer_key,
+ '/var/log/ceph/rgw.{client}.log'.format(client=client_with_cluster),
+ ],
+ wait=False,
+ check_status=False,
+ )
+ procs.append(proc)
+
+ for proc in procs:
+ proc.wait()
+ if proc.returncode == 1: # 1 means no matches
+ continue
+ log.error('radosgw log is leaking encryption keys!')
+ raise Exception('radosgw log is leaking encryption keys')
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the s3-tests suite against rgw.
+
+ To run all tests on all clients::
+
+ tasks:
+ - ceph:
+ - rgw:
+ - s3tests:
+
+ To restrict testing to particular clients::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3tests:
+ client.0:
+ force-branch: ceph-nautilus
+
+ To run against a server on client.1 and increase the boto timeout to 10m::
+
+ tasks:
+ - ceph:
+ - rgw: [client.1]
+ - s3tests:
+ client.0:
+ force-branch: ceph-nautilus
+ rgw_server: client.1
+ idle_timeout: 600
+
+ To pass extra arguments to nose (e.g. to run a certain test)::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - s3tests:
+ client.0:
+ force-branch: ceph-nautilus
+ extra_args: ['test_s3:test_object_acl_grand_public_read']
+ client.1:
+ force-branch: ceph-nautilus
+ extra_args: ['--exclude', 'test_100_continue']
+ """
+ assert hasattr(ctx, 'rgw'), 's3tests must run after the rgw task'
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task s3tests only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+ clients = config.keys()
+
+ overrides = ctx.config.get('overrides', {})
+ # merge each client section, not the top level.
+ for client in config.keys():
+ if not config[client]:
+ config[client] = {}
+ teuthology.deep_merge(config[client], overrides.get('s3tests', {}))
+
+ log.debug('s3tests config is %s', config)
+
+ s3tests_conf = {}
+ for client in clients:
+ endpoint = ctx.rgw.role_endpoints.get(client)
+ assert endpoint, 's3tests: no rgw endpoint for {}'.format(client)
+
+ s3tests_conf[client] = ConfigObj(
+ indent_type='',
+ infile={
+ 'DEFAULT':
+ {
+ 'port' : endpoint.port,
+ 'is_secure' : endpoint.cert is not None,
+ 'api_name' : 'default',
+ },
+ 'fixtures' : {},
+ 's3 main' : {},
+ 's3 alt' : {},
+ 's3 tenant': {},
+ }
+ )
+
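+    # Illustrative: the INI file written out by configure() ends up with
+    # [DEFAULT], [fixtures], [s3 main], [s3 alt] and [s3 tenant] sections,
+    # with the per-user keys filled in by _config_user() during create_users().
+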
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: create_users(ctx=ctx, config=dict(
+ clients=clients,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: configure(ctx=ctx, config=dict(
+ clients=config,
+ s3tests_conf=s3tests_conf,
+ )),
+ lambda: run_tests(ctx=ctx, config=config),
+ lambda: scan_for_leaked_encryption_keys(ctx=ctx, config=config),
+ ):
+ pass
+ yield
diff --git a/qa/tasks/samba.py b/qa/tasks/samba.py
new file mode 100644
index 00000000..1dd62d86
--- /dev/null
+++ b/qa/tasks/samba.py
@@ -0,0 +1,247 @@
+"""
+Samba
+"""
+import contextlib
+import logging
+import sys
+import time
+
+import six
+
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+from teuthology.orchestra.daemon import DaemonGroup
+
+log = logging.getLogger(__name__)
+
+
+def get_sambas(ctx, roles):
+ """
+    Scan for roles that are samba. Yield the id of the samba role
+    (samba.0, samba.1...) and the associated remote site.
+
+ :param ctx: Context
+ :param roles: roles for this test (extracted from yaml files)
+ """
+ for role in roles:
+ assert isinstance(role, six.string_types)
+ PREFIX = 'samba.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ yield (id_, remote)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Setup samba smbd with ceph vfs module. This task assumes the samba
+ package has already been installed via the install task.
+
+ The config is optional and defaults to starting samba on all nodes.
+ If a config is given, it is expected to be a list of
+ samba nodes to start smbd servers on.
+
+ Example that starts smbd on all samba nodes::
+
+ tasks:
+ - install:
+ - install:
+ project: samba
+ extra_packages: ['samba']
+ - ceph:
+ - samba:
+ - interactive:
+
+ Example that starts smbd on just one of the samba nodes and cifs on the other::
+
+ tasks:
+ - samba: [samba.0]
+ - cifs: [samba.1]
+
+ An optional backend can be specified, and requires a path which smbd will
+    use as the backend storage location::
+
+ roles:
+ - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a]
+ - [client.0, samba.0]
+
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0]
+ - samba:
+ samba.0:
+ cephfuse: "{testdir}/mnt.0"
+
+ This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with
+ a UNC of //localhost/cephfuse. Access through that UNC will be on
+ the ceph fuse mount point.
+
+ If no arguments are specified in the samba
+ role, the default behavior is to enable the ceph UNC //localhost/ceph
+ and use the ceph vfs module as the smbd backend.
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ log.info("Setting up smbd with ceph vfs...")
+ assert config is None or isinstance(config, list) or isinstance(config, dict), \
+ "task samba got invalid config"
+
+ if config is None:
+ config = dict(('samba.{id}'.format(id=id_), None)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba'))
+ elif isinstance(config, list):
+ config = dict((name, None) for name in config)
+
+ samba_servers = list(get_sambas(ctx=ctx, roles=config.keys()))
+
+ testdir = teuthology.get_testdir(ctx)
+
+ if not hasattr(ctx, 'daemons'):
+ ctx.daemons = DaemonGroup()
+
+ for id_, remote in samba_servers:
+
+ rolestr = "samba.{id_}".format(id_=id_)
+
+ confextras = """vfs objects = ceph
+ ceph:config_file = /etc/ceph/ceph.conf"""
+
+ unc = "ceph"
+ backend = "/"
+
+ if config[rolestr] is not None:
+ # verify that there's just one parameter in role
+ if len(config[rolestr]) != 1:
+ log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_))
+ raise Exception('invalid config')
+ confextras = ""
+            (unc, backendstr) = list(config[rolestr].items())[0]
+ backend = backendstr.format(testdir=testdir)
+
+ # on first samba role, set ownership and permissions of ceph root
+ # so that samba tests succeed
+ if config[rolestr] is None and id_ == samba_servers[0][0]:
+ remote.run(
+ args=[
+ 'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'),
+ 'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'),
+ 'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'),
+ 'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'),
+ 'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'),
+ 'rm', '-rf', '/tmp/cmnt',
+ ],
+ )
+ else:
+ remote.run(
+ args=[
+ 'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'),
+ 'sudo', 'chmod', '1777', backend,
+ ],
+ )
+
+ teuthology.sudo_write_file(remote, "/usr/local/samba/etc/smb.conf", """
+[global]
+ workgroup = WORKGROUP
+ netbios name = DOMAIN
+
+[{unc}]
+ path = {backend}
+ {extras}
+ writeable = yes
+ valid users = ubuntu
+""".format(extras=confextras, unc=unc, backend=backend))
+
+ # create ubuntu user
+ remote.run(
+ args=[
+ 'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu',
+ run.Raw('||'),
+ 'printf', run.Raw('"ubuntu\nubuntu\n"'),
+ run.Raw('|'),
+ 'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu'
+ ])
+
+ smbd_cmd = [
+ 'sudo',
+ 'daemon-helper',
+ 'term',
+ 'nostdin',
+ '/usr/local/samba/sbin/smbd',
+ '-F',
+ ]
+ ctx.daemons.add_daemon(remote, 'smbd', id_,
+ args=smbd_cmd,
+ logger=log.getChild("smbd.{id_}".format(id_=id_)),
+ stdin=run.PIPE,
+ wait=False,
+ )
+
+ # let smbd initialize, probably a better way...
+ seconds_to_sleep = 100
+ log.info('Sleeping for %s seconds...' % seconds_to_sleep)
+ time.sleep(seconds_to_sleep)
+ log.info('Sleeping stopped...')
+
+ try:
+ yield
+ finally:
+ log.info('Stopping smbd processes...')
+ exc_info = (None, None, None)
+ for d in ctx.daemons.iter_daemons_of_role('smbd'):
+ try:
+ d.stop()
+ except (run.CommandFailedError,
+ run.CommandCrashedError,
+ run.ConnectionLostError):
+ exc_info = sys.exc_info()
+ log.exception('Saw exception from %s.%s', d.role, d.id_)
+ if exc_info != (None, None, None):
+ six.reraise(exc_info[0], exc_info[1], exc_info[2])
+
+ for id_, remote in samba_servers:
+ remote.run(
+ args=[
+ 'sudo',
+ 'rm', '-rf',
+ '/usr/local/samba/etc/smb.conf',
+ '/usr/local/samba/private/*',
+ '/usr/local/samba/var/run/',
+ '/usr/local/samba/var/locks',
+ '/usr/local/samba/var/lock',
+ ],
+ )
+ # make sure daemons are gone
+ try:
+ remote.run(
+ args=[
+ 'while',
+ 'sudo', 'killall', '-9', 'smbd',
+ run.Raw(';'),
+ 'do', 'sleep', '1',
+ run.Raw(';'),
+ 'done',
+ ],
+ )
+
+ remote.run(
+ args=[
+ 'sudo',
+ 'lsof',
+ backend,
+ ],
+ check_status=False
+ )
+ remote.run(
+ args=[
+ 'sudo',
+ 'fuser',
+ '-M',
+ backend,
+ ],
+ check_status=False
+ )
+ except Exception:
+ log.exception("Saw exception")
+ pass
diff --git a/qa/tasks/scrub.py b/qa/tasks/scrub.py
new file mode 100644
index 00000000..7cf304e8
--- /dev/null
+++ b/qa/tasks/scrub.py
@@ -0,0 +1,117 @@
+"""
+Scrub osds
+"""
+import contextlib
+import gevent
+import logging
+import random
+import time
+
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run scrub periodically. Randomly chooses an OSD to scrub.
+
+ The config should be as follows:
+
+ scrub:
+ frequency: <seconds between scrubs>
+ deep: <bool for deepness>
+
+ example:
+
+ tasks:
+ - ceph:
+ - scrub:
+ frequency: 30
+ deep: 0
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'scrub task only accepts a dict for configuration'
+
+ log.info('Beginning scrub...')
+
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ while len(manager.get_osd_status()['up']) < num_osds:
+ time.sleep(10)
+
+ scrub_proc = Scrubber(
+ manager,
+ config,
+ )
+ try:
+ yield
+ finally:
+ log.info('joining scrub')
+ scrub_proc.do_join()
+
+class Scrubber:
+ """
+    Scrubbing is performed by a background thread spawned at the end of initialization.
+ """
+ def __init__(self, manager, config):
+ """
+ Spawn scrubbing thread upon completion.
+ """
+ self.ceph_manager = manager
+ self.ceph_manager.wait_for_clean()
+
+ osd_status = self.ceph_manager.get_osd_status()
+ self.osds = osd_status['up']
+
+ self.config = config
+ if self.config is None:
+ self.config = dict()
+
+ else:
+ def tmp(x):
+ """Local display"""
+ print(x)
+ self.log = tmp
+
+ self.stopping = False
+
+ log.info("spawning thread")
+
+ self.thread = gevent.spawn(self.do_scrub)
+
+ def do_join(self):
+ """Scrubbing thread finished"""
+ self.stopping = True
+ self.thread.get()
+
+ def do_scrub(self):
+ """Perform the scrub operation"""
+ frequency = self.config.get("frequency", 30)
+ deep = self.config.get("deep", 0)
+
+ log.info("stopping %s" % self.stopping)
+
+ while not self.stopping:
+ osd = str(random.choice(self.osds))
+
+ if deep:
+ cmd = 'deep-scrub'
+ else:
+ cmd = 'scrub'
+
+ log.info('%sbing %s' % (cmd, osd))
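+            # Illustrative resulting cluster command: "ceph osd deep-scrub 3"
+            # (or "ceph osd scrub 3"), with a randomly chosen osd id.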
+ self.ceph_manager.raw_cluster_cmd('osd', cmd, osd)
+
+ time.sleep(frequency)
diff --git a/qa/tasks/scrub_test.py b/qa/tasks/scrub_test.py
new file mode 100644
index 00000000..3d71708e
--- /dev/null
+++ b/qa/tasks/scrub_test.py
@@ -0,0 +1,403 @@
+"""Scrub testing"""
+
+import contextlib
+import json
+import logging
+import os
+import time
+import tempfile
+
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+
+def wait_for_victim_pg(manager):
+ """Return a PG with some data and its acting set"""
+ # wait for some PG to have data that we can mess with
+ victim = None
+ while victim is None:
+ stats = manager.get_pg_stats()
+ for pg in stats:
+ size = pg['stat_sum']['num_bytes']
+ if size > 0:
+ victim = pg['pgid']
+ acting = pg['acting']
+ return victim, acting
+ time.sleep(3)
+
+
+def find_victim_object(ctx, pg, osd):
+ """Return a file to be fuzzed"""
+ (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.keys()
+ data_path = os.path.join(
+ '/var/lib/ceph/osd',
+ 'ceph-{id}'.format(id=osd),
+ 'fuse',
+ '{pg}_head'.format(pg=pg),
+ 'all',
+ )
+
+ # fuzz time
+ ls_out = osd_remote.sh('sudo ls %s' % data_path)
+
+ # find an object file we can mess with (and not the pg info object)
+ osdfilename = next(line for line in ls_out.split('\n')
+ if not line.endswith('::::head#'))
+ assert osdfilename is not None
+
+ # Get actual object name from osd stored filename
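+    # (illustrative layout: an entry looks roughly like
+    # "#2:deadbeef:::myobject:head#", so field 4 of the ':'-split below is the
+    # object name; the pg meta entry, whose name is empty, ends with
+    # "::::head#" and was skipped above)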
+ objname = osdfilename.split(':')[4]
+ return osd_remote, os.path.join(data_path, osdfilename), objname
+
+
+def corrupt_file(osd_remote, path):
+ # put a single \0 at the beginning of the file
+ osd_remote.run(
+ args=['sudo', 'dd',
+ 'if=/dev/zero',
+ 'of=%s/data' % path,
+ 'bs=1', 'count=1', 'conv=notrunc']
+ )
+
+
+def get_pgnum(pgid):
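+    """Return the pg-number part of a pgid, e.g. '2.1a' -> '1a'."""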
+ pos = pgid.find('.')
+ assert pos != -1
+ return pgid[pos+1:]
+
+
+def deep_scrub(manager, victim, pool):
+ # scrub, verify inconsistent
+ pgnum = get_pgnum(victim)
+ manager.do_pg_scrub(pool, pgnum, 'deep-scrub')
+
+ stats = manager.get_single_pg_stats(victim)
+ inconsistent = stats['state'].find('+inconsistent') != -1
+ assert inconsistent
+
+
+def repair(manager, victim, pool):
+ # repair, verify no longer inconsistent
+ pgnum = get_pgnum(victim)
+ manager.do_pg_scrub(pool, pgnum, 'repair')
+
+ stats = manager.get_single_pg_stats(victim)
+ inconsistent = stats['state'].find('+inconsistent') != -1
+ assert not inconsistent
+
+
+def test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, pool):
+ corrupt_file(osd_remote, obj_path)
+ deep_scrub(manager, pg, pool)
+ repair(manager, pg, pool)
+
+
+def test_repair_bad_omap(ctx, manager, pg, osd, objname):
+ # Test deep-scrub with various omap modifications
+ # Modify omap on specific osd
+ log.info('fuzzing omap of %s' % objname)
+ manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key'])
+ manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname,
+ 'badkey', 'badval'])
+ manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr'])
+
+ deep_scrub(manager, pg, 'rbd')
+    # Please note, the repair here is erroneous: it rewrites the correct omap
+    # digest and data digest on the replicas with the corresponding digests
+    # from the primary osd which is hosting the victim object, see
+    # find_victim_object().
+    # So we need to either put this test at the end of this task, or undo the
+    # mess-up manually before the "repair()" below, which just ensures the
+    # cleanup is sane. Otherwise the succeeding tests will fail if they try to
+    # set "badkey" in the hope of getting an "inconsistent" pg with a deep-scrub.
+ manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'hdr'])
+ manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'badkey'])
+ manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname,
+ 'key', 'val'])
+ repair(manager, pg, 'rbd')
+
+
+class MessUp:
+ def __init__(self, manager, osd_remote, pool, osd_id,
+ obj_name, obj_path, omap_key, omap_val):
+ self.manager = manager
+ self.osd = osd_remote
+ self.pool = pool
+ self.osd_id = osd_id
+ self.obj = obj_name
+ self.path = obj_path
+ self.omap_key = omap_key
+ self.omap_val = omap_val
+
+ @contextlib.contextmanager
+ def _test_with_file(self, messup_cmd, *checks):
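+        """
+        Back up the object's data file, run messup_cmd, yield the expected
+        error names (checks), then restore the original data file.  Used as a
+        context manager, e.g. "with messup.remove() as checks: ...".
+        """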
+ temp = tempfile.mktemp()
+ backup_cmd = ['sudo', 'cp', os.path.join(self.path, 'data'), temp]
+ self.osd.run(args=backup_cmd)
+ self.osd.run(args=messup_cmd.split())
+ yield checks
+ create_cmd = ['sudo', 'mkdir', self.path]
+ self.osd.run(args=create_cmd, check_status=False)
+ restore_cmd = ['sudo', 'cp', temp, os.path.join(self.path, 'data')]
+ self.osd.run(args=restore_cmd)
+
+ def remove(self):
+ cmd = 'sudo rmdir {path}'.format(path=self.path)
+ return self._test_with_file(cmd, 'missing')
+
+ def append(self):
+ cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \
+ 'conv=notrunc oflag=append'.format(path=self.path)
+ return self._test_with_file(cmd,
+ 'data_digest_mismatch',
+ 'size_mismatch')
+
+ def truncate(self):
+ cmd = 'sudo dd if=/dev/null of={path}/data'.format(path=self.path)
+ return self._test_with_file(cmd,
+ 'data_digest_mismatch',
+ 'size_mismatch')
+
+ def change_obj(self):
+ cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \
+ 'conv=notrunc'.format(path=self.path)
+ return self._test_with_file(cmd,
+ 'data_digest_mismatch')
+
+ @contextlib.contextmanager
+ def rm_omap(self):
+ cmd = ['rmomapkey', self.pool, self.obj, self.omap_key]
+ self.manager.osd_admin_socket(self.osd_id, cmd)
+ yield ('omap_digest_mismatch',)
+ cmd = ['setomapval', self.pool, self.obj,
+ self.omap_key, self.omap_val]
+ self.manager.osd_admin_socket(self.osd_id, cmd)
+
+ @contextlib.contextmanager
+ def add_omap(self):
+ cmd = ['setomapval', self.pool, self.obj, 'badkey', 'badval']
+ self.manager.osd_admin_socket(self.osd_id, cmd)
+ yield ('omap_digest_mismatch',)
+ cmd = ['rmomapkey', self.pool, self.obj, 'badkey']
+ self.manager.osd_admin_socket(self.osd_id, cmd)
+
+ @contextlib.contextmanager
+ def change_omap(self):
+ cmd = ['setomapval', self.pool, self.obj, self.omap_key, 'badval']
+ self.manager.osd_admin_socket(self.osd_id, cmd)
+ yield ('omap_digest_mismatch',)
+ cmd = ['setomapval', self.pool, self.obj, self.omap_key, self.omap_val]
+ self.manager.osd_admin_socket(self.osd_id, cmd)
+
+
+class InconsistentObjChecker:
+ """Check the returned inconsistents/inconsistent info"""
+
+ def __init__(self, osd, acting, obj_name):
+ self.osd = osd
+ self.acting = acting
+ self.obj = obj_name
+ assert self.osd in self.acting
+
+ def basic_checks(self, inc):
+ assert inc['object']['name'] == self.obj
+ assert inc['object']['snap'] == "head"
+ assert len(inc['shards']) == len(self.acting), \
+ "the number of returned shard does not match with the acting set"
+
+ def run(self, check, inc):
+ func = getattr(self, check)
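+        # e.g. check='size_mismatch' dispatches to self.size_mismatch(inc)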
+ func(inc)
+
+ def _check_errors(self, inc, err_name):
+ bad_found = False
+ good_found = False
+ for shard in inc['shards']:
+ log.info('shard = %r' % shard)
+ log.info('err = %s' % err_name)
+ assert 'osd' in shard
+ osd = shard['osd']
+ err = err_name in shard['errors']
+ if osd == self.osd:
+ assert bad_found is False, \
+ "multiple entries found for the given OSD"
+ assert err is True, \
+ "Didn't find '{err}' in errors".format(err=err_name)
+ bad_found = True
+ else:
+ assert osd in self.acting, "shard not in acting set"
+ assert err is False, \
+ "Expected '{err}' in errors".format(err=err_name)
+ good_found = True
+ assert bad_found is True, \
+ "Shard for osd.{osd} not found".format(osd=self.osd)
+ assert good_found is True, \
+ "No other acting shards found"
+
+ def _check_attrs(self, inc, attr_name):
+ bad_attr = None
+ good_attr = None
+ for shard in inc['shards']:
+ log.info('shard = %r' % shard)
+ log.info('attr = %s' % attr_name)
+ assert 'osd' in shard
+ osd = shard['osd']
+ attr = shard.get(attr_name, False)
+ if osd == self.osd:
+ assert bad_attr is None, \
+ "multiple entries found for the given OSD"
+ bad_attr = attr
+ else:
+ assert osd in self.acting, "shard not in acting set"
+ assert good_attr is None or good_attr == attr, \
+ "multiple good attrs found"
+ good_attr = attr
+ assert bad_attr is not None, \
+ "bad {attr} not found".format(attr=attr_name)
+ assert good_attr is not None, \
+ "good {attr} not found".format(attr=attr_name)
+ assert good_attr != bad_attr, \
+ "bad attr is identical to the good ones: " \
+ "{0} == {1}".format(good_attr, bad_attr)
+
+ def data_digest_mismatch(self, inc):
+ assert 'data_digest_mismatch' in inc['errors']
+ self._check_attrs(inc, 'data_digest')
+
+ def missing(self, inc):
+ assert 'missing' in inc['union_shard_errors']
+ self._check_errors(inc, 'missing')
+
+ def size_mismatch(self, inc):
+ assert 'size_mismatch' in inc['errors']
+ self._check_attrs(inc, 'size')
+
+ def omap_digest_mismatch(self, inc):
+ assert 'omap_digest_mismatch' in inc['errors']
+ self._check_attrs(inc, 'omap_digest')
+
+
+def test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd_id,
+ obj_name, obj_path):
+ mon = manager.controller
+ pool = 'rbd'
+ omap_key = 'key'
+ omap_val = 'val'
+ manager.do_rados(mon, ['-p', pool, 'setomapval', obj_name,
+ omap_key, omap_val])
+ # Update missing digests, requires "osd deep scrub update digest min age: 0"
+ pgnum = get_pgnum(pg)
+ manager.do_pg_scrub(pool, pgnum, 'deep-scrub')
+
+ messup = MessUp(manager, osd_remote, pool, osd_id, obj_name, obj_path,
+ omap_key, omap_val)
+ for test in [messup.rm_omap, messup.add_omap, messup.change_omap,
+ messup.append, messup.truncate, messup.change_obj,
+ messup.remove]:
+ with test() as checks:
+ deep_scrub(manager, pg, pool)
+ cmd = 'rados list-inconsistent-pg {pool} ' \
+ '--format=json'.format(pool=pool)
+ pgs = json.loads(mon.sh(cmd))
+ assert pgs == [pg]
+
+ cmd = 'rados list-inconsistent-obj {pg} ' \
+ '--format=json'.format(pg=pg)
+ objs = json.loads(mon.sh(cmd))
+ assert len(objs['inconsistents']) == 1
+
+ checker = InconsistentObjChecker(osd_id, acting, obj_name)
+ inc_obj = objs['inconsistents'][0]
+ log.info('inc = %r', inc_obj)
+ checker.basic_checks(inc_obj)
+ for check in checks:
+ checker.run(check, inc_obj)
+
+
+def task(ctx, config):
+ """
+ Test [deep] scrub
+
+ tasks:
+ - chef:
+ - install:
+ - ceph:
+ log-whitelist:
+ - '!= data_digest'
+ - '!= omap_digest'
+ - '!= size'
+ - deep-scrub 0 missing, 1 inconsistent objects
+ - deep-scrub [0-9]+ errors
+ - repair 0 missing, 1 inconsistent objects
+ - repair [0-9]+ errors, [0-9]+ fixed
+ - shard [0-9]+ .* : missing
+ - deep-scrub 1 missing, 1 inconsistent objects
+ - does not match object info size
+ - attr name mistmatch
+ - deep-scrub 1 missing, 0 inconsistent objects
+ - failed to pick suitable auth object
+ - candidate size [0-9]+ info size [0-9]+ mismatch
+ conf:
+ osd:
+ osd deep scrub update digest min age: 0
+ - scrub_test:
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'scrub_test task only accepts a dict for configuration'
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.keys()
+
+ num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd')
+ log.info('num_osds is %s' % num_osds)
+
+ manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager'),
+ )
+
+ while len(manager.get_osd_status()['up']) < num_osds:
+ time.sleep(10)
+
+ for i in range(num_osds):
+ manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
+ '--', '--osd-objectstore-fuse')
+ manager.flush_pg_stats(range(num_osds))
+ manager.wait_for_clean()
+
+ # write some data
+ p = manager.do_rados(mon, ['-p', 'rbd', 'bench', '--no-cleanup', '1',
+ 'write', '-b', '4096'])
+ log.info('err is %d' % p.exitstatus)
+
+ # wait for some PG to have data that we can mess with
+ pg, acting = wait_for_victim_pg(manager)
+ osd = acting[0]
+
+ osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd)
+ manager.do_rados(mon, ['-p', 'rbd', 'setomapval', obj_name, 'key', 'val'])
+ log.info('err is %d' % p.exitstatus)
+ manager.do_rados(mon, ['-p', 'rbd', 'setomapheader', obj_name, 'hdr'])
+ log.info('err is %d' % p.exitstatus)
+
+ # Update missing digests, requires "osd deep scrub update digest min age: 0"
+ pgnum = get_pgnum(pg)
+ manager.do_pg_scrub('rbd', pgnum, 'deep-scrub')
+
+ log.info('messing with PG %s on osd %d' % (pg, osd))
+ test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd')
+ test_repair_bad_omap(ctx, manager, pg, osd, obj_name)
+ test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd,
+ obj_name, obj_path)
+ log.info('test successful!')
+
+ # shut down fuse mount
+ for i in range(num_osds):
+ manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs',
+ '--', '--no-osd-objectstore-fuse')
+ time.sleep(5)
+ log.info('done')
diff --git a/qa/tasks/swift.py b/qa/tasks/swift.py
new file mode 100644
index 00000000..f8758842
--- /dev/null
+++ b/qa/tasks/swift.py
@@ -0,0 +1,256 @@
+"""
+Test Swift API
+"""
+from io import BytesIO
+from configobj import ConfigObj
+import base64
+import contextlib
+import logging
+import os
+
+from distutils.version import LooseVersion
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology.config import config as teuth_config
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+ Download the Swift API.
+ """
+ testdir = teuthology.get_testdir(ctx)
+ assert isinstance(config, dict)
+ log.info('Downloading swift...')
+ for (client, cconf) in config.items():
+ ctx.cluster.only(client).run(
+ args=[
+ 'git', 'clone',
+ '-b', cconf.get('force-branch', 'ceph-nautilus'),
+ teuth_config.ceph_git_base_url + 'swift.git',
+ '{tdir}/swift'.format(tdir=testdir),
+ ],
+ )
+ try:
+ yield
+ finally:
+ log.info('Removing swift...')
+ testdir = teuthology.get_testdir(ctx)
+ for (client, _) in config.items():
+ ctx.cluster.only(client).run(
+ args=[
+ 'rm',
+ '-rf',
+ '{tdir}/swift'.format(tdir=testdir),
+ ],
+ )
+
+def _config_user(testswift_conf, account, user, suffix):
+ """
+ Configure a swift user
+
+ :param account: Swift account
+ :param user: User name
+ :param suffix: user name and email suffixes.
+ """
+ testswift_conf['func_test'].setdefault('account{s}'.format(s=suffix), account)
+ testswift_conf['func_test'].setdefault('username{s}'.format(s=suffix), user)
+ testswift_conf['func_test'].setdefault('email{s}'.format(s=suffix), '{account}+test@test.test'.format(account=account))
+ testswift_conf['func_test'].setdefault('display_name{s}'.format(s=suffix), 'Mr. {account} {user}'.format(account=account, user=user))
+ testswift_conf['func_test'].setdefault('password{s}'.format(s=suffix), base64.b64encode(os.urandom(40)).decode('ascii'))
+
+@contextlib.contextmanager
+def create_users(ctx, config):
+ """
+ Create rgw users to interact with the swift interface.
+ """
+ assert isinstance(config, dict)
+ log.info('Creating rgw users...')
+ testdir = teuthology.get_testdir(ctx)
+ users = {'': 'foo', '2': 'bar'}
+ for client, testswift_conf in config.items():
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ for suffix, user in users.items():
+ _config_user(testswift_conf, '{user}.{client}'.format(user=user, client=client), user, suffix)
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ '--cluster', cluster_name,
+ 'user', 'create',
+ '--subuser', '{account}:{user}'.format(account=testswift_conf['func_test']['account{s}'.format(s=suffix)],user=user),
+ '--display-name', testswift_conf['func_test']['display_name{s}'.format(s=suffix)],
+ '--secret', testswift_conf['func_test']['password{s}'.format(s=suffix)],
+ '--email', testswift_conf['func_test']['email{s}'.format(s=suffix)],
+ '--key-type', 'swift',
+ '--access', 'full',
+ ],
+ )
+ try:
+ yield
+ finally:
+ for client in config.keys():
+ for user in users.values():
+ uid = '{user}.{client}'.format(user=user, client=client)
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ ctx.cluster.only(client).run(
+ args=[
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'radosgw-admin',
+ '-n', client,
+ '--cluster', cluster_name,
+ 'user', 'rm',
+ '--uid', uid,
+ '--purge-data',
+ ],
+ )
+
+@contextlib.contextmanager
+def configure(ctx, config):
+ """
+ Configure rgw and Swift
+ """
+ assert isinstance(config, dict)
+ log.info('Configuring testswift...')
+ testdir = teuthology.get_testdir(ctx)
+ for client, testswift_conf in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ remote.run(
+ args=[
+ 'cd',
+ '{tdir}/swift'.format(tdir=testdir),
+ run.Raw('&&'),
+ './bootstrap',
+ ],
+ )
+ conf_fp = BytesIO()
+ testswift_conf.write(conf_fp)
+ teuthology.write_file(
+ remote=remote,
+ path='{tdir}/archive/testswift.{client}.conf'.format(tdir=testdir, client=client),
+ data=conf_fp.getvalue(),
+ )
+ yield
+
+
+@contextlib.contextmanager
+def run_tests(ctx, config):
+ """
+    Run the Swift functional tests on each configured client.
+ """
+ assert isinstance(config, dict)
+ testdir = teuthology.get_testdir(ctx)
+ for client, client_config in config.items():
+ args = [
+ 'SWIFT_TEST_CONFIG_FILE={tdir}/archive/testswift.{client}.conf'.format(tdir=testdir, client=client),
+ '{tdir}/swift/virtualenv/bin/nosetests'.format(tdir=testdir),
+ '-w',
+ '{tdir}/swift/test/functional'.format(tdir=testdir),
+ '-v',
+ '-a', '!fails_on_rgw',
+ ]
+ if client_config is not None and 'extra_args' in client_config:
+ args.extend(client_config['extra_args'])
+
+ ctx.cluster.only(client).run(
+ args=args,
+ )
+ yield
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the testswift suite against rgw.
+
+ To run all tests on all clients::
+
+ tasks:
+ - ceph:
+ - rgw:
+ - testswift:
+
+ To restrict testing to particular clients::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - testswift: [client.0]
+
+ To run against a server on client.1::
+
+ tasks:
+ - ceph:
+ - rgw: [client.1]
+ - testswift:
+ client.0:
+ rgw_server: client.1
+
+ To pass extra arguments to nose (e.g. to run a certain test)::
+
+ tasks:
+ - ceph:
+ - rgw: [client.0]
+ - testswift:
+ client.0:
+ extra_args: ['test.functional.tests:TestFileUTF8', '-m', 'testCopy']
+ client.1:
+ extra_args: ['--exclude', 'TestFile']
+ """
+ assert hasattr(ctx, 'rgw'), 'swift must run after the rgw task'
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task testswift only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+    if isinstance(config, list):
+        # use empty dicts rather than None so the per-client .get() calls
+        # below do not fail when the task config is a bare list of clients
+        config = dict((client, {}) for client in config)
+
+ testswift_conf = {}
+ clients = []
+ for client, client_config in config.items():
+ # http://tracker.ceph.com/issues/40304 can't bootstrap on rhel 7.6+
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ if remote.os.name == 'rhel' and LooseVersion(remote.os.version) >= LooseVersion('7.6'):
+ log.warning('Swift tests cannot run on rhel 7.6+, skipping client {}'.format(client))
+ continue
+
+ clients.append(client)
+
+ server = client_config.get('rgw_server', client)
+ endpoint = ctx.rgw.role_endpoints.get(server)
+ assert endpoint, 'swift: no rgw endpoint for {}'.format(server)
+
+ testswift_conf[client] = ConfigObj(
+ indent_type='',
+ infile={
+ 'func_test':
+ {
+ 'auth_host' : endpoint.hostname,
+ 'auth_port' : endpoint.port,
+ 'auth_ssl' : 'yes' if endpoint.cert else 'no',
+ 'auth_prefix' : '/auth/',
+ },
+ }
+ )
+ # only take config for valid clients
+ config = {c: config[c] for c in clients}
+
+ log.info('clients={c}'.format(c=config.keys()))
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: create_users(ctx=ctx, config=testswift_conf),
+ lambda: configure(ctx=ctx, config=testswift_conf),
+ lambda: run_tests(ctx=ctx, config=config),
+ ):
+ pass
+ yield
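For reference, a standalone sketch of how the per-client testswift config above ends up on disk; the endpoint values here are made up, and only the keys set directly in task() are shown (create_users() adds the account/username/password keys via _config_user()):

    from io import BytesIO
    from configobj import ConfigObj

    conf = ConfigObj(indent_type='', infile={
        'func_test': {
            'auth_host': 'rgw.example.com',   # endpoint.hostname in the task
            'auth_port': 7280,                # endpoint.port
            'auth_ssl': 'no',
            'auth_prefix': '/auth/',
        },
    })
    buf = BytesIO()
    conf.write(buf)                           # same call configure() makes
    print(buf.getvalue().decode())
    # [func_test]
    # auth_host = rgw.example.com
    # auth_port = 7280
    # auth_ssl = no
    # auth_prefix = /auth/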
diff --git a/qa/tasks/systemd.py b/qa/tasks/systemd.py
new file mode 100644
index 00000000..745f503c
--- /dev/null
+++ b/qa/tasks/systemd.py
@@ -0,0 +1,135 @@
+"""
+Systemd test
+"""
+import contextlib
+import logging
+import re
+import time
+
+from teuthology.orchestra import run
+from teuthology.misc import reconnect, get_first_mon, wait_until_healthy
+
+log = logging.getLogger(__name__)
+
+def _remote_service_status(remote, service):
+ status = remote.sh('sudo systemctl status %s' % service,
+ check_status=False)
+ return status
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ - tasks:
+ ceph-deploy:
+ systemd:
+
+    Test that the ceph systemd services can start, stop and restart, and
+    check for any failed services and report back errors
+ """
+ for remote, roles in ctx.cluster.remotes.items():
+ remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
+ 'grep', 'ceph'])
+ units = remote.sh('sudo systemctl list-units | grep ceph',
+ check_status=False)
+ log.info(units)
+        if 'failed' in units:
+ log.info("Ceph services in failed state")
+
+ # test overall service stop and start using ceph.target
+ # ceph.target tests are meant for ceph systemd tests
+ # and not actual process testing using 'ps'
+ log.info("Stopping all Ceph services")
+ remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'])
+ status = _remote_service_status(remote, 'ceph.target')
+ log.info(status)
+ log.info("Checking process status")
+ ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
+ if ps_eaf.find('Active: inactive'):
+ log.info("Successfully stopped all ceph services")
+ else:
+ log.info("Failed to stop ceph services")
+
+ log.info("Starting all Ceph services")
+ remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target'])
+ status = _remote_service_status(remote, 'ceph.target')
+ log.info(status)
+        if 'Active: active' in status:
+            log.info("Successfully started all Ceph services")
+        else:
+            log.info("Failed to start Ceph services")
+ ps_eaf = remote.sh('sudo ps -eaf | grep ceph')
+ log.info(ps_eaf)
+ time.sleep(4)
+
+ # test individual services start stop
+ name = remote.shortname
+ mon_name = 'ceph-mon@' + name + '.service'
+ mds_name = 'ceph-mds@' + name + '.service'
+ mgr_name = 'ceph-mgr@' + name + '.service'
+ mon_role_name = 'mon.' + name
+ mds_role_name = 'mds.' + name
+ mgr_role_name = 'mgr.' + name
+        # pick an osd id out of the ps output captured above
+        m_osd = re.search(r'--id (\d+) --setuser ceph', ps_eaf)
+ if m_osd:
+ osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1))
+ remote.run(args=['sudo', 'systemctl', 'status',
+ osd_service])
+ remote.run(args=['sudo', 'systemctl', 'stop',
+ osd_service])
+ time.sleep(4) # immediate check will result in deactivating state
+ status = _remote_service_status(remote, osd_service)
+ log.info(status)
+            if 'Active: inactive' in status:
+ log.info("Successfully stopped single osd ceph service")
+ else:
+ log.info("Failed to stop ceph osd services")
+ remote.sh(['sudo', 'systemctl', 'start', osd_service])
+ time.sleep(4)
+ if mon_role_name in roles:
+ remote.run(args=['sudo', 'systemctl', 'status', mon_name])
+ remote.run(args=['sudo', 'systemctl', 'stop', mon_name])
+ time.sleep(4) # immediate check will result in deactivating state
+ status = _remote_service_status(remote, mon_name)
+            if 'Active: inactive' in status:
+ log.info("Successfully stopped single mon ceph service")
+ else:
+ log.info("Failed to stop ceph mon service")
+ remote.run(args=['sudo', 'systemctl', 'start', mon_name])
+ time.sleep(4)
+ if mgr_role_name in roles:
+ remote.run(args=['sudo', 'systemctl', 'status', mgr_name])
+ remote.run(args=['sudo', 'systemctl', 'stop', mgr_name])
+ time.sleep(4) # immediate check will result in deactivating state
+ status = _remote_service_status(remote, mgr_name)
+            if 'Active: inactive' in status:
+ log.info("Successfully stopped single ceph mgr service")
+ else:
+ log.info("Failed to stop ceph mgr service")
+ remote.run(args=['sudo', 'systemctl', 'start', mgr_name])
+ time.sleep(4)
+ if mds_role_name in roles:
+ remote.run(args=['sudo', 'systemctl', 'status', mds_name])
+ remote.run(args=['sudo', 'systemctl', 'stop', mds_name])
+ time.sleep(4) # immediate check will result in deactivating state
+ status = _remote_service_status(remote, mds_name)
+            if 'Active: inactive' in status:
+ log.info("Successfully stopped single ceph mds service")
+ else:
+ log.info("Failed to stop ceph mds service")
+ remote.run(args=['sudo', 'systemctl', 'start', mds_name])
+ time.sleep(4)
+
+    # reboot all nodes and verify the systemd units restart
+    # the workunit that runs next would fail if any systemd unit doesn't start
+ ctx.cluster.run(args='sudo reboot', wait=False, check_status=False)
+ # avoid immediate reconnect
+ time.sleep(120)
+ reconnect(ctx, 480) # reconnect all nodes
+ # for debug info
+ ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'),
+ 'grep', 'ceph'])
+ # wait for HEALTH_OK
+ mon = get_first_mon(ctx, config)
+ (mon_remote,) = ctx.cluster.only(mon).remotes.keys()
+ wait_until_healthy(ctx, mon_remote, use_sudo=True)
+ yield
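The checks above grep the human-readable `systemctl status` output for its `Active:` line. As an alternative sketch (an assumption, not what the task does), `systemctl is-active` returns the same answer as a single machine-readable word:

    def service_state(remote, service):
        # returns e.g. 'active', 'inactive' or 'failed'; a non-zero exit code is
        # expected for anything other than 'active', hence check_status=False
        out = remote.sh('sudo systemctl is-active %s' % service,
                        check_status=False)
        return out.strip()

    # e.g.: if service_state(remote, 'ceph.target') == 'inactive': ...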
diff --git a/qa/tasks/tempest.py b/qa/tasks/tempest.py
new file mode 100644
index 00000000..2fe49a7e
--- /dev/null
+++ b/qa/tasks/tempest.py
@@ -0,0 +1,284 @@
+"""
+Deploy and configure Tempest for Teuthology
+"""
+import contextlib
+import logging
+
+from six.moves import configparser
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+from teuthology import packaging
+from teuthology.exceptions import ConfigError
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+def get_tempest_dir(ctx):
+ return '{tdir}/tempest'.format(tdir=teuthology.get_testdir(ctx))
+
+def run_in_tempest_dir(ctx, client, cmdargs, **kwargs):
+ ctx.cluster.only(client).run(
+ args=[ 'cd', get_tempest_dir(ctx), run.Raw('&&'), ] + cmdargs,
+ **kwargs
+ )
+
+def run_in_tempest_rgw_dir(ctx, client, cmdargs, **kwargs):
+ ctx.cluster.only(client).run(
+ args=[ 'cd', get_tempest_dir(ctx) + '/rgw', run.Raw('&&'), ] + cmdargs,
+ **kwargs
+ )
+
+def run_in_tempest_venv(ctx, client, cmdargs, **kwargs):
+ run_in_tempest_dir(ctx, client,
+ [ 'source',
+ '.tox/venv/bin/activate',
+ run.Raw('&&')
+ ] + cmdargs, **kwargs)
+
+@contextlib.contextmanager
+def download(ctx, config):
+ """
+    Download Tempest from GitHub.
+    Remove the downloaded directory upon exit.
+
+ The context passed in should be identical to the context
+ passed in to the main task.
+ """
+ assert isinstance(config, dict)
+ log.info('Downloading Tempest...')
+ for (client, cconf) in config.items():
+ ctx.cluster.only(client).run(
+ args=[
+ 'git', 'clone',
+ '-b', cconf.get('force-branch', 'master'),
+ 'https://github.com/openstack/tempest.git',
+ get_tempest_dir(ctx)
+ ],
+ )
+
+ sha1 = cconf.get('sha1')
+ if sha1 is not None:
+ run_in_tempest_dir(ctx, client, [ 'git', 'reset', '--hard', sha1 ])
+ try:
+ yield
+ finally:
+ log.info('Removing Tempest...')
+ for client in config:
+ ctx.cluster.only(client).run(
+ args=[ 'rm', '-rf', get_tempest_dir(ctx) ],
+ )
+
+def get_toxvenv_dir(ctx):
+ return ctx.tox.venv_path
+
+@contextlib.contextmanager
+def install_python3(ctx, config):
+ assert isinstance(config, dict)
+ log.info('Installing Python3 for Tempest')
+ installed = []
+ for (client, _) in config.items():
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ try:
+ packaging.get_package_version(remote, 'python3')
+        except Exception:
+ packaging.install_package('python3', remote)
+ installed.append(client)
+ try:
+ yield
+ finally:
+ log.info('Removing Python3 required by Tempest...')
+ for client in installed:
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ packaging.remove_package('python3', remote)
+
+@contextlib.contextmanager
+def setup_venv(ctx, config):
+ """
+ Setup the virtualenv for Tempest using tox.
+ """
+ assert isinstance(config, dict)
+ log.info('Setting up virtualenv for Tempest')
+ for (client, _) in config.items():
+ run_in_tempest_dir(ctx, client,
+ [ '{tvdir}/bin/tox'.format(tvdir=get_toxvenv_dir(ctx)),
+ '-e', 'venv', '--notest'
+ ])
+ yield
+
+def setup_logging(ctx, cpar):
+ cpar.set('DEFAULT', 'log_dir', teuthology.get_archive_dir(ctx))
+ cpar.set('DEFAULT', 'log_file', 'tempest.log')
+
+def to_config(config, params, section, cpar):
+ for (k, v) in config[section].items():
+ if isinstance(v, str):
+ v = v.format(**params)
+ elif isinstance(v, bool):
+ v = 'true' if v else 'false'
+ else:
+ v = str(v)
+ cpar.set(section, k, v)
+
+@contextlib.contextmanager
+def configure_instance(ctx, config):
+ assert isinstance(config, dict)
+ log.info('Configuring Tempest')
+
+ for (client, cconfig) in config.items():
+ run_in_tempest_venv(ctx, client,
+ [
+ 'tempest',
+ 'init',
+ '--workspace-path',
+ get_tempest_dir(ctx) + '/workspace.yaml',
+ 'rgw'
+ ])
+
+ # prepare the config file
+ tetcdir = '{tdir}/rgw/etc'.format(tdir=get_tempest_dir(ctx))
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ local_conf = remote.get_file(tetcdir + '/tempest.conf.sample')
+
+ # fill the params dictionary which allows to use templatized configs
+ keystone_role = cconfig.get('use-keystone-role', None)
+ if keystone_role is None \
+ or keystone_role not in ctx.keystone.public_endpoints:
+ raise ConfigError('the use-keystone-role is misconfigured')
+ public_host, public_port = ctx.keystone.public_endpoints[keystone_role]
+ params = {
+ 'keystone_public_host': public_host,
+ 'keystone_public_port': str(public_port),
+ }
+
+ cpar = configparser.ConfigParser()
+ cpar.read(local_conf)
+ setup_logging(ctx, cpar)
+ to_config(cconfig, params, 'auth', cpar)
+ to_config(cconfig, params, 'identity', cpar)
+ to_config(cconfig, params, 'object-storage', cpar)
+ to_config(cconfig, params, 'object-storage-feature-enabled', cpar)
+        with open(local_conf, 'w+') as local_conf_file:
+            cpar.write(local_conf_file)
+
+ remote.put_file(local_conf, tetcdir + '/tempest.conf')
+ yield
+
+@contextlib.contextmanager
+def run_tempest(ctx, config):
+ assert isinstance(config, dict)
+    log.info('Running Tempest...')
+
+ for (client, cconf) in config.items():
+ blacklist = cconf.get('blacklist', [])
+ assert isinstance(blacklist, list)
+ run_in_tempest_venv(ctx, client,
+ [
+ 'tempest',
+ 'run',
+ '--workspace-path',
+ get_tempest_dir(ctx) + '/workspace.yaml',
+ '--workspace',
+ 'rgw',
+ '--regex',
+ '(tempest.api.object_storage)' +
+ ''.join([ '(?!{blackitem})'.format(blackitem=blackitem)
+ for blackitem in blacklist])
+ ])
+ try:
+ yield
+ finally:
+ pass
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Deploy and run Tempest's object storage campaign
+
+ Example of configuration:
+
+ overrides:
+ ceph:
+ conf:
+ client:
+ rgw keystone admin token: ADMIN
+ rgw keystone accepted roles: admin,Member
+ rgw keystone implicit tenants: true
+ rgw keystone accepted admin roles: admin
+ rgw swift enforce content length: true
+ rgw swift account in url: true
+ rgw swift versioning enabled: true
+ tasks:
+ # typically, the task should be preceded with install, ceph, tox,
+ # keystone and rgw. Tox and Keystone are specific requirements
+ # of tempest.py.
+ - rgw:
+ # it's important to match the prefix with the endpoint's URL
+ # in Keystone. Additionally, if we want to test /info and its
+ # accompanying stuff, the whole Swift API must be put in root
+ # of the whole URL hierarchy (read: frontend_prefix == /swift).
+ frontend_prefix: /swift
+ client.0:
+ use-keystone-role: client.0
+ - tempest:
+ client.0:
+ force-branch: master
+ use-keystone-role: client.0
+ auth:
+ admin_username: admin
+ admin_project_name: admin
+ admin_password: ADMIN
+ admin_domain_name: Default
+ identity:
+ uri: http://{keystone_public_host}:{keystone_public_port}/v2.0/
+ uri_v3: http://{keystone_public_host}:{keystone_public_port}/v3/
+ admin_role: admin
+ object-storage:
+ reseller_admin_role: admin
+ object-storage-feature-enabled:
+ container_sync: false
+ discoverability: false
+ blacklist:
+ # please strip half of these items after merging PRs #15369
+ # and #12704
+ - .*test_list_containers_reverse_order.*
+ - .*test_list_container_contents_with_end_marker.*
+ - .*test_delete_non_empty_container.*
+ - .*test_container_synchronization.*
+ - .*test_get_object_after_expiration_time.*
+ - .*test_create_object_with_transfer_encoding.*
+ """
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ 'task tempest only supports a list or dictionary for configuration'
+
+    if not hasattr(ctx, 'tox'):
+        raise ConfigError('tempest must run after the tox task')
+    if not hasattr(ctx, 'keystone'):
+        raise ConfigError('tempest must run after the keystone task')
+
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+
+ overrides = ctx.config.get('overrides', {})
+ # merge each client section, not the top level.
+ for client in config.keys():
+ if not config[client]:
+ config[client] = {}
+ teuthology.deep_merge(config[client], overrides.get('keystone', {}))
+
+ log.debug('Tempest config is %s', config)
+
+ with contextutil.nested(
+ lambda: download(ctx=ctx, config=config),
+ lambda: install_python3(ctx=ctx, config=config),
+ lambda: setup_venv(ctx=ctx, config=config),
+ lambda: configure_instance(ctx=ctx, config=config),
+ lambda: run_tempest(ctx=ctx, config=config),
+ ):
+ yield
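To make the templating in to_config() concrete, here is a small standalone sketch with made-up keystone endpoint values; only the 'identity' section is shown, and the section is created in place instead of being read from tempest.conf.sample:

    from six.moves import configparser

    params = {'keystone_public_host': 'keystone.example.com',
              'keystone_public_port': '5000'}
    cconfig = {'identity': {
        'uri': 'http://{keystone_public_host}:{keystone_public_port}/v2.0/',
        'admin_role': 'admin',
    }}

    cpar = configparser.ConfigParser()
    cpar.add_section('identity')
    for k, v in cconfig['identity'].items():
        cpar.set('identity', k,
                 v.format(**params) if isinstance(v, str) else str(v))
    print(cpar.get('identity', 'uri'))
    # http://keystone.example.com:5000/v2.0/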
diff --git a/qa/tasks/tests/__init__.py b/qa/tasks/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/qa/tasks/tests/__init__.py
diff --git a/qa/tasks/tests/test_devstack.py b/qa/tasks/tests/test_devstack.py
new file mode 100644
index 00000000..39b94a64
--- /dev/null
+++ b/qa/tasks/tests/test_devstack.py
@@ -0,0 +1,48 @@
+from textwrap import dedent
+
+from tasks import devstack
+
+
+class TestDevstack(object):
+ def test_parse_os_table(self):
+ table_str = dedent("""
+ +---------------------+--------------------------------------+
+ | Property | Value |
+ +---------------------+--------------------------------------+
+ | attachments | [] |
+ | availability_zone | nova |
+ | bootable | false |
+ | created_at | 2014-02-21T17:14:47.548361 |
+ | display_description | None |
+ | display_name | NAME |
+ | id | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e |
+ | metadata | {} |
+ | size | 1 |
+ | snapshot_id | None |
+ | source_volid | None |
+ | status | creating |
+ | volume_type | None |
+ +---------------------+--------------------------------------+
+ """).strip()
+ expected = {
+ 'Property': 'Value',
+ 'attachments': '[]',
+ 'availability_zone': 'nova',
+ 'bootable': 'false',
+ 'created_at': '2014-02-21T17:14:47.548361',
+ 'display_description': 'None',
+ 'display_name': 'NAME',
+ 'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e',
+ 'metadata': '{}',
+ 'size': '1',
+ 'snapshot_id': 'None',
+ 'source_volid': 'None',
+ 'status': 'creating',
+ 'volume_type': 'None'}
+
+ vol_info = devstack.parse_os_table(table_str)
+ assert vol_info == expected
+
+
+
+
diff --git a/qa/tasks/tests/test_radosgw_admin.py b/qa/tasks/tests/test_radosgw_admin.py
new file mode 100644
index 00000000..2ed0ebd5
--- /dev/null
+++ b/qa/tasks/tests/test_radosgw_admin.py
@@ -0,0 +1,35 @@
+import six
+if six.PY3:
+ from unittest.mock import Mock
+else:
+ from mock import Mock
+
+from tasks import radosgw_admin
+
+acl_with_version = """<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy>
+""" # noqa
+
+
+acl_without_version = """<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy>
+""" # noqa
+
+
+class TestGetAcl(object):
+
+ def setup(self):
+ self.key = Mock()
+
+ def test_removes_xml_version(self):
+ self.key.get_xml_acl = Mock(return_value=acl_with_version)
+ result = radosgw_admin.get_acl(self.key)
+ assert result.startswith('<AccessControlPolicy')
+
+ def test_xml_version_is_already_removed(self):
+ self.key.get_xml_acl = Mock(return_value=acl_without_version)
+ result = radosgw_admin.get_acl(self.key)
+ assert result.startswith('<AccessControlPolicy')
+
+ def test_newline_gets_trimmed(self):
+ self.key.get_xml_acl = Mock(return_value=acl_without_version)
+ result = radosgw_admin.get_acl(self.key)
+ assert result.endswith('\n') is False
diff --git a/qa/tasks/teuthology_integration.py b/qa/tasks/teuthology_integration.py
new file mode 100644
index 00000000..b5a2278e
--- /dev/null
+++ b/qa/tasks/teuthology_integration.py
@@ -0,0 +1,19 @@
+import logging
+from teuthology import misc
+from teuthology.task import Task
+
+log = logging.getLogger(__name__)
+
+
+class TeuthologyIntegration(Task):
+
+ def begin(self):
+ misc.sh("""
+ set -x
+ pip install tox
+ tox
+ # tox -e py27-integration
+ tox -e openstack-integration
+ """)
+
+task = TeuthologyIntegration
diff --git a/qa/tasks/tgt.py b/qa/tasks/tgt.py
new file mode 100644
index 00000000..a0758f47
--- /dev/null
+++ b/qa/tasks/tgt.py
@@ -0,0 +1,177 @@
+"""
+Task to handle tgt
+
+Assumptions made:
+ The ceph-extras tgt package may need to get installed.
+ The open-iscsi package needs to get installed.
+"""
+import logging
+import contextlib
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def start_tgt_remotes(ctx, start_tgtd):
+ """
+ This subtask starts up a tgtd on the clients specified
+ """
+ remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+ tgtd_list = []
+ for rem, roles in remotes.items():
+ for _id in roles:
+ if _id in start_tgtd:
+ if not rem in tgtd_list:
+ tgtd_list.append(rem)
+ size = ctx.config.get('image_size', 10240)
+ rem.run(
+ args=[
+ 'rbd',
+ 'create',
+ 'iscsi-image',
+ '--size',
+ str(size),
+ ])
+ rem.run(
+ args=[
+ 'sudo',
+ 'tgtadm',
+ '--lld',
+ 'iscsi',
+ '--mode',
+ 'target',
+ '--op',
+ 'new',
+ '--tid',
+ '1',
+ '--targetname',
+ 'rbd',
+ ])
+ rem.run(
+ args=[
+ 'sudo',
+ 'tgtadm',
+ '--lld',
+ 'iscsi',
+ '--mode',
+ 'logicalunit',
+ '--op',
+ 'new',
+ '--tid',
+ '1',
+ '--lun',
+ '1',
+ '--backing-store',
+ 'iscsi-image',
+ '--bstype',
+ 'rbd',
+ ])
+ rem.run(
+ args=[
+ 'sudo',
+ 'tgtadm',
+ '--lld',
+ 'iscsi',
+ '--op',
+ 'bind',
+ '--mode',
+ 'target',
+ '--tid',
+ '1',
+ '-I',
+ 'ALL',
+ ])
+ try:
+ yield
+
+ finally:
+ for rem in tgtd_list:
+ rem.run(
+ args=[
+ 'sudo',
+ 'tgtadm',
+ '--lld',
+ 'iscsi',
+ '--mode',
+ 'target',
+ '--op',
+ 'delete',
+ '--force',
+ '--tid',
+ '1',
+ ])
+ rem.run(
+ args=[
+ 'rbd',
+ 'snap',
+ 'purge',
+ 'iscsi-image',
+ ])
+ rem.run(
+ args=[
+ 'sudo',
+ 'rbd',
+ 'rm',
+ 'iscsi-image',
+ ])
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Start up tgt.
+
+    To start on all clients::
+
+ tasks:
+ - ceph:
+ - tgt:
+
+ To start on certain clients::
+
+ tasks:
+ - ceph:
+ - tgt: [client.0, client.3]
+
+ or
+
+ tasks:
+ - ceph:
+ - tgt:
+ client.0:
+ client.3:
+
+    An image size (in MB) can also be specified::
+
+ tasks:
+ - ceph:
+ - tgt:
+      image_size: 20480
+
+ The general flow of things here is:
+ 1. Find clients on which tgt is supposed to run (start_tgtd)
+ 2. Remotely start up tgt daemon
+ On cleanup:
+ 3. Stop tgt daemon
+
+ The iscsi administration is handled by the iscsi task.
+ """
+ if config:
+ config = {key : val for key, val in config.items()
+ if key.startswith('client')}
+ # config at this point should only contain keys starting with 'client'
+ start_tgtd = []
+ remotes = ctx.cluster.only(teuthology.is_type('client')).remotes
+ log.info(remotes)
+ if not config:
+ start_tgtd = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ else:
+ start_tgtd = config
+ log.info(start_tgtd)
+ with contextutil.nested(
+ lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),):
+ yield
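The client-selection logic in task() above reduces to a small pure function; a sketch with a fabricated role list standing in for teuthology.all_roles_of_type():

    def select_tgt_clients(config, all_client_ids):
        # keep only 'client.*' keys, then fall back to every client role
        if config:
            config = {key: val for key, val in config.items()
                      if key.startswith('client')}
        if not config:
            return ['client.{id}'.format(id=id_) for id_ in all_client_ids]
        return list(config)

    # select_tgt_clients(None, ['0', '3'])               -> ['client.0', 'client.3']
    # select_tgt_clients({'client.0': None}, ['0', '3']) -> ['client.0']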
diff --git a/qa/tasks/thrash_pool_snaps.py b/qa/tasks/thrash_pool_snaps.py
new file mode 100644
index 00000000..c71c9ce8
--- /dev/null
+++ b/qa/tasks/thrash_pool_snaps.py
@@ -0,0 +1,61 @@
+"""
+Thrash -- Simulate random osd failures.
+"""
+import contextlib
+import logging
+import gevent
+import time
+import random
+
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ "Thrash" snap creation and removal on the listed pools
+
+ Example:
+
+ thrash_pool_snaps:
+ pools: [.rgw.buckets, .rgw.buckets.index]
+ max_snaps: 10
+ min_snaps: 5
+ period: 10
+ """
+ stopping = False
+ def do_thrash():
+ pools = config.get('pools', [])
+ max_snaps = config.get('max_snaps', 10)
+ min_snaps = config.get('min_snaps', 5)
+ period = config.get('period', 30)
+ snaps = []
+ manager = ctx.managers['ceph']
+ def remove_snap():
+ assert len(snaps) > 0
+ snap = random.choice(snaps)
+ log.info("Removing snap %s" % (snap,))
+ for pool in pools:
+ manager.remove_pool_snap(pool, str(snap))
+ snaps.remove(snap)
+ def add_snap(snap):
+ log.info("Adding snap %s" % (snap,))
+ for pool in pools:
+ manager.add_pool_snap(pool, str(snap))
+ snaps.append(snap)
+ index = 0
+ while not stopping:
+ index += 1
+ time.sleep(period)
+ if len(snaps) <= min_snaps:
+ add_snap(index)
+ elif len(snaps) >= max_snaps:
+ remove_snap()
+ else:
+ random.choice([lambda: add_snap(index), remove_snap])()
+ log.info("Stopping")
+ thread = gevent.spawn(do_thrash)
+ yield
+ stopping = True
+ thread.join()
+
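The decision made on each pass of do_thrash() can also be written as a pure function, which is easier to reason about in isolation; this sketch only returns the chosen action instead of touching a cluster:

    import random

    def next_snap_action(snaps, index, min_snaps=5, max_snaps=10):
        # grow below the floor, shrink above the ceiling, otherwise flip a coin
        if len(snaps) <= min_snaps:
            return ('add', index)
        if len(snaps) >= max_snaps:
            return ('remove', random.choice(snaps))
        return random.choice([('add', index), ('remove', random.choice(snaps))])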
diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml
new file mode 100644
index 00000000..914f6e25
--- /dev/null
+++ b/qa/tasks/thrashosds-health.yaml
@@ -0,0 +1,15 @@
+overrides:
+ ceph:
+ log-whitelist:
+ - overall HEALTH_
+ - \(OSDMAP_FLAGS\)
+ - \(OSD_
+ - \(PG_
+ - \(POOL_
+ - \(CACHE_POOL_
+ - \(SMALLER_PGP_NUM\)
+ - \(OBJECT_
+ - \(SLOW_OPS\)
+ - \(REQUEST_SLOW\)
+ - \(TOO_FEW_PGS\)
+ - slow request
diff --git a/qa/tasks/thrashosds.py b/qa/tasks/thrashosds.py
new file mode 100644
index 00000000..253663f8
--- /dev/null
+++ b/qa/tasks/thrashosds.py
@@ -0,0 +1,219 @@
+"""
+Thrash -- Simulate random osd failures.
+"""
+import contextlib
+import logging
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ "Thrash" the OSDs by randomly marking them out/down (and then back
+ in) until the task is ended. This loops, and every op_delay
+ seconds it randomly chooses to add or remove an OSD (even odds)
+ unless there are fewer than min_out OSDs out of the cluster, or
+ more than min_in OSDs in the cluster.
+
+ All commands are run on mon0 and it stops when __exit__ is called.
+
+ The config is optional, and is a dict containing some or all of:
+
+ cluster: (default 'ceph') the name of the cluster to thrash
+
+ min_in: (default 4) the minimum number of OSDs to keep in the
+ cluster
+
+ min_out: (default 0) the minimum number of OSDs to keep out of the
+ cluster
+
+ op_delay: (5) the length of time to sleep between changing an
+ OSD's status
+
+ min_dead: (0) minimum number of osds to leave down/dead.
+
+ max_dead: (0) maximum number of osds to leave down/dead before waiting
+ for clean. This should probably be num_replicas - 1.
+
+ clean_interval: (60) the approximate length of time to loop before
+ waiting until the cluster goes clean. (In reality this is used
+ to probabilistically choose when to wait, and the method used
+ makes it closer to -- but not identical to -- the half-life.)
+
+ scrub_interval: (-1) the approximate length of time to loop before
+ waiting until a scrub is performed while cleaning. (In reality
+ this is used to probabilistically choose when to wait, and it
+ only applies to the cases where cleaning is being performed).
+ -1 is used to indicate that no scrubbing will be done.
+
+ chance_down: (0.4) the probability that the thrasher will mark an
+ OSD down rather than marking it out. (The thrasher will not
+ consider that OSD out of the cluster, since presently an OSD
+ wrongly marked down will mark itself back up again.) This value
+ can be either an integer (eg, 75) or a float probability (eg
+ 0.75).
+
+ chance_test_min_size: (0) chance to run test_pool_min_size,
+ which:
+ - kills all but one osd
+ - waits
+ - kills that osd
+ - revives all other osds
+ - verifies that the osds fully recover
+
+ timeout: (360) the number of seconds to wait for the cluster
+ to become clean after each cluster change. If this doesn't
+ happen within the timeout, an exception will be raised.
+
+ revive_timeout: (150) number of seconds to wait for an osd asok to
+ appear after attempting to revive the osd
+
+ thrash_primary_affinity: (true) randomly adjust primary-affinity
+
+    chance_pgnum_grow: (0) chance to increase a pool's pg_num
+    chance_pgpnum_fix: (0) chance to adjust a pool's pgp_num to match its pg_num
+    pool_grow_by: (10) amount to increase pgnum by
+    chance_pgnum_shrink: (0) chance to decrease a pool's pg_num
+ pool_shrink_by: (10) amount to decrease pgnum by
+ max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
+
+ pause_short: (3) duration of short pause
+ pause_long: (80) duration of long pause
+ pause_check_after: (50) assert osd down after this long
+ chance_inject_pause_short: (1) chance of injecting short stall
+ chance_inject_pause_long: (0) chance of injecting long stall
+
+ clean_wait: (0) duration to wait before resuming thrashing once clean
+
+ sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
+ random live osd
+
+ powercycle: (false) whether to power cycle the node instead
+ of just the osd process. Note that this assumes that a single
+ osd is the only important process on the node.
+
+ bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
+ the delay lets the BlockDevice "accept" more aio operations but blocks
+ any flush, and then eventually crashes (losing some or all ios). If 0,
+ no bdev failure injection is enabled.
+
+ bdev_inject_crash_probability: (.5) probability of doing a bdev failure
+ injection crash vs a normal OSD kill.
+
+ chance_test_backfill_full: (0) chance to simulate full disks stopping
+ backfill
+
+ chance_test_map_discontinuity: (0) chance to test map discontinuity
+ map_discontinuity_sleep_time: (40) time to wait for map trims
+
+ ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
+ chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
+
+ optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
+ enablement to all osds
+
+ dump_ops_enable: (true) continuously dump ops on all live osds
+
+ noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub
+
+ disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
+ tests
+
+ chance_thrash_cluster_full: .05
+
+ chance_thrash_pg_upmap: 1.0
+ chance_thrash_pg_upmap_items: 1.0
+
+ aggressive_pg_num_changes: (true) whether we should bypass the careful throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller
+
+ example:
+
+ tasks:
+ - ceph:
+ - thrashosds:
+ cluster: ceph
+ chance_down: 10
+ op_delay: 3
+ min_in: 1
+ timeout: 600
+ - interactive:
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'thrashosds task only accepts a dict for configuration'
+ # add default value for sighup_delay
+ config['sighup_delay'] = config.get('sighup_delay', 0.1)
+ # add default value for optrack_toggle_delay
+ config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
+ # add default value for dump_ops_enable
+ config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
+ # add default value for noscrub_toggle_delay
+ config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
+ # add default value for random_eio
+ config['random_eio'] = config.get('random_eio', 0.0)
+ aggro = config.get('aggressive_pg_num_changes', True)
+
+ log.info("config is {config}".format(config=str(config)))
+
+ overrides = ctx.config.get('overrides', {})
+ log.info("overrides is {overrides}".format(overrides=str(overrides)))
+ teuthology.deep_merge(config, overrides.get('thrashosds', {}))
+ cluster = config.get('cluster', 'ceph')
+
+ log.info("config is {config}".format(config=str(config)))
+
+ if 'powercycle' in config:
+
+ # sync everyone first to avoid collateral damage to / etc.
+ log.info('Doing preliminary sync to avoid collateral damage...')
+ ctx.cluster.run(args=['sync'])
+
+ if 'ipmi_user' in ctx.teuthology_config:
+ for remote in ctx.cluster.remotes.keys():
+ log.debug('checking console status of %s' % remote.shortname)
+ if not remote.console.check_status():
+ log.warning('Failed to get console status for %s',
+ remote.shortname)
+
+ # check that all osd remotes have a valid console
+ osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
+ for remote in osds.remotes.keys():
+ if not remote.console.has_ipmi_credentials:
+ raise Exception(
+ 'IPMI console required for powercycling, '
+ 'but not available on osd role: {r}'.format(
+ r=remote.name))
+
+ cluster_manager = ctx.managers[cluster]
+ for f in ['powercycle', 'bdev_inject_crash']:
+ if config.get(f):
+ cluster_manager.config[f] = config.get(f)
+
+ if aggro:
+ cluster_manager.raw_cluster_cmd(
+ 'config', 'set', 'mgr',
+ 'mgr_debug_aggressive_pg_num_changes',
+ 'true')
+
+ log.info('Beginning thrashosds...')
+ thrash_proc = ceph_manager.Thrasher(
+ cluster_manager,
+ config,
+ logger=log.getChild('thrasher')
+ )
+ try:
+ yield
+ finally:
+ log.info('joining thrashosds')
+ thrash_proc.do_join()
+ cluster_manager.wait_for_all_osds_up()
+ cluster_manager.flush_all_pg_stats()
+ cluster_manager.wait_for_recovery(config.get('timeout', 360))
+ if aggro:
+ cluster_manager.raw_cluster_cmd(
+ 'config', 'rm', 'mgr',
+ 'mgr_debug_aggressive_pg_num_changes')
diff --git a/qa/tasks/tox.py b/qa/tasks/tox.py
new file mode 100644
index 00000000..36c226d0
--- /dev/null
+++ b/qa/tasks/tox.py
@@ -0,0 +1,50 @@
+import argparse
+import contextlib
+import logging
+
+from teuthology import misc as teuthology
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+
+def get_toxvenv_dir(ctx):
+ return '{tdir}/tox-venv'.format(tdir=teuthology.get_testdir(ctx))
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Deploy tox from pip. It's a dependency for both Keystone and Tempest.
+ """
+ assert config is None or isinstance(config, list) \
+ or isinstance(config, dict), \
+ "task tox only supports a list or dictionary for configuration"
+ all_clients = ['client.{id}'.format(id=id_)
+ for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
+ if config is None:
+ config = all_clients
+ if isinstance(config, list):
+ config = dict.fromkeys(config)
+
+ log.info('Deploying tox from pip...')
+ for (client, _) in config.items():
+ # yup, we have to deploy tox first. The packaged one, available
+ # on Sepia's Ubuntu machines, is outdated for Keystone/Tempest.
+ tvdir = get_toxvenv_dir(ctx)
+ ctx.cluster.only(client).run(args=[ 'virtualenv', '-p', 'python3', tvdir ])
+ ctx.cluster.only(client).run(args=
+ [ 'source', '{tvdir}/bin/activate'.format(tvdir=tvdir),
+ run.Raw('&&'),
+ 'pip', 'install', 'tox==3.15.0'
+ ])
+
+    # export the venv path for Keystone and Tempest to pick up
+ ctx.tox = argparse.Namespace()
+ ctx.tox.venv_path = get_toxvenv_dir(ctx)
+
+ try:
+ yield
+ finally:
+ for (client, _) in config.items():
+ ctx.cluster.only(client).run(
+ args=[ 'rm', '-rf', get_toxvenv_dir(ctx) ])
diff --git a/qa/tasks/userdata_setup.yaml b/qa/tasks/userdata_setup.yaml
new file mode 100644
index 00000000..7271925c
--- /dev/null
+++ b/qa/tasks/userdata_setup.yaml
@@ -0,0 +1,25 @@
+#cloud-config-archive
+
+- type: text/cloud-config
+ content: |
+ output:
+ all: '| tee -a /var/log/cloud-init-output.log'
+
+# allow passwordless access for debugging
+- |
+ #!/usr/bin/env bash
+ exec passwd -d ubuntu
+
+- |
+ #!/usr/bin/env bash
+
+ # mount a NFS share for storing logs
+ apt-get update
+ apt-get -y install nfs-common
+ mkdir /mnt/log
+ # 10.0.2.2 is the host
+ mount -v -t nfs -o proto=tcp 10.0.2.2:{mnt_dir} /mnt/log
+
+ # mount the iso image that has the test script
+ mkdir /mnt/cdrom
+ mount -t auto /dev/cdrom /mnt/cdrom
diff --git a/qa/tasks/userdata_teardown.yaml b/qa/tasks/userdata_teardown.yaml
new file mode 100644
index 00000000..731d769f
--- /dev/null
+++ b/qa/tasks/userdata_teardown.yaml
@@ -0,0 +1,11 @@
+- |
+ #!/usr/bin/env bash
+ cp /var/log/cloud-init-output.log /mnt/log
+
+- |
+ #!/usr/bin/env bash
+ umount /mnt/log
+
+- |
+ #!/usr/bin/env bash
+ shutdown -h -P now
diff --git a/qa/tasks/util/__init__.py b/qa/tasks/util/__init__.py
new file mode 100644
index 00000000..5b8575ed
--- /dev/null
+++ b/qa/tasks/util/__init__.py
@@ -0,0 +1,26 @@
+from teuthology import misc
+
+def get_remote(ctx, cluster, service_type, service_id):
+ """
+ Get the Remote for the host where a particular role runs.
+
+ :param cluster: name of the cluster the service is part of
+ :param service_type: e.g. 'mds', 'osd', 'client'
+ :param service_id: The third part of a role, e.g. '0' for
+ the role 'ceph.client.0'
+ :return: a Remote instance for the host where the
+ requested role is placed
+ """
+ def _is_instance(role):
+ role_tuple = misc.split_role(role)
+ return role_tuple == (cluster, service_type, str(service_id))
+ try:
+ (remote,) = ctx.cluster.only(_is_instance).remotes.keys()
+ except ValueError:
+ raise KeyError("Service {0}.{1}.{2} not found".format(cluster,
+ service_type,
+ service_id))
+ return remote
+
+def get_remote_for_role(ctx, role):
+ return get_remote(ctx, *misc.split_role(role))
diff --git a/qa/tasks/util/rados.py b/qa/tasks/util/rados.py
new file mode 100644
index 00000000..a0c54ce4
--- /dev/null
+++ b/qa/tasks/util/rados.py
@@ -0,0 +1,87 @@
+import logging
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def rados(ctx, remote, cmd, wait=True, check_status=False):
+ testdir = teuthology.get_testdir(ctx)
+ log.info("rados %s" % ' '.join(cmd))
+ pre = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'rados',
+        ]
+ pre.extend(cmd)
+ proc = remote.run(
+ args=pre,
+ check_status=check_status,
+ wait=wait,
+ )
+ if wait:
+ return proc.exitstatus
+ else:
+ return proc
+
+def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None):
+ remote.run(args=['sudo', 'ceph'] +
+ cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name])
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'create', name,
+ str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name
+ ])
+ if application:
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
+ ], check_status=False) # may fail as EINVAL when run in jewel upgrade test
+
+def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None):
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), '--cluster', cluster_name
+ ])
+ if application:
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name
+ ], check_status=False)
+
+def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"):
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name
+ ])
+ remote.run(args=[
+ 'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name,
+ str(size), '--cluster', cluster_name
+ ])
+
+def cmd_erasure_code_profile(profile_name, profile):
+ """
+ Return the shell command to run to create the erasure code profile
+ described by the profile parameter.
+
+ :param profile_name: a string matching [A-Za-z0-9-_.]+
+    :param profile: a map whose semantics depend on the erasure code plugin
+ :returns: a shell command as an array suitable for Remote.run
+
+ If profile is {}, it is replaced with
+
+ { 'k': '2', 'm': '1', 'crush-failure-domain': 'osd'}
+
+ for backward compatibility. In previous versions of teuthology,
+ these values were hardcoded as function arguments and some yaml
+ files were designed with these implicit values. The teuthology
+ code should not know anything about the erasure code profile
+ content or semantic. The valid values and parameters are outside
+ its scope.
+ """
+
+ if profile == {}:
+ profile = {
+ 'k': '2',
+ 'm': '1',
+ 'crush-failure-domain': 'osd'
+ }
+ return [
+ 'osd', 'erasure-code-profile', 'set',
+ profile_name
+ ] + [ str(key) + '=' + str(value) for key, value in profile.items() ]
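For example, the command lists returned by cmd_erasure_code_profile() (key order assumes Python 3.7+ dict ordering), which create_ec_pool() prefixes with 'sudo ceph':

    from tasks.util.rados import cmd_erasure_code_profile

    cmd_erasure_code_profile('myprofile', {})
    # ['osd', 'erasure-code-profile', 'set', 'myprofile',
    #  'k=2', 'm=1', 'crush-failure-domain=osd']
    cmd_erasure_code_profile('myprofile', {'k': '4', 'm': '2'})
    # ['osd', 'erasure-code-profile', 'set', 'myprofile', 'k=4', 'm=2']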
diff --git a/qa/tasks/util/rgw.py b/qa/tasks/util/rgw.py
new file mode 100644
index 00000000..3229f0a5
--- /dev/null
+++ b/qa/tasks/util/rgw.py
@@ -0,0 +1,94 @@
+import logging
+import json
+import time
+
+from six import StringIO
+
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False,
+ format='json', decode=True, log_level=logging.DEBUG):
+ log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd))
+ testdir = teuthology.get_testdir(ctx)
+ cluster_name, daemon_type, client_id = teuthology.split_role(client)
+ client_with_id = daemon_type + '.' + client_id
+ pre = [
+ 'adjust-ulimits',
+        'ceph-coverage',
+        '{tdir}/archive/coverage'.format(tdir=testdir),
+        'radosgw-admin',
+ '--log-to-stderr',
+ '--format', format,
+ '-n', client_with_id,
+ '--cluster', cluster_name,
+ ]
+ pre.extend(cmd)
+ log.log(log_level, 'rgwadmin: cmd=%s' % pre)
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ proc = remote.run(
+ args=pre,
+ check_status=check_status,
+ stdout=StringIO(),
+ stderr=StringIO(),
+ stdin=stdin,
+ )
+ r = proc.exitstatus
+ out = proc.stdout.getvalue()
+ if not decode:
+ return (r, out)
+ j = None
+ if not r and out != '':
+ try:
+ j = json.loads(out)
+ log.log(log_level, ' json result: %s' % j)
+ except ValueError:
+ j = out
+ log.log(log_level, ' raw result: %s' % j)
+ return (r, j)
+
+def get_user_summary(out, user):
+ """Extract the summary for a given user"""
+ user_summary = None
+ for summary in out['summary']:
+ if summary.get('user') == user:
+ user_summary = summary
+
+ if not user_summary:
+ raise AssertionError('No summary info found for user: %s' % user)
+
+ return user_summary
+
+def get_user_successful_ops(out, user):
+ summary = out['summary']
+ if len(summary) == 0:
+ return 0
+ return get_user_summary(out, user)['total']['successful_ops']
+
+def wait_for_radosgw(url, remote):
+ """ poll the given url until it starts accepting connections
+
+ add_daemon() doesn't wait until radosgw finishes startup, so this is used
+ to avoid racing with later tasks that expect radosgw to be up and listening
+ """
+ # TODO: use '--retry-connrefused --retry 8' when teuthology is running on
+ # Centos 8 and other OS's with an updated version of curl
+ curl_cmd = ['curl',
+ url]
+ exit_status = 0
+ num_retries = 8
+ for seconds in range(num_retries):
+ proc = remote.run(
+ args=curl_cmd,
+ check_status=False,
+ stdout=StringIO(),
+ stderr=StringIO(),
+ stdin=StringIO(),
+ )
+ exit_status = proc.exitstatus
+ if exit_status == 0:
+ break
+ time.sleep(2**seconds)
+
+ assert exit_status == 0
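wait_for_radosgw() above is an instance of a retry-with-exponential-backoff loop; a generic sketch of the same pattern, where `probe` is any zero-argument callable that returns True once the endpoint answers:

    import time

    def wait_until_up(probe, num_retries=8):
        for attempt in range(num_retries):
            if probe():
                return True
            time.sleep(2 ** attempt)   # 1s, 2s, 4s, ... between attempts
        return False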
diff --git a/qa/tasks/util/test/__init__.py b/qa/tasks/util/test/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/qa/tasks/util/test/__init__.py
diff --git a/qa/tasks/util/test/test_rados.py b/qa/tasks/util/test/test_rados.py
new file mode 100644
index 00000000..a8f4cb02
--- /dev/null
+++ b/qa/tasks/util/test/test_rados.py
@@ -0,0 +1,40 @@
+#
+# The MIT License
+#
+# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+from tasks.util import rados
+
+class TestRados(object):
+
+ def test_cmd_erasure_code_profile(self):
+ name = 'NAME'
+ cmd = rados.cmd_erasure_code_profile(name, {})
+ assert 'k=2' in cmd
+ assert name in cmd
+ cmd = rados.cmd_erasure_code_profile(name, { 'k': '88' })
+ assert 'k=88' in cmd
+ assert name in cmd
diff --git a/qa/tasks/util/workunit.py b/qa/tasks/util/workunit.py
new file mode 100644
index 00000000..1f5623af
--- /dev/null
+++ b/qa/tasks/util/workunit.py
@@ -0,0 +1,78 @@
+import copy
+
+from teuthology import misc
+from teuthology.orchestra import run
+
+class Refspec:
+ def __init__(self, refspec):
+ self.refspec = refspec
+
+ def __str__(self):
+ return self.refspec
+
+ def _clone(self, git_url, clonedir, opts=None):
+ if opts is None:
+ opts = []
+ return (['rm', '-rf', clonedir] +
+ [run.Raw('&&')] +
+ ['git', 'clone'] + opts +
+ [git_url, clonedir])
+
+ def _cd(self, clonedir):
+ return ['cd', clonedir]
+
+ def _checkout(self):
+ return ['git', 'checkout', self.refspec]
+
+ def clone(self, git_url, clonedir):
+ return (self._clone(git_url, clonedir) +
+ [run.Raw('&&')] +
+ self._cd(clonedir) +
+ [run.Raw('&&')] +
+ self._checkout())
+
+
+class Branch(Refspec):
+ def __init__(self, tag):
+ Refspec.__init__(self, tag)
+
+ def clone(self, git_url, clonedir):
+ opts = ['--depth', '1',
+ '--branch', self.refspec]
+ return (self._clone(git_url, clonedir, opts) +
+ [run.Raw('&&')] +
+ self._cd(clonedir))
+
+
+class Head(Refspec):
+ def __init__(self):
+ Refspec.__init__(self, 'HEAD')
+
+ def clone(self, git_url, clonedir):
+ opts = ['--depth', '1']
+ return (self._clone(git_url, clonedir, opts) +
+ [run.Raw('&&')] +
+ self._cd(clonedir))
+
+
+def get_refspec_after_overrides(config, overrides):
+ # mimic the behavior of the "install" task, where the "overrides" are
+ # actually the defaults of that task. in other words, if none of "sha1",
+ # "tag", or "branch" is specified by a "workunit" tasks, we will update
+ # it with the information in the "workunit" sub-task nested in "overrides".
+ overrides = copy.deepcopy(overrides.get('workunit', {}))
+ refspecs = {'suite_sha1': Refspec, 'suite_branch': Branch,
+ 'sha1': Refspec, 'tag': Refspec, 'branch': Branch}
+ if any(map(lambda i: i in config, refspecs.keys())):
+ for i in refspecs.keys():
+ overrides.pop(i, None)
+ misc.deep_merge(config, overrides)
+
+ for spec, cls in refspecs.items():
+ refspec = config.get(spec)
+ if refspec:
+ refspec = cls(refspec)
+ break
+ if refspec is None:
+ refspec = Head()
+ return refspec
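For reference, the argument lists the Refspec classes above produce (the URL and directory are placeholders; run.Raw('&&') marks shell operators, as elsewhere in the tasks):

    from tasks.util.workunit import Branch, Refspec

    Branch('nautilus').clone('https://github.com/ceph/ceph.git', 'clonedir')
    # ['rm', '-rf', 'clonedir', Raw('&&'),
    #  'git', 'clone', '--depth', '1', '--branch', 'nautilus',
    #  'https://github.com/ceph/ceph.git', 'clonedir', Raw('&&'),
    #  'cd', 'clonedir']

    Refspec('0123abcd').clone('https://github.com/ceph/ceph.git', 'clonedir')
    # same clone without --depth/--branch, then: ..., 'git', 'checkout', '0123abcd'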
diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py
new file mode 100644
index 00000000..cfbaad78
--- /dev/null
+++ b/qa/tasks/vstart_runner.py
@@ -0,0 +1,1169 @@
+"""
+vstart_runner: override Filesystem and Mount interfaces to run a CephFSTestCase against a vstart
+ceph instance instead of a packaged/installed cluster. Use this to turn around test cases
+quickly during development.
+
+Simple usage (assuming teuthology and ceph checked out in ~/git):
+
+ # Activate the teuthology virtualenv
+ source ~/git/teuthology/virtualenv/bin/activate
+ # Go into your ceph build directory
+ cd ~/git/ceph/build
+ # Invoke a test using this script
+ python ~/git/ceph/qa/tasks/vstart_runner.py --create tasks.cephfs.test_data_scan
+
+Alternative usage:
+
+ # Alternatively, if you use different paths, specify them as follows:
+ LD_LIBRARY_PATH=`pwd`/lib PYTHONPATH=~/git/teuthology:~/git/ceph/qa:`pwd`/../src/pybind:`pwd`/lib/cython_modules/lib.2 python ~/git/ceph/qa/tasks/vstart_runner.py
+
+ # If you wish to drop to a python shell on failures, use --interactive:
+ python ~/git/ceph/qa/tasks/vstart_runner.py --interactive
+
+ # If you wish to run a named test case, pass it as an argument:
+ python ~/git/ceph/qa/tasks/vstart_runner.py tasks.cephfs.test_data_scan
+
+ # Also, you can create the cluster once and then run named test cases against it:
+ python ~/git/ceph/qa/tasks/vstart_runner.py --create-cluster-only
+ python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_health
+ python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_rgw
+
+"""
+
+from io import BytesIO
+from io import StringIO
+from collections import defaultdict
+import getpass
+import signal
+import tempfile
+import threading
+import datetime
+import shutil
+import re
+import os
+import time
+import sys
+import errno
+from unittest import suite, loader
+import unittest
+import platform
+from teuthology import misc
+from teuthology.orchestra.run import Raw, quote
+from teuthology.orchestra.daemon import DaemonGroup
+from teuthology.config import config as teuth_config
+import six
+import logging
+try:
+ import urllib3
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+except:
+ pass
+
+log = logging.getLogger(__name__)
+
+handler = logging.FileHandler("./vstart_runner.log")
+formatter = logging.Formatter(
+ fmt=u'%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
+ datefmt='%Y-%m-%dT%H:%M:%S')
+handler.setFormatter(formatter)
+log.addHandler(handler)
+log.setLevel(logging.INFO)
+
+
+def respawn_in_path(lib_path, python_paths):
+ execv_cmd = ['python']
+ if platform.system() == "Darwin":
+ lib_path_var = "DYLD_LIBRARY_PATH"
+ else:
+ lib_path_var = "LD_LIBRARY_PATH"
+
+ py_binary = os.environ.get("PYTHON", "python")
+
+ if lib_path_var in os.environ:
+ if lib_path not in os.environ[lib_path_var]:
+ os.environ[lib_path_var] += ':' + lib_path
+ os.execvp(py_binary, execv_cmd + sys.argv)
+ else:
+ os.environ[lib_path_var] = lib_path
+ os.execvp(py_binary, execv_cmd + sys.argv)
+
+ for p in python_paths:
+ sys.path.insert(0, p)
+
+
+# Let's use some sensible defaults
+if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"):
+
+ # A list of candidate paths for each package we need
+ guesses = [
+ ["~/git/teuthology", "~/scm/teuthology", "~/teuthology"],
+ ["lib/cython_modules/lib.2"],
+ ["../src/pybind"],
+ ]
+
+ python_paths = []
+
+ # Up one level so that "tasks.foo.bar" imports work
+ python_paths.append(os.path.abspath(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
+ ))
+
+ for package_guesses in guesses:
+ for g in package_guesses:
+ g_exp = os.path.abspath(os.path.expanduser(g))
+ if os.path.exists(g_exp):
+ python_paths.append(g_exp)
+
+ ld_path = os.path.join(os.getcwd(), "lib/")
+ print("Using guessed paths {0} {1}".format(ld_path, python_paths))
+ respawn_in_path(ld_path, python_paths)
+
+
+try:
+ from teuthology.exceptions import CommandFailedError
+ from tasks.ceph_manager import CephManager
+ from tasks.cephfs.fuse_mount import FuseMount
+ from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
+ from tasks.mgr.mgr_test_case import MgrCluster
+ from teuthology.contextutil import MaxWhileTries
+ from teuthology.task import interactive
+except ImportError:
+ sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv "
+ "and set PYTHONPATH to point to teuthology and ceph-qa-suite?\n***\n\n")
+ raise
+
+# Must import after teuthology because of gevent monkey patching
+import subprocess
+
+if os.path.exists("./CMakeCache.txt"):
+ # Running in build dir of a cmake build
+ BIN_PREFIX = "./bin/"
+ SRC_PREFIX = "../src"
+else:
+ # Running in src/ of an autotools build
+ BIN_PREFIX = "./"
+ SRC_PREFIX = "./"
+
+
+class LocalRemoteProcess(object):
+ def __init__(self, args, subproc, check_status, stdout, stderr):
+ self.args = args
+ self.subproc = subproc
+ self.stdout = stdout
+ self.stderr = stderr
+ # this variable is meant for instance of this class named fuse_daemon.
+ # child process of the command launched with sudo must be killed,
+ # since killing parent process alone has no impact on the child
+ # process.
+ self.fuse_pid = -1
+
+ self.check_status = check_status
+ self.exitstatus = self.returncode = None
+
+ def wait(self):
+ if self.finished:
+ # Avoid calling communicate() on a dead process because it'll
+ # give you stick about std* already being closed
+ if self.check_status and self.exitstatus != 0:
+ raise CommandFailedError(self.args, self.exitstatus)
+ else:
+ return
+
+ out, err = self.subproc.communicate()
+ if isinstance(self.stdout, StringIO):
+ self.stdout.write(out.decode(errors='ignore'))
+ elif self.stdout is None:
+ pass
+ else:
+ self.stdout.write(out)
+ if isinstance(self.stderr, StringIO):
+ self.stderr.write(err.decode(errors='ignore'))
+ elif self.stderr is None:
+ pass
+ else:
+ self.stderr.write(err)
+
+ self.exitstatus = self.returncode = self.subproc.returncode
+
+ if self.exitstatus != 0:
+ sys.stderr.write(six.ensure_str(out))
+ sys.stderr.write(six.ensure_str(err))
+
+ if self.check_status and self.exitstatus != 0:
+ raise CommandFailedError(self.args, self.exitstatus)
+
+ @property
+ def finished(self):
+ if self.exitstatus is not None:
+ return True
+
+ if self.subproc.poll() is not None:
+ out, err = self.subproc.communicate()
+ if isinstance(self.stdout, StringIO):
+ self.stdout.write(out.decode(errors='ignore'))
+ elif self.stdout is None:
+ pass
+ else:
+ self.stdout.write(out)
+ if isinstance(self.stderr, StringIO):
+ self.stderr.write(err.decode(errors='ignore'))
+ elif self.stderr is None:
+ pass
+ else:
+ self.stderr.write(err)
+ self.exitstatus = self.returncode = self.subproc.returncode
+ return True
+ else:
+ return False
+
+ def kill(self):
+ log.debug("kill ")
+ if self.subproc.pid and not self.finished:
+ log.debug("kill: killing pid {0} ({1})".format(
+ self.subproc.pid, self.args))
+ safe_kill(self.subproc.pid)
+ else:
+ log.debug("kill: already terminated ({0})".format(self.args))
+
+ @property
+ def stdin(self):
+ class FakeStdIn(object):
+ def __init__(self, mount_daemon):
+ self.mount_daemon = mount_daemon
+
+ def close(self):
+ self.mount_daemon.kill()
+
+ return FakeStdIn(self)
+
+
+class LocalRemote(object):
+ """
+ Amusingly named class to present the teuthology RemoteProcess interface when we are really
+ running things locally for vstart
+
+ Run this inside your src/ dir!
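+
+    A minimal usage sketch (illustrative only; it assumes a vstart cluster is
+    already running in the current directory):
+
+        remote = LocalRemote()
+        print(remote.sh(['whoami']))
+        proc = remote.run(args=['ceph', 'status'], stdout=StringIO())
+        print(proc.stdout.getvalue())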
+ """
+
+ def __init__(self):
+ self.name = "local"
+ self.hostname = "localhost"
+ self.user = getpass.getuser()
+
+ def get_file(self, path, sudo, dest_dir):
+ tmpfile = tempfile.NamedTemporaryFile(delete=False).name
+ shutil.copy(path, tmpfile)
+ return tmpfile
+
+ def put_file(self, src, dst, sudo=False):
+ shutil.copy(src, dst)
+
+ def run(self, args, check_status=True, wait=True,
+ stdout=None, stderr=None, cwd=None, stdin=None,
+ logger=None, label=None, env=None, timeout=None, omit_sudo=True):
+ try:
+ if args[args.index('sudo') + 1] in ['-u', 'passwd', 'chown']:
+ omit_sudo = False
+ except ValueError:
+ pass
+
+ # We don't need no stinkin' sudo
+ if omit_sudo:
+ args = [a for a in args if a != "sudo"]
+
+ # We have to use shell=True if any run.Raw was present, e.g. &&
+ shell = any([a for a in args if isinstance(a, Raw)])
+
+ # Filter out helper tools that don't exist in a vstart environment
+ args = [a for a in args if a not in (
+ 'adjust-ulimits', 'ceph-coverage', 'timeout')]
+
+ # Adjust binary path prefix if given a bare program name
+ if "/" not in args[0]:
+ # If they asked for a bare binary name, and it exists
+ # in our built tree, use the one there.
+ local_bin = os.path.join(BIN_PREFIX, args[0])
+ if os.path.exists(local_bin):
+ args = [local_bin] + args[1:]
+ else:
+ log.debug("'{0}' is not a binary in the Ceph build dir".format(
+ args[0]
+ ))
+
+ log.debug("Running {0}".format(args))
+
+ if shell:
+ subproc = subprocess.Popen(quote(args),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ cwd=cwd,
+ shell=True)
+ else:
+ # Sanity check that we've got a list of strings
+ for arg in args:
+ if not isinstance(arg, six.string_types):
+ raise RuntimeError("Oops, can't handle arg {0} type {1}".format(
+ arg, arg.__class__
+ ))
+
+ subproc = subprocess.Popen(args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ cwd=cwd,
+ env=env)
+
+ if stdin:
+ # Hack: writing to stdin is not deadlock-safe, but it "always" works
+ # as long as the input buffer is "small"
+ if isinstance(stdin, str):
+ subproc.stdin.write(stdin.encode())
+ else:
+ subproc.stdin.write(stdin)
+
+ proc = LocalRemoteProcess(
+ args, subproc, check_status,
+ stdout, stderr
+ )
+
+ if wait:
+ proc.wait()
+
+ return proc
+
+    # XXX: for compatibility, keep this method the same as teuthology.orchestra.remote.sh
+ def sh(self, script, **kwargs):
+ """
+ Shortcut for run method.
+
+ Usage:
+ my_name = remote.sh('whoami')
+ remote_date = remote.sh('date')
+ """
+ if 'stdout' not in kwargs:
+ kwargs['stdout'] = StringIO()
+ if 'args' not in kwargs:
+ kwargs['args'] = script
+ proc = self.run(**kwargs)
+ return proc.stdout.getvalue()
+
+
+class LocalDaemon(object):
+ def __init__(self, daemon_type, daemon_id):
+ self.daemon_type = daemon_type
+ self.daemon_id = daemon_id
+ self.controller = LocalRemote()
+ self.proc = None
+
+ @property
+ def remote(self):
+ return LocalRemote()
+
+ def running(self):
+ return self._get_pid() is not None
+
+ def check_status(self):
+ if self.proc:
+ return self.proc.poll()
+
+ def _get_pid(self):
+ """
+ Return PID as an integer or None if not found
+ """
+ ps_txt = self.controller.run(args=["ps", "ww", "-u"+str(os.getuid())],
+ stdout=StringIO()).\
+ stdout.getvalue().strip()
+ lines = ps_txt.split("\n")[1:]
+
+ for line in lines:
+ if line.find("ceph-{0} -i {1}".format(self.daemon_type, self.daemon_id)) != -1:
+ log.debug("Found ps line for daemon: {0}".format(line))
+ return int(line.split()[0])
+ log.debug("No match for {0} {1}: {2}".format(
+ self.daemon_type, self.daemon_id, ps_txt
+ ))
+ return None
+
+ def wait(self, timeout):
+ waited = 0
+ while self._get_pid() is not None:
+ if waited > timeout:
+ raise MaxWhileTries("Timed out waiting for daemon {0}.{1}".format(self.daemon_type, self.daemon_id))
+ time.sleep(1)
+ waited += 1
+
+ def stop(self, timeout=300):
+ if not self.running():
+ log.error('tried to stop a non-running daemon')
+ return
+
+ pid = self._get_pid()
+ log.debug("Killing PID {0} for {1}.{2}".format(pid, self.daemon_type, self.daemon_id))
+ os.kill(pid, signal.SIGTERM)
+
+ waited = 0
+ while pid is not None:
+ new_pid = self._get_pid()
+ if new_pid is not None and new_pid != pid:
+ log.debug("Killing new PID {0}".format(new_pid))
+ pid = new_pid
+ os.kill(pid, signal.SIGTERM)
+
+ if new_pid is None:
+ break
+ else:
+ if waited > timeout:
+ raise MaxWhileTries(
+ "Timed out waiting for daemon {0}.{1}".format(
+ self.daemon_type, self.daemon_id))
+ time.sleep(1)
+ waited += 1
+
+ self.wait(timeout=timeout)
+
+ def restart(self):
+ if self._get_pid() is not None:
+ self.stop()
+
+ self.proc = self.controller.run([os.path.join(BIN_PREFIX, "./ceph-{0}".format(self.daemon_type)), "-i", self.daemon_id])
+
+ def signal(self, sig, silent=False):
+ if not self.running():
+ raise RuntimeError("Can't send signal to non-running daemon")
+
+ os.kill(self._get_pid(), sig)
+ if not silent:
+ log.debug("Sent signal {0} to {1}.{2}".format(sig, self.daemon_type, self.daemon_id))
+
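+# Minimal usage sketch for the LocalDaemon class above (illustrative; assumes a
+# vstart cluster with an mds.a defined in ceph.conf):
+#
+#     d = LocalDaemon("mds", "a")
+#     if d.running():
+#         d.restart()  # stop()s any existing process, then relaunches ceph-mds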
+
+def safe_kill(pid):
+ """
+ os.kill annoyingly raises exception if process already dead. Ignore it.
+ """
+ try:
+ return os.kill(pid, signal.SIGKILL)
+ except OSError as e:
+ if e.errno == errno.ESRCH:
+ # Raced with process termination
+ pass
+ else:
+ raise
+
+
+class LocalFuseMount(FuseMount):
+ def __init__(self, ctx, test_dir, client_id):
+ super(LocalFuseMount, self).__init__(ctx, None, test_dir, client_id, LocalRemote())
+
+ @property
+ def config_path(self):
+ return "./ceph.conf"
+
+ def get_keyring_path(self):
+ # This is going to end up in a config file, so use an absolute path
+ # to avoid assumptions about daemons' pwd
+ return os.path.abspath("./client.{0}.keyring".format(self.client_id))
+
+ def run_shell(self, args, wait=True, check_status=True, omit_sudo=True):
+        # FIXME: maybe a pwd arg should be added to teuthology.orchestra so that
+        # the "cd foo && bar" shenanigans aren't needed to begin with, and then
+        # we wouldn't have to special-case this
+ return self.client_remote.run(args, wait=wait, cwd=self.mountpoint,
+ check_status=check_status,
+ omit_sudo=omit_sudo)
+
+ def setupfs(self, name=None):
+ if name is None and self.fs is not None:
+ # Previous mount existed, reuse the old name
+ name = self.fs.name
+ self.fs = LocalFilesystem(self.ctx, name=name)
+ log.debug('Wait for MDS to reach steady state...')
+ self.fs.wait_for_daemons()
+ log.debug('Ready to start {}...'.format(type(self).__name__))
+
+ @property
+ def _prefix(self):
+ return BIN_PREFIX
+
+ def _asok_path(self):
+        # In teuthology, the asok is named after the PID of the ceph-fuse process because it is
+        # run in the foreground. When running it daemonized, however, the asok is named after
+        # the PID of the launching process, not the long-running ceph-fuse process. Therefore
+        # we need to give an exact path here, as the logic that checks /proc/ to see which
+        # asok is alive does not work.
+
+        # Load the asok path from ceph.conf, as vstart.sh now puts admin sockets
+        # in a tmpdir. All of the paths are the same, so there is no need to
+        # select one based on the service type.
+ d = "./out"
+ with open(self.config_path) as f:
+ for line in f:
+ asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line)
+ if asok_conf:
+ d = asok_conf.groups(1)[0]
+ break
+ path = "{0}/client.{1}.{2}.asok".format(d, self.client_id, self.fuse_daemon.subproc.pid)
+ log.info("I think my launching pid was {0}".format(self.fuse_daemon.subproc.pid))
+ return path
+
+ def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+ if mountpoint is not None:
+ self.mountpoint = mountpoint
+ self.setupfs(name=mount_fs_name)
+
+ self.client_remote.run(args=['mkdir', '-p', self.mountpoint])
+
+ def list_connections():
+ self.client_remote.run(
+ args=["mount", "-t", "fusectl", "/sys/fs/fuse/connections", "/sys/fs/fuse/connections"],
+ check_status=False
+ )
+
+ p = self.client_remote.run(args=["ls", "/sys/fs/fuse/connections"],
+ check_status=False, stdout=StringIO())
+ if p.exitstatus != 0:
+ log.warning("ls conns failed with {0}, assuming none".format(p.exitstatus))
+ return []
+
+ ls_str = p.stdout.getvalue().strip()
+ if ls_str:
+ return [int(n) for n in ls_str.split("\n")]
+ else:
+ return []
+
+ # Before starting ceph-fuse process, note the contents of
+ # /sys/fs/fuse/connections
+ pre_mount_conns = list_connections()
+ log.debug("Pre-mount connections: {0}".format(pre_mount_conns))
+
+ prefix = [os.path.join(BIN_PREFIX, "ceph-fuse")]
+ if os.getuid() != 0:
+ prefix += ["--client_die_on_failed_dentry_invalidate=false"]
+
+ if mount_path is not None:
+ prefix += ["--client_mountpoint={0}".format(mount_path)]
+
+ if mount_fs_name is not None:
+ prefix += ["--client_mds_namespace={0}".format(mount_fs_name)]
+
+ self.fuse_daemon = self.client_remote.run(args=
+ prefix + [
+ "-f",
+ "--name",
+ "client.{0}".format(self.client_id),
+ self.mountpoint
+ ], wait=False)
+
+ log.debug("Mounting client.{0} with pid {1}".format(self.client_id, self.fuse_daemon.subproc.pid))
+
+ # Wait for the connection reference to appear in /sys
+ waited = 0
+ post_mount_conns = list_connections()
+ while len(post_mount_conns) <= len(pre_mount_conns):
+ if self.fuse_daemon.finished:
+ # Did mount fail? Raise the CommandFailedError instead of
+ # hitting the "failed to populate /sys/" timeout
+ self.fuse_daemon.wait()
+ time.sleep(1)
+ waited += 1
+ if waited > 30:
+ raise RuntimeError("Fuse mount failed to populate /sys/ after {0} seconds".format(
+ waited
+ ))
+ post_mount_conns = list_connections()
+
+ log.debug("Post-mount connections: {0}".format(post_mount_conns))
+
+ # Record our fuse connection number so that we can use it when
+ # forcing an unmount
+ new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
+ if len(new_conns) == 0:
+ raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
+ elif len(new_conns) > 1:
+ raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
+ else:
+ self._fuse_conn = new_conns[0]
+
+ self.gather_mount_info()
+
+ self.mounted = True
+
+ def _run_python(self, pyscript, py_version='python'):
+ """
+ Override this to remove the daemon-helper prefix that is used otherwise
+ to make the process killable.
+ """
+ return self.client_remote.run(args=[py_version, '-c', pyscript],
+ wait=False, stdout=StringIO())
+
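+# Minimal usage sketch for the LocalFuseMount class above (illustrative; it
+# mirrors how exec_test() constructs mounts further down in this file):
+#
+#     ctx = LocalContext()
+#     mount = LocalFuseMount(ctx, tempfile.mkdtemp(), "0")
+#     mount.mount()
+#     mount.run_shell(["ls", "-l"])
+#     mount.umount_wait()
+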
+class LocalCephManager(CephManager):
+ def __init__(self):
+ # Deliberately skip parent init, only inheriting from it to get
+ # util methods like osd_dump that sit on top of raw_cluster_cmd
+ self.controller = LocalRemote()
+
+        # Only a minority of CephManager functions actually bother to take this
+        # lock, for when certain teuthology tests want to run tasks in parallel
+ self.lock = threading.RLock()
+
+ self.log = lambda x: log.debug(x)
+
+ # Don't bother constructing a map of pools: it should be empty
+ # at test cluster start, and in any case it would be out of date
+ # in no time. The attribute needs to exist for some of the CephManager
+ # methods to work though.
+ self.pools = {}
+
+ def find_remote(self, daemon_type, daemon_id):
+ """
+ daemon_type like 'mds', 'osd'
+ daemon_id like 'a', '0'
+ """
+ return LocalRemote()
+
+ def run_ceph_w(self, watch_channel=None):
+ """
+ :param watch_channel: Specifies the channel to be watched.
+ This can be 'cluster', 'audit', ...
+ :type watch_channel: str
+ """
+ args = [os.path.join(BIN_PREFIX, "ceph"), "-w"]
+ if watch_channel is not None:
+ args.append("--watch-channel")
+ args.append(watch_channel)
+ proc = self.controller.run(args=args, wait=False, stdout=StringIO())
+ return proc
+
+ def raw_cluster_cmd(self, *args, **kwargs):
+ """
+        args like ["osd", "dump"]
+ return stdout string
+ """
+ proc = self.controller.run(args=[os.path.join(BIN_PREFIX, "ceph")] +\
+ list(args), **kwargs, stdout=StringIO())
+ return proc.stdout.getvalue()
+
+ def raw_cluster_cmd_result(self, *args, **kwargs):
+ """
+ like raw_cluster_cmd but don't check status, just return rc
+ """
+ kwargs['check_status'] = False
+ proc = self.controller.run([os.path.join(BIN_PREFIX, "ceph")] + list(args), **kwargs)
+ return proc.exitstatus
+
+ def admin_socket(self, daemon_type, daemon_id, command, check_status=True,
+ timeout=None, stdout=None):
+ if stdout is None:
+ stdout = StringIO()
+
+ return self.controller.run(
+ args=[os.path.join(BIN_PREFIX, "ceph"), "daemon",
+ "{0}.{1}".format(daemon_type, daemon_id)] + command,
+ check_status=check_status, timeout=timeout, stdout=stdout)
+
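+# Minimal usage sketch for the LocalCephManager class above (illustrative;
+# assumes a running vstart cluster in the current working directory):
+#
+#     manager = LocalCephManager()
+#     print(manager.raw_cluster_cmd("osd", "dump"))
+#     rc = manager.raw_cluster_cmd_result("health")
+#     manager.admin_socket("mds", "a", ["status"])
+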
+
+class LocalCephCluster(CephCluster):
+ def __init__(self, ctx):
+ # Deliberately skip calling parent constructor
+ self._ctx = ctx
+ self.mon_manager = LocalCephManager()
+ self._conf = defaultdict(dict)
+
+ @property
+ def admin_remote(self):
+ return LocalRemote()
+
+ def get_config(self, key, service_type=None):
+ if service_type is None:
+ service_type = 'mon'
+
+ # FIXME hardcoded vstart service IDs
+ service_id = {
+ 'mon': 'a',
+ 'mds': 'a',
+ 'osd': '0'
+ }[service_type]
+
+ return self.json_asok(['config', 'get', key], service_type, service_id)[key]
+
+ def _write_conf(self):
+ # In teuthology, we have the honour of writing the entire ceph.conf, but
+ # in vstart land it has mostly already been written and we need to carefully
+ # append to it.
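+        #
+        # Illustrative example of the fragment this method appends, assuming
+        # self._conf == {'mds': {'mds log max segments': '10'}}:
+        #
+        #     #LOCAL_TEST
+        #
+        #     [mds]
+        #     mds log max segments = 10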
+ conf_path = "./ceph.conf"
+ banner = "\n#LOCAL_TEST\n"
+ existing_str = open(conf_path).read()
+
+ if banner in existing_str:
+ existing_str = existing_str[0:existing_str.find(banner)]
+
+ existing_str += banner
+
+ for subsys, kvs in self._conf.items():
+ existing_str += "\n[{0}]\n".format(subsys)
+ for key, val in kvs.items():
+ # Comment out existing instance if it exists
+ log.debug("Searching for existing instance {0}/{1}".format(
+ key, subsys
+ ))
+ existing_section = re.search("^\[{0}\]$([\n]|[^\[])+".format(
+ subsys
+ ), existing_str, re.MULTILINE)
+
+ if existing_section:
+ section_str = existing_str[existing_section.start():existing_section.end()]
+ existing_val = re.search("^\s*[^#]({0}) =".format(key), section_str, re.MULTILINE)
+ if existing_val:
+ start = existing_section.start() + existing_val.start(1)
+ log.debug("Found string to replace at {0}".format(
+ start
+ ))
+ existing_str = existing_str[0:start] + "#" + existing_str[start:]
+
+ existing_str += "{0} = {1}\n".format(key, val)
+
+ open(conf_path, "w").write(existing_str)
+
+ def set_ceph_conf(self, subsys, key, value):
+ self._conf[subsys][key] = value
+ self._write_conf()
+
+ def clear_ceph_conf(self, subsys, key):
+ del self._conf[subsys][key]
+ self._write_conf()
+
+
+class LocalMDSCluster(LocalCephCluster, MDSCluster):
+ def __init__(self, ctx):
+ super(LocalMDSCluster, self).__init__(ctx)
+
+ self.mds_ids = ctx.daemons.daemons['ceph.mds'].keys()
+ self.mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids])
+
+ def clear_firewall(self):
+ # FIXME: unimplemented
+ pass
+
+ def newfs(self, name='cephfs', create=True):
+ return LocalFilesystem(self._ctx, name=name, create=create)
+
+
+class LocalMgrCluster(LocalCephCluster, MgrCluster):
+ def __init__(self, ctx):
+ super(LocalMgrCluster, self).__init__(ctx)
+
+ self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys()
+ self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids])
+
+
+class LocalFilesystem(Filesystem, LocalMDSCluster):
+ def __init__(self, ctx, fscid=None, name='cephfs', create=False):
+ # Deliberately skip calling parent constructor
+ self._ctx = ctx
+
+ self.id = None
+ self.name = None
+ self.ec_profile = None
+ self.metadata_pool_name = None
+ self.metadata_overlay = False
+ self.data_pool_name = None
+ self.data_pools = None
+ self.fs_config = None
+
+ # Hack: cheeky inspection of ceph.conf to see what MDSs exist
+ self.mds_ids = set()
+ for line in open("ceph.conf").readlines():
+ match = re.match("^\[mds\.(.+)\]$", line)
+ if match:
+ self.mds_ids.add(match.group(1))
+
+ if not self.mds_ids:
+ raise RuntimeError("No MDSs found in ceph.conf!")
+
+ self.mds_ids = list(self.mds_ids)
+
+ log.debug("Discovered MDS IDs: {0}".format(self.mds_ids))
+
+ self.mon_manager = LocalCephManager()
+
+ self.mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids])
+
+ self.client_remote = LocalRemote()
+
+ self._conf = defaultdict(dict)
+
+ if name is not None:
+ if fscid is not None:
+ raise RuntimeError("cannot specify fscid when creating fs")
+ if create and not self.legacy_configured():
+ self.create()
+ else:
+ if fscid is not None:
+ self.id = fscid
+ self.getinfo(refresh=True)
+
+ # Stash a reference to the first created filesystem on ctx, so
+ # that if someone drops to the interactive shell they can easily
+ # poke our methods.
+ if not hasattr(self._ctx, "filesystem"):
+ self._ctx.filesystem = self
+
+ @property
+ def _prefix(self):
+ return BIN_PREFIX
+
+ def set_clients_block(self, blocked, mds_id=None):
+ raise NotImplementedError()
+
+
+class InteractiveFailureResult(unittest.TextTestResult):
+ """
+ Specialization that implements interactive-on-error style
+ behavior.
+ """
+ def addFailure(self, test, err):
+ super(InteractiveFailureResult, self).addFailure(test, err)
+ log.error(self._exc_info_to_string(err, test))
+ log.error("Failure in test '{0}', going interactive".format(
+ self.getDescription(test)
+ ))
+ interactive.task(ctx=None, config=None)
+
+ def addError(self, test, err):
+ super(InteractiveFailureResult, self).addError(test, err)
+ log.error(self._exc_info_to_string(err, test))
+ log.error("Error in test '{0}', going interactive".format(
+ self.getDescription(test)
+ ))
+ interactive.task(ctx=None, config=None)
+
+
+def enumerate_methods(s):
+ log.debug("e: {0}".format(s))
+ for t in s._tests:
+ if isinstance(t, suite.BaseTestSuite):
+ for sub in enumerate_methods(t):
+ yield sub
+ else:
+ yield s, t
+
+
+def load_tests(modules, loader):
+ if modules:
+ log.debug("Executing modules: {0}".format(modules))
+ module_suites = []
+ for mod_name in modules:
+ # Test names like cephfs.test_auto_repair
+ module_suites.append(loader.loadTestsFromName(mod_name))
+ log.debug("Loaded: {0}".format(list(module_suites)))
+ return suite.TestSuite(module_suites)
+ else:
+ log.debug("Executing all cephfs tests")
+ return loader.discover(
+ os.path.join(os.path.dirname(os.path.abspath(__file__)), "cephfs")
+ )
+
+
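+# Illustrative (hypothetical) example of the per-test-class attributes that
+# scan_tests() below looks for; real values are set on the test classes under
+# qa/tasks/cephfs:
+#
+#     class TestExample(CephFSTestCase):
+#         MDSS_REQUIRED = 2
+#         CLIENTS_REQUIRED = 1
+#         MGRS_REQUIRED = 1
+#         REQUIRE_MEMSTORE = False
+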
+def scan_tests(modules):
+ overall_suite = load_tests(modules, loader.TestLoader())
+
+ max_required_mds = 0
+ max_required_clients = 0
+ max_required_mgr = 0
+ require_memstore = False
+
+ for suite_, case in enumerate_methods(overall_suite):
+ max_required_mds = max(max_required_mds,
+ getattr(case, "MDSS_REQUIRED", 0))
+ max_required_clients = max(max_required_clients,
+ getattr(case, "CLIENTS_REQUIRED", 0))
+ max_required_mgr = max(max_required_mgr,
+ getattr(case, "MGRS_REQUIRED", 0))
+ require_memstore = getattr(case, "REQUIRE_MEMSTORE", False) \
+ or require_memstore
+
+ return max_required_mds, max_required_clients, \
+ max_required_mgr, require_memstore
+
+
+class LocalCluster(object):
+ def __init__(self, rolename="placeholder"):
+ self.remotes = {
+ LocalRemote(): [rolename]
+ }
+
+ def only(self, requested):
+ return self.__class__(rolename=requested)
+
+
+class LocalContext(object):
+ def __init__(self):
+ self.config = {}
+ self.teuthology_config = teuth_config
+ self.cluster = LocalCluster()
+ self.daemons = DaemonGroup()
+
+ # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any
+ # tests that want to look these up via ctx can do so.
+ # Inspect ceph.conf to see what roles exist
+ for conf_line in open("ceph.conf").readlines():
+ for svc_type in ["mon", "osd", "mds", "mgr"]:
+ prefixed_type = "ceph." + svc_type
+ if prefixed_type not in self.daemons.daemons:
+ self.daemons.daemons[prefixed_type] = {}
+ match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line)
+ if match:
+ svc_id = match.group(1)
+ self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id)
+
+ def __del__(self):
+ shutil.rmtree(self.teuthology_config['test_path'])
+
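+# Typical invocation of this runner (illustrative; the flag names come from the
+# parser in exec_test() below). From a vstart build dir, something like:
+#
+#     python ../qa/tasks/vstart_runner.py --create cephfs.test_auto_repair
+#
+# where the module name is whatever load_tests() accepts. --interactive drops
+# into an interactive shell when a test fails, and --create-cluster-only just
+# (re)creates the vstart cluster and exits.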
+def exec_test():
+ # Parse arguments
+ interactive_on_error = False
+ create_cluster = False
+ create_cluster_only = False
+ ignore_missing_binaries = False
+ opt_verbose = True
+
+ args = sys.argv[1:]
+ flags = [a for a in args if a.startswith("-")]
+ modules = [a for a in args if not a.startswith("-")]
+ for f in flags:
+ if f == "--interactive":
+ interactive_on_error = True
+ elif f == "--create":
+ create_cluster = True
+ elif f == "--create-cluster-only":
+ create_cluster_only = True
+ elif f == "--ignore-missing-binaries":
+ ignore_missing_binaries = True
+        elif f == '--no-verbose':
+ opt_verbose = False
+ else:
+ log.error("Unknown option '{0}'".format(f))
+ sys.exit(-1)
+
+ # Help developers by stopping up-front if their tree isn't built enough for all the
+ # tools that the tests might want to use (add more here if needed)
+ require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan",
+ "cephfs-table-tool", "ceph-fuse", "rados"]
+ missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))]
+ if missing_binaries and not ignore_missing_binaries:
+ log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries)))
+ sys.exit(-1)
+
+ max_required_mds, max_required_clients, \
+ max_required_mgr, require_memstore = scan_tests(modules)
+
+ remote = LocalRemote()
+
+    # Don't tolerate any stray MDS or ceph-fuse processes at start: kill them
+ ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())],
+ stdout=StringIO()).stdout.getvalue().strip()
+ lines = ps_txt.split("\n")[1:]
+ for line in lines:
+ if 'ceph-fuse' in line or 'ceph-mds' in line:
+ pid = int(line.split()[0])
+ log.warning("Killing stray process {0}".format(line))
+ os.kill(pid, signal.SIGKILL)
+
+ # Fire up the Ceph cluster if the user requested it
+ if create_cluster or create_cluster_only:
+ log.info("Creating cluster with {0} MDS daemons".format(
+ max_required_mds))
+ remote.run([os.path.join(SRC_PREFIX, "stop.sh")], check_status=False)
+ remote.run(["rm", "-rf", "./out"])
+ remote.run(["rm", "-rf", "./dev"])
+ vstart_env = os.environ.copy()
+ vstart_env["FS"] = "0"
+ vstart_env["MDS"] = max_required_mds.__str__()
+ vstart_env["OSD"] = "4"
+ vstart_env["MGR"] = max(max_required_mgr, 1).__str__()
+
+ args = [
+ os.path.join(SRC_PREFIX, "vstart.sh"),
+ "-n",
+ "--nolockdep",
+ ]
+ if require_memstore:
+ args.append("--memstore")
+
+ if opt_verbose:
+ args.append("-d")
+
+ remote.run(args, env=vstart_env)
+
+ # Wait for OSD to come up so that subsequent injectargs etc will
+ # definitely succeed
+ LocalCephCluster(LocalContext()).mon_manager.wait_for_all_osds_up(timeout=30)
+
+ if create_cluster_only:
+ return
+
+ # List of client mounts, sufficient to run the selected tests
+ clients = [i.__str__() for i in range(0, max_required_clients)]
+
+ test_dir = tempfile.mkdtemp()
+ teuth_config['test_path'] = test_dir
+
+ ctx = LocalContext()
+ ceph_cluster = LocalCephCluster(ctx)
+ mds_cluster = LocalMDSCluster(ctx)
+ mgr_cluster = LocalMgrCluster(ctx)
+
+ # Construct Mount classes
+ mounts = []
+ for client_id in clients:
+ # Populate client keyring (it sucks to use client.admin for test clients
+ # because it's awkward to find the logs later)
+ client_name = "client.{0}".format(client_id)
+
+ if client_name not in open("./keyring").read():
+ p = remote.run(args=[os.path.join(BIN_PREFIX, "ceph"), "auth", "get-or-create", client_name,
+ "osd", "allow rw",
+ "mds", "allow",
+ "mon", "allow r"], stdout=StringIO())
+
+ open("./keyring", "at").write(p.stdout.getvalue())
+
+ mount = LocalFuseMount(ctx, test_dir, client_id)
+ mounts.append(mount)
+ if mount.is_mounted():
+ log.warning("unmounting {0}".format(mount.mountpoint))
+ mount.umount_wait()
+ else:
+ if os.path.exists(mount.mountpoint):
+ os.rmdir(mount.mountpoint)
+
+ from tasks.cephfs_test_runner import DecoratingLoader
+
+ class LogStream(object):
+ def __init__(self):
+ self.buffer = ""
+
+ def write(self, data):
+ self.buffer += data
+ if "\n" in self.buffer:
+ lines = self.buffer.split("\n")
+ for line in lines[:-1]:
+                    # sys.stderr.write(line + "\n")
+                    log.info(line)
+ self.buffer = lines[-1]
+
+ def flush(self):
+ pass
+
+ decorating_loader = DecoratingLoader({
+ "ctx": ctx,
+ "mounts": mounts,
+ "ceph_cluster": ceph_cluster,
+ "mds_cluster": mds_cluster,
+ "mgr_cluster": mgr_cluster,
+ })
+
+ # For the benefit of polling tests like test_full -- in teuthology land we set this
+ # in a .yaml, here it's just a hardcoded thing for the developer's pleasure.
+ remote.run(args=[os.path.join(BIN_PREFIX, "ceph"), "tell", "osd.*", "injectargs", "--osd-mon-report-interval", "5"])
+ ceph_cluster.set_ceph_conf("osd", "osd_mon_report_interval", "5")
+
+ # Vstart defaults to two segments, which very easily gets a "behind on trimming" health warning
+    # from normal IO latency. Increase it for running tests.
+ ceph_cluster.set_ceph_conf("mds", "mds log max segments", "10")
+
+ # Make sure the filesystem created in tests has uid/gid that will let us talk to
+ # it after mounting it (without having to go root). Set in 'global' not just 'mds'
+ # so that cephfs-data-scan will pick it up too.
+ ceph_cluster.set_ceph_conf("global", "mds root ino uid", "%s" % os.getuid())
+ ceph_cluster.set_ceph_conf("global", "mds root ino gid", "%s" % os.getgid())
+
+ # Monkeypatch get_package_version to avoid having to work out what kind of distro we're on
+ def _get_package_version(remote, pkg_name):
+ # Used in cephfs tests to find fuse version. Your development workstation *does* have >=2.9, right?
+ return "2.9"
+
+ import teuthology.packaging
+ teuthology.packaging.get_package_version = _get_package_version
+
+ overall_suite = load_tests(modules, decorating_loader)
+
+    # Filter out tests that don't lend themselves to interactive running.
+ victims = []
+ for case, method in enumerate_methods(overall_suite):
+ fn = getattr(method, method._testMethodName)
+
+ drop_test = False
+
+ if hasattr(fn, 'is_for_teuthology') and getattr(fn, 'is_for_teuthology') is True:
+ drop_test = True
+            log.warning("Dropping test because long running: {0}".format(method.id()))
+
+ if getattr(fn, "needs_trimming", False) is True:
+ drop_test = (os.getuid() != 0)
+            log.warning("Dropping test because client trim unavailable: {0}".format(method.id()))
+
+ if drop_test:
+ # Don't drop the test if it was explicitly requested in arguments
+ is_named = False
+ for named in modules:
+ if named.endswith(method.id()):
+ is_named = True
+ break
+
+ if not is_named:
+ victims.append((case, method))
+
+ log.debug("Disabling {0} tests because of is_for_teuthology or needs_trimming".format(len(victims)))
+ for s, method in victims:
+ s._tests.remove(method)
+
+ if interactive_on_error:
+ result_class = InteractiveFailureResult
+ else:
+ result_class = unittest.TextTestResult
+ fail_on_skip = False
+
+ class LoggingResult(result_class):
+ def startTest(self, test):
+ log.info("Starting test: {0}".format(self.getDescription(test)))
+ test.started_at = datetime.datetime.utcnow()
+ return super(LoggingResult, self).startTest(test)
+
+ def stopTest(self, test):
+ log.info("Stopped test: {0} in {1}s".format(
+ self.getDescription(test),
+ (datetime.datetime.utcnow() - test.started_at).total_seconds()
+ ))
+
+ def addSkip(self, test, reason):
+ if fail_on_skip:
+ # Don't just call addFailure because that requires a traceback
+ self.failures.append((test, reason))
+ else:
+ super(LoggingResult, self).addSkip(test, reason)
+
+ # Execute!
+ result = unittest.TextTestRunner(
+ stream=LogStream(),
+ resultclass=LoggingResult,
+ verbosity=2,
+ failfast=True).run(overall_suite)
+
+ if not result.wasSuccessful():
+ result.printErrors() # duplicate output at end for convenience
+
+ bad_tests = []
+ for test, error in result.errors:
+ bad_tests.append(str(test))
+ for test, failure in result.failures:
+ bad_tests.append(str(test))
+
+ sys.exit(-1)
+ else:
+ sys.exit(0)
+
+
+if __name__ == "__main__":
+ exec_test()
diff --git a/qa/tasks/watch_notify_same_primary.py b/qa/tasks/watch_notify_same_primary.py
new file mode 100644
index 00000000..7c034961
--- /dev/null
+++ b/qa/tasks/watch_notify_same_primary.py
@@ -0,0 +1,130 @@
+
+"""
+watch_notify_same_primary task
+"""
+from six import StringIO
+import contextlib
+import logging
+
+import six
+
+from teuthology.orchestra import run
+from teuthology.contextutil import safe_while
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run watch_notify_same_primary
+
+ The config should be as follows:
+
+ watch_notify_same_primary:
+ clients: [client list]
+
+ The client list should contain 1 client
+
+ The test requires 3 osds.
+
+ example:
+
+ tasks:
+ - ceph:
+ - watch_notify_same_primary:
+ clients: [client.0]
+ - interactive:
+ """
+ log.info('Beginning watch_notify_same_primary...')
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+
+ clients = config.get('clients', ['client.0'])
+ assert len(clients) == 1
+ role = clients[0]
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ manager = ctx.managers['ceph']
+ manager.raw_cluster_cmd('osd', 'set', 'noout')
+
+ pool = manager.create_pool_with_unique_name()
+ def obj(n): return "foo-{num}".format(num=n)
+ def start_watch(n):
+ remote.run(
+ args = [
+ "rados",
+ "-p", pool,
+ "put",
+ obj(n),
+ "/etc/resolv.conf"],
+ logger=log.getChild('watch.{id}'.format(id=n)))
+ proc = remote.run(
+ args = [
+ "rados",
+ "-p", pool,
+ "watch",
+ obj(n)],
+ stdin=run.PIPE,
+ stdout=StringIO(),
+ stderr=StringIO(),
+ wait=False)
+ return proc
+
+ num = 20
+
+ watches = [start_watch(i) for i in range(num)]
+
+ # wait for them all to register
+ for i in range(num):
+ with safe_while() as proceed:
+ while proceed():
+ lines = remote.sh(
+ ["rados", "-p", pool, "listwatchers", obj(i)])
+ num_watchers = lines.count('watcher=')
+ log.info('i see %d watchers for %s', num_watchers, obj(i))
+ if num_watchers >= 1:
+ break
+
+ def notify(n, msg):
+ remote.run(
+ args = [
+ "rados",
+ "-p", pool,
+ "notify",
+ obj(n),
+ msg],
+ logger=log.getChild('notify.{id}'.format(id=n)))
+
+ [notify(n, 'notify1') for n in range(len(watches))]
+
+ manager.kill_osd(0)
+ manager.mark_down_osd(0)
+
+ [notify(n, 'notify2') for n in range(len(watches))]
+
+ try:
+ yield
+ finally:
+        log.info('joining watch_notify_same_primary')
+ for watch in watches:
+ watch.stdin.write("\n")
+
+ run.wait(watches)
+
+ for watch in watches:
+ lines = watch.stdout.getvalue().split("\n")
+ got1 = False
+ got2 = False
+ for l in lines:
+ if 'notify1' in l:
+ got1 = True
+ if 'notify2' in l:
+ got2 = True
+ log.info(lines)
+ assert got1 and got2
+
+ manager.revive_osd(0)
+ manager.remove_pool(pool)
diff --git a/qa/tasks/watch_notify_stress.py b/qa/tasks/watch_notify_stress.py
new file mode 100644
index 00000000..e5e38049
--- /dev/null
+++ b/qa/tasks/watch_notify_stress.py
@@ -0,0 +1,70 @@
+"""
+test_stress_watch task
+"""
+import contextlib
+import logging
+
+import six
+from teuthology.orchestra import run
+from teuthology.task import proc_thrasher
+
+log = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run test_stress_watch
+
+ The config should be as follows:
+
+ test_stress_watch:
+ clients: [client list]
+
+ example:
+
+ tasks:
+ - ceph:
+ - test_stress_watch:
+ clients: [client.0]
+ - interactive:
+ """
+ log.info('Beginning test_stress_watch...')
+ assert isinstance(config, dict), \
+ "please list clients to run on"
+ testwatch = {}
+
+ remotes = []
+
+ for role in config.get('clients', ['client.0']):
+ assert isinstance(role, six.string_types)
+ PREFIX = 'client.'
+ assert role.startswith(PREFIX)
+ id_ = role[len(PREFIX):]
+ (remote,) = ctx.cluster.only(role).remotes.keys()
+ remotes.append(remote)
+
+ args =['CEPH_CLIENT_ID={id_}'.format(id_=id_),
+ 'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')),
+ 'daemon-helper',
+ 'kill',
+ 'multi_stress_watch foo foo'
+ ]
+
+ log.info("args are %s" % (args,))
+
+ proc = proc_thrasher.ProcThrasher({}, remote,
+ args=[run.Raw(i) for i in args],
+ logger=log.getChild('testwatch.{id}'.format(id=id_)),
+ stdin=run.PIPE,
+ wait=False
+ )
+ proc.start()
+ testwatch[id_] = proc
+
+ try:
+ yield
+ finally:
+ log.info('joining watch_notify_stress')
+ for i in testwatch.values():
+ i.join()
diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py
new file mode 100644
index 00000000..5a767038
--- /dev/null
+++ b/qa/tasks/workunit.py
@@ -0,0 +1,423 @@
+"""
+Workunit task -- Run ceph on sets of specific clients
+"""
+import logging
+import pipes
+import os
+import re
+
+import six
+
+from tasks.util import get_remote_for_role
+from tasks.util.workunit import get_refspec_after_overrides
+
+from teuthology import misc
+from teuthology.config import config as teuth_config
+from teuthology.orchestra.run import CommandFailedError
+from teuthology.parallel import parallel
+from teuthology.orchestra import run
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Run ceph on all workunits found under the specified path.
+
+ For example::
+
+ tasks:
+ - ceph:
+ - ceph-fuse: [client.0]
+ - workunit:
+ clients:
+ client.0: [direct_io, xattrs.sh]
+ client.1: [snaps]
+ branch: foo
+
+ You can also run a list of workunits on all clients:
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ - workunit:
+ tag: v0.47
+ clients:
+ all: [direct_io, xattrs.sh, snaps]
+
+ If you have an "all" section it will run all the workunits
+ on each client simultaneously, AFTER running any workunits specified
+ for individual clients. (This prevents unintended simultaneous runs.)
+
+ To customize tests, you can specify environment variables as a dict. You
+ can also specify a time limit for each work unit (defaults to 3h):
+
+ tasks:
+ - ceph:
+ - ceph-fuse:
+ - workunit:
+ sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6
+ clients:
+ all: [snaps]
+ env:
+ FOO: bar
+ BAZ: quux
+ timeout: 3h
+
+ This task supports roles that include a ceph cluster, e.g.::
+
+ tasks:
+ - ceph:
+ - workunit:
+ clients:
+ backup.client.0: [foo]
+ client.1: [bar] # cluster is implicitly 'ceph'
+
+ You can also specify an alternative top-level dir to 'qa/workunits', like
+ 'qa/standalone', with::
+
+ tasks:
+ - install:
+ - workunit:
+ basedir: qa/standalone
+ clients:
+ client.0:
+ - test-ceph-helpers.sh
+
+ :param ctx: Context
+ :param config: Configuration
+ """
+ assert isinstance(config, dict)
+ assert isinstance(config.get('clients'), dict), \
+ 'configuration must contain a dictionary of clients'
+
+ overrides = ctx.config.get('overrides', {})
+ refspec = get_refspec_after_overrides(config, overrides)
+ timeout = config.get('timeout', '3h')
+ cleanup = config.get('cleanup', True)
+
+ log.info('Pulling workunits from ref %s', refspec)
+
+ created_mountpoint = {}
+
+ if config.get('env') is not None:
+ assert isinstance(config['env'], dict), 'env must be a dictionary'
+ clients = config['clients']
+
+ # Create scratch dirs for any non-all workunits
+ log.info('Making a separate scratch dir for every client...')
+ for role in clients.keys():
+ assert isinstance(role, six.string_types)
+ if role == "all":
+ continue
+
+ assert 'client' in role
+ created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir'))
+ created_mountpoint[role] = created_mnt_dir
+
+ # Execute any non-all workunits
+ log.info("timeout={}".format(timeout))
+ log.info("cleanup={}".format(cleanup))
+ with parallel() as p:
+ for role, tests in clients.items():
+ if role != "all":
+ p.spawn(_run_tests, ctx, refspec, role, tests,
+ config.get('env'),
+ basedir=config.get('basedir','qa/workunits'),
+ timeout=timeout,cleanup=cleanup)
+
+ if cleanup:
+ # Clean up dirs from any non-all workunits
+ for role, created in created_mountpoint.items():
+ _delete_dir(ctx, role, created)
+
+ # Execute any 'all' workunits
+ if 'all' in clients:
+ all_tasks = clients["all"]
+ _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
+ config.get('basedir', 'qa/workunits'),
+ config.get('subdir'), timeout=timeout,
+ cleanup=cleanup)
+
+
+def _client_mountpoint(ctx, cluster, id_):
+ """
+ Returns the path to the expected mountpoint for workunits running
+ on some kind of filesystem.
+ """
+ # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet,
+ # only include the cluster name in the dir if the cluster is not 'ceph'
+ if cluster == 'ceph':
+ dir_ = 'mnt.{0}'.format(id_)
+ else:
+ dir_ = 'mnt.{0}.{1}'.format(cluster, id_)
+ return os.path.join(misc.get_testdir(ctx), dir_)
+
+
+def _delete_dir(ctx, role, created_mountpoint):
+ """
+ Delete file used by this role, and delete the directory that this
+ role appeared in.
+
+ :param ctx: Context
+ :param role: "role.#" where # is used for the role id.
+ """
+ cluster, _, id_ = misc.split_role(role)
+ remote = get_remote_for_role(ctx, role)
+ mnt = _client_mountpoint(ctx, cluster, id_)
+ client = os.path.join(mnt, 'client.{id}'.format(id=id_))
+
+ # Remove the directory inside the mount where the workunit ran
+ remote.run(
+ args=[
+ 'sudo',
+ 'rm',
+ '-rf',
+ '--',
+ client,
+ ],
+ )
+ log.info("Deleted dir {dir}".format(dir=client))
+
+ # If the mount was an artificially created dir, delete that too
+ if created_mountpoint:
+ remote.run(
+ args=[
+ 'rmdir',
+ '--',
+ mnt,
+ ],
+ )
+        log.info("Deleted artificial mount point {dir}".format(dir=mnt))
+
+
+def _make_scratch_dir(ctx, role, subdir):
+ """
+ Make scratch directories for this role. This also makes the mount
+ point if that directory does not exist.
+
+ :param ctx: Context
+ :param role: "role.#" where # is used for the role id.
+ :param subdir: use this subdir (False if not used)
+ """
+ created_mountpoint = False
+ cluster, _, id_ = misc.split_role(role)
+ remote = get_remote_for_role(ctx, role)
+ dir_owner = remote.user
+ mnt = _client_mountpoint(ctx, cluster, id_)
+    # If neither kclient nor ceph-fuse is required for a workunit,
+    # mnt may not exist. Stat it and create the directory if it doesn't.
+ try:
+ remote.run(
+ args=[
+ 'stat',
+ '--',
+ mnt,
+ ],
+ )
+ log.info('Did not need to create dir {dir}'.format(dir=mnt))
+ except CommandFailedError:
+ remote.run(
+ args=[
+ 'mkdir',
+ '--',
+ mnt,
+ ],
+ )
+ log.info('Created dir {dir}'.format(dir=mnt))
+ created_mountpoint = True
+
+ if not subdir:
+ subdir = 'client.{id}'.format(id=id_)
+
+ if created_mountpoint:
+ remote.run(
+ args=[
+ 'cd',
+ '--',
+ mnt,
+ run.Raw('&&'),
+ 'mkdir',
+ '--',
+ subdir,
+ ],
+ )
+ else:
+ remote.run(
+ args=[
+ # cd first so this will fail if the mount point does
+ # not exist; pure install -d will silently do the
+ # wrong thing
+ 'cd',
+ '--',
+ mnt,
+ run.Raw('&&'),
+ 'sudo',
+ 'install',
+ '-d',
+ '-m', '0755',
+ '--owner={user}'.format(user=dir_owner),
+ '--',
+ subdir,
+ ],
+ )
+
+ return created_mountpoint
+
+
+def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
+ """
+ Make a scratch directory for each client in the cluster, and then for each
+ test spawn _run_tests() for each role.
+
+    See _run_tests() for parameter documentation.
+ """
+ is_client = misc.is_type('client')
+ client_remotes = {}
+ created_mountpoint = {}
+ for remote, roles_for_host in ctx.cluster.remotes.items():
+ for role in roles_for_host:
+ if is_client(role):
+ client_remotes[role] = remote
+ created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir)
+
+ for unit in tests:
+ with parallel() as p:
+ for role, remote in client_remotes.items():
+ p.spawn(_run_tests, ctx, refspec, role, [unit], env,
+ basedir,
+ subdir,
+ timeout=timeout)
+
+ # cleanup the generated client directories
+ if cleanup:
+ for role, _ in client_remotes.items():
+ _delete_dir(ctx, role, created_mountpoint[role])
+
+
+def _run_tests(ctx, refspec, role, tests, env, basedir,
+ subdir=None, timeout=None, cleanup=True):
+ """
+ Run the individual test. Create a scratch directory and then extract the
+ workunits from git. Make the executables, and then run the tests.
+ Clean up (remove files created) after the tests are finished.
+
+ :param ctx: Context
+ :param refspec: branch, sha1, or version tag used to identify this
+ build
+ :param tests: specific tests specified.
+ :param env: environment set in yaml file. Could be None.
+ :param subdir: subdirectory set in yaml file. Could be None
+ :param timeout: If present, use the 'timeout' command on the remote host
+ to limit execution time. Must be specified by a number
+ followed by 's' for seconds, 'm' for minutes, 'h' for
+ hours, or 'd' for days. If '0' or anything that evaluates
+ to False is passed, the 'timeout' command is not used.
+ """
+ testdir = misc.get_testdir(ctx)
+ assert isinstance(role, six.string_types)
+ cluster, type_, id_ = misc.split_role(role)
+ assert type_ == 'client'
+ remote = get_remote_for_role(ctx, role)
+ mnt = _client_mountpoint(ctx, cluster, id_)
+ # subdir so we can remove and recreate this a lot without sudo
+ if subdir is None:
+ scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp')
+ else:
+ scratch_tmp = os.path.join(mnt, subdir)
+ clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role)
+ srcdir = '{cdir}/{basedir}'.format(cdir=clonedir,
+ basedir=basedir)
+
+ git_url = teuth_config.get_ceph_qa_suite_git_url()
+    # If we are running an upgrade test, ceph-ci may not have branches like
+    # `jewel`, so fall back to ceph.git as an alternative.
+ try:
+ remote.run(logger=log.getChild(role),
+ args=refspec.clone(git_url, clonedir))
+ except CommandFailedError:
+ if git_url.endswith('/ceph-ci.git'):
+ alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git')
+ elif git_url.endswith('/ceph-ci'):
+ alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url)
+ else:
+ raise
+ log.info(
+ "failed to check out '%s' from %s; will also try in %s",
+ refspec,
+ git_url,
+ alt_git_url,
+ )
+ remote.run(logger=log.getChild(role),
+ args=refspec.clone(alt_git_url, clonedir))
+ remote.run(
+ logger=log.getChild(role),
+ args=[
+ 'cd', '--', srcdir,
+ run.Raw('&&'),
+ 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi',
+ run.Raw('&&'),
+            'find', '-executable', '-type', 'f', '-printf', r'%P\0',
+ run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)),
+ ],
+ )
+
+ workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)
+ workunits = sorted(six.ensure_str(misc.get_file(remote, workunits_file)).split('\0'))
+ assert workunits
+
+ try:
+ assert isinstance(tests, list)
+ for spec in tests:
+ log.info('Running workunits matching %s on %s...', spec, role)
+ prefix = '{spec}/'.format(spec=spec)
+ to_run = [w for w in workunits if w == spec or w.startswith(prefix)]
+ if not to_run:
+ raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec))
+ for workunit in to_run:
+ log.info('Running workunit %s...', workunit)
+ args = [
+ 'mkdir', '-p', '--', scratch_tmp,
+ run.Raw('&&'),
+ 'cd', '--', scratch_tmp,
+ run.Raw('&&'),
+ run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'),
+ run.Raw('CEPH_REF={ref}'.format(ref=refspec)),
+ run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)),
+ run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)),
+ run.Raw('CEPH_ID="{id}"'.format(id=id_)),
+ run.Raw('PATH=$PATH:/usr/sbin'),
+ run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)),
+ run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)),
+ ]
+ if env is not None:
+ for var, val in env.items():
+ quoted_val = pipes.quote(val)
+ env_arg = '{var}={val}'.format(var=var, val=quoted_val)
+ args.append(run.Raw(env_arg))
+ args.extend([
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir)])
+ if timeout and timeout != '0':
+ args.extend(['timeout', timeout])
+ args.extend([
+ '{srcdir}/{workunit}'.format(
+ srcdir=srcdir,
+ workunit=workunit,
+ ),
+ ])
+ remote.run(
+ logger=log.getChild(role),
+ args=args,
+ label="workunit test {workunit}".format(workunit=workunit)
+ )
+ if cleanup:
+ args=['sudo', 'rm', '-rf', '--', scratch_tmp]
+ remote.run(logger=log.getChild(role), args=args, timeout=(60*60))
+ finally:
+ log.info('Stopping %s on %s...', tests, role)
+ args=['sudo', 'rm', '-rf', '--', workunits_file, clonedir]
+ # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang.
+ remote.run(
+ logger=log.getChild(role),
+ args=args,
+ )