Diffstat (limited to 'qa/tasks/cephadm.py')
-rw-r--r-- | qa/tasks/cephadm.py | 1754
1 file changed, 1754 insertions, 0 deletions
diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py new file mode 100644 index 000000000..e9fc25d6a --- /dev/null +++ b/qa/tasks/cephadm.py @@ -0,0 +1,1754 @@ +""" +Ceph cluster task, deployed via cephadm orchestrator +""" +import argparse +import configobj +import contextlib +import logging +import os +import json +import re +import uuid +import yaml + +from copy import deepcopy +from io import BytesIO, StringIO +from tarfile import ReadError +from tasks.ceph_manager import CephManager +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology import packaging +from teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup +from teuthology.config import config as teuth_config +from textwrap import dedent +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from tasks.util import chacra + +# these items we use from ceph.py should probably eventually move elsewhere +from tasks.ceph import get_mons, healthy +from tasks.vip import subst_vip + +CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus'] + +log = logging.getLogger(__name__) + + +def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs): + teuthology.get_testdir(ctx) + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + '-c', '/etc/ceph/{}.conf'.format(cluster_name), + '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + '--fsid', ctx.ceph[cluster_name].fsid, + ] + extra_cephadm_args + [ + '--', + ] + args, + **kwargs + ) + + +def build_initial_config(ctx, config): + cluster_name = config['cluster'] + + path = os.path.join(os.path.dirname(__file__), 'cephadm.conf') + conf = configobj.ConfigObj(path, file_error=True) + + conf.setdefault('global', {}) + conf['global']['fsid'] = ctx.ceph[cluster_name].fsid + + # overrides + for section, keys in config.get('conf',{}).items(): + for key, value in keys.items(): + log.info(" override: [%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + return conf + + +def distribute_iscsi_gateway_cfg(ctx, conf_data): + """ + Distribute common gateway config to get the IPs. + These will help in iscsi clients with finding trusted_ip_list. + """ + log.info('Distributing iscsi-gateway.cfg...') + for remote, roles in ctx.cluster.remotes.items(): + remote.write_file( + path='/etc/ceph/iscsi-gateway.cfg', + data=conf_data, + sudo=True) + +def update_archive_setting(ctx, key, value): + """ + Add logs directory to job's info log file + """ + if ctx.archive is None: + return + with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file: + info_yaml = yaml.safe_load(info_file) + info_file.seek(0) + if 'archive' in info_yaml: + info_yaml['archive'][key] = value + else: + info_yaml['archive'] = {key: value} + yaml.safe_dump(info_yaml, info_file, default_flow_style=False) + + +@contextlib.contextmanager +def normalize_hostnames(ctx): + """ + Ensure we have short hostnames throughout, for consistency between + remote.shortname and socket.gethostname() in cephadm. + """ + log.info('Normalizing hostnames...') + cluster = ctx.cluster.filter(lambda r: '.' 
in r.hostname) + cluster.run(args=[ + 'sudo', + 'hostname', + run.Raw('$(hostname -s)'), + ]) + + try: + yield + finally: + pass + + +@contextlib.contextmanager +def download_cephadm(ctx, config, ref): + cluster_name = config['cluster'] + + if config.get('cephadm_mode') != 'cephadm-package': + if ctx.config.get('redhat'): + _fetch_cephadm_from_rpm(ctx) + # TODO: come up with a sensible way to detect if we need an "old, uncompiled" + # cephadm + elif 'cephadm_git_url' in config and 'cephadm_branch' in config: + _fetch_cephadm_from_github(ctx, config, ref) + else: + _fetch_cephadm_from_chachra(ctx, config, cluster_name) + + try: + yield + finally: + _rm_cluster(ctx, cluster_name) + if config.get('cephadm_mode') == 'root': + _rm_cephadm(ctx) + + +def _fetch_cephadm_from_rpm(ctx): + log.info("Copying cephadm installed from an RPM package") + # cephadm already installed from redhat.install task + ctx.cluster.run( + args=[ + 'cp', + run.Raw('$(which cephadm)'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ] + ) + + +def _fetch_cephadm_from_github(ctx, config, ref): + ref = config.get('cephadm_branch', ref) + git_url = config.get('cephadm_git_url', teuth_config.get_ceph_git_url()) + log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref)) + if git_url.startswith('https://github.com/'): + # git archive doesn't like https:// URLs, which we use with github. + rest = git_url.split('https://github.com/', 1)[1] + rest = re.sub(r'\.git/?$', '', rest).strip() # no .git suffix + ctx.cluster.run( + args=[ + 'curl', '--silent', + 'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm', + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ], + ) + else: + ctx.cluster.run( + args=[ + 'git', 'clone', git_url, 'testrepo', + run.Raw('&&'), + 'cd', 'testrepo', + run.Raw('&&'), + 'git', 'show', f'{ref}:src/cephadm/cephadm', + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', ctx.cephadm, + ], + ) + # sanity-check the resulting file and set executable bit + cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) + ctx.cluster.run( + args=[ + 'test', '-s', ctx.cephadm, + run.Raw('&&'), + 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), + run.Raw('&&'), + 'chmod', '+x', ctx.cephadm, + ], + ) + + +def _fetch_cephadm_from_chachra(ctx, config, cluster_name): + log.info('Downloading "compiled" cephadm from cachra') + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + bp = packaging.get_builder_project()( + config.get('project', 'ceph'), + config, + ctx=ctx, + remote=bootstrap_remote, + ) + log.info('builder_project result: %s' % (bp._result.json())) + + flavor = config.get('flavor', 'default') + branch = config.get('branch') + sha1 = config.get('sha1') + + # pull the cephadm binary from chacra + url = chacra.get_binary_url( + 'cephadm', + project=bp.project, + distro=bp.distro.split('/')[0], + release=bp.distro.split('/')[1], + arch=bp.arch, + flavor=flavor, + branch=branch, + sha1=sha1, + ) + log.info("Discovered cachra url: %s", url) + ctx.cluster.run( + args=[ + 'curl', '--silent', '-L', url, + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ], + ) + + # sanity-check the resulting file and set executable bit + cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) + ctx.cluster.run( + args=[ + 'test', '-s', ctx.cephadm, + run.Raw('&&'), + 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), + run.Raw('&&'), + 'chmod', '+x', ctx.cephadm, + ], + ) + + +def 
_rm_cluster(ctx, cluster_name): + log.info('Removing cluster...') + ctx.cluster.run(args=[ + 'sudo', + ctx.cephadm, + 'rm-cluster', + '--fsid', ctx.ceph[cluster_name].fsid, + '--force', + ]) + + +def _rm_cephadm(ctx): + log.info('Removing cephadm ...') + ctx.cluster.run( + args=[ + 'rm', + '-rf', + ctx.cephadm, + ], + ) + + +@contextlib.contextmanager +def ceph_log(ctx, config): + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'log', '/var/log/ceph') + + + try: + yield + + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + + finally: + log.info('Checking cluster log for badness...') + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. + :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + '/var/log/ceph/{fsid}/ceph.log'.format( + fsid=fsid), + ] + if excludes: + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + r = ctx.ceph[cluster_name].bootstrap_remote.run( + stdout=StringIO(), + args=args, + ) + stdout = r.stdout.getvalue() + if stdout != '': + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config.get('log-ignorelist')) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log-ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', # all logs, not just for the cluster + '/var/log/rbd-target-api', # ceph-iscsi + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, '/var/log/ceph', # everything + os.path.join(sub, 'log')) + except ReadError: + pass + + +@contextlib.contextmanager +def ceph_crash(ctx, config): + """ + Gather crash dumps from /var/lib/ceph/$fsid/crash + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') + + try: + yield + + finally: + if ctx.archive is not None: + log.info('Archiving crash dumps...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, + '/var/lib/ceph/%s/crash' % fsid, + os.path.join(sub, 'crash')) + except ReadError: + pass + + 
+@contextlib.contextmanager +def pull_image(ctx, config): + cluster_name = config['cluster'] + log.info(f'Pulling image {ctx.ceph[cluster_name].image} on all hosts...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'pull', + ], + wait=False, + ) + ) + + try: + yield + finally: + pass + +@contextlib.contextmanager +def setup_ca_signed_keys(ctx, config): + # generate our ca key + cluster_name = config['cluster'] + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + bootstrap_remote.run(args=[ + 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/ca-key', '-N', '' + ]) + + # not using read_file here because it runs dd as a non-root + # user and would hit permission issues + r = bootstrap_remote.run(args=[ + 'sudo', 'cat', '/root/ca-key.pub' + ], stdout=StringIO()) + ca_key_pub_contents = r.stdout.getvalue() + + # make CA key accepted on each host + for remote in ctx.cluster.remotes.keys(): + # write key to each host's /etc/ssh dir + remote.run(args=[ + 'sudo', 'echo', ca_key_pub_contents, + run.Raw('|'), + 'sudo', 'tee', '-a', '/etc/ssh/ca-key.pub', + ]) + # make sshd accept the CA signed key + remote.run(args=[ + 'sudo', 'echo', 'TrustedUserCAKeys /etc/ssh/ca-key.pub', + run.Raw('|'), + 'sudo', 'tee', '-a', '/etc/ssh/sshd_config', + run.Raw('&&'), + 'sudo', 'systemctl', 'restart', 'sshd', + ]) + + # generate a new key pair and sign the pub key to make a cert + bootstrap_remote.run(args=[ + 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/cephadm-ssh-key', '-N', '', + run.Raw('&&'), + 'sudo', 'ssh-keygen', '-s', '/root/ca-key', '-I', 'user_root', '-n', 'root', '-V', '+52w', '/root/cephadm-ssh-key', + ]) + + # for debugging, to make sure this setup has worked as intended + for remote in ctx.cluster.remotes.keys(): + remote.run(args=[ + 'sudo', 'cat', '/etc/ssh/ca-key.pub' + ]) + remote.run(args=[ + 'sudo', 'cat', '/etc/ssh/sshd_config', + run.Raw('|'), + 'grep', 'TrustedUserCAKeys' + ]) + bootstrap_remote.run(args=[ + 'sudo', 'ls', '/root/' + ]) + + ctx.ca_signed_key_info = {} + ctx.ca_signed_key_info['ca-key'] = '/root/ca-key' + ctx.ca_signed_key_info['ca-key-pub'] = '/root/ca-key.pub' + ctx.ca_signed_key_info['private-key'] = '/root/cephadm-ssh-key' + ctx.ca_signed_key_info['ca-signed-cert'] = '/root/cephadm-ssh-key-cert.pub' + + try: + yield + finally: + pass + +@contextlib.contextmanager +def ceph_bootstrap(ctx, config): + """ + Bootstrap ceph cluster. 
+ + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + fsid = ctx.ceph[cluster_name].fsid + + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + first_mon = ctx.ceph[cluster_name].first_mon + first_mon_role = ctx.ceph[cluster_name].first_mon_role + mons = ctx.ceph[cluster_name].mons + + ctx.cluster.run(args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', + ]); + ctx.cluster.run(args=[ + 'sudo', 'chmod', '777', '/etc/ceph', + ]); + try: + # write seed config + log.info('Writing seed config...') + conf_fp = BytesIO() + seed_config = build_initial_config(ctx, config) + seed_config.write(conf_fp) + bootstrap_remote.write_file( + path='{}/seed.{}.conf'.format(testdir, cluster_name), + data=conf_fp.getvalue()) + log.debug('Final config:\n' + conf_fp.getvalue().decode()) + ctx.ceph[cluster_name].conf = seed_config + + # register initial daemons + ctx.daemons.register_daemon( + bootstrap_remote, 'mon', first_mon, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mon.' + first_mon), + wait=False, + started=True, + ) + if not ctx.ceph[cluster_name].roleless: + first_mgr = ctx.ceph[cluster_name].first_mgr + ctx.daemons.register_daemon( + bootstrap_remote, 'mgr', first_mgr, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mgr.' + first_mgr), + wait=False, + started=True, + ) + + # bootstrap + log.info('Bootstrapping...') + cmd = [ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + '-v', + 'bootstrap', + '--fsid', fsid, + '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), + '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), + '--output-keyring', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + + if not config.get("use-ca-signed-key", False): + cmd += ['--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name)] + else: + # ctx.ca_signed_key_info should have been set up in + # setup_ca_signed_keys function which we expect to have + # run before bootstrap if use-ca-signed-key is true + signed_key_info = ctx.ca_signed_key_info + cmd += [ + "--ssh-private-key", signed_key_info['private-key'], + "--ssh-signed-cert", signed_key_info['ca-signed-cert'], + ] + + if config.get("no_cgroups_split") is True: + cmd.insert(cmd.index("bootstrap"), "--no-cgroups-split") + + if config.get('registry-login'): + registry = config['registry-login'] + cmd += [ + "--registry-url", registry['url'], + "--registry-username", registry['username'], + "--registry-password", registry['password'], + ] + + if not ctx.ceph[cluster_name].roleless: + cmd += [ + '--mon-id', first_mon, + '--mgr-id', first_mgr, + '--orphan-initial-daemons', # we will do it explicitly! 
+ '--skip-monitoring-stack', # we'll provision these explicitly + ] + + if mons[first_mon_role].startswith('['): + cmd += ['--mon-addrv', mons[first_mon_role]] + else: + cmd += ['--mon-ip', mons[first_mon_role]] + if config.get('skip_dashboard'): + cmd += ['--skip-dashboard'] + if config.get('skip_monitoring_stack'): + cmd += ['--skip-monitoring-stack'] + if config.get('single_host_defaults'): + cmd += ['--single-host-defaults'] + if not config.get('avoid_pacific_features', False): + cmd += ['--skip-admin-label'] + # bootstrap makes the keyring root 0600, so +r it for our purposes + cmd += [ + run.Raw('&&'), + 'sudo', 'chmod', '+r', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + bootstrap_remote.run(args=cmd) + + # fetch keys and configs + log.info('Fetching config...') + ctx.ceph[cluster_name].config_file = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.conf') + log.info('Fetching client.admin keyring...') + ctx.ceph[cluster_name].admin_keyring = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.client.admin.keyring') + log.info('Fetching mon keyring...') + ctx.ceph[cluster_name].mon_keyring = \ + bootstrap_remote.read_file(f'/var/lib/ceph/{fsid}/mon.{first_mon}/keyring', sudo=True) + + if not config.get("use-ca-signed-key", False): + # fetch ssh key, distribute to additional nodes + log.info('Fetching pub ssh key...') + ssh_pub_key = bootstrap_remote.read_file( + f'{testdir}/{cluster_name}.pub').decode('ascii').strip() + + log.info('Installing pub ssh key for root users...') + ctx.cluster.run(args=[ + 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh', + run.Raw('&&'), + 'echo', ssh_pub_key, + run.Raw('|'), + 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys', + run.Raw('&&'), + 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys', + ]) + + # set options + if config.get('allow_ptrace', True): + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true']) + + if not config.get('avoid_pacific_features', False): + log.info('Distributing conf and client.admin keyring to all hosts + 0755') + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'client-keyring', 'set', 'client.admin', + '*', '--mode', '0755'], + check_status=False) + + # add other hosts + for remote in ctx.cluster.remotes.keys(): + if remote == bootstrap_remote: + continue + + # note: this may be redundant (see above), but it avoids + # us having to wait for cephadm to do it. + log.info('Writing (initial) conf and keyring to %s' % remote.shortname) + remote.write_file( + path='/etc/ceph/{}.conf'.format(cluster_name), + data=ctx.ceph[cluster_name].config_file) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring) + + log.info('Adding host %s to orchestrator...' % remote.shortname) + _shell(ctx, cluster_name, bootstrap_remote, [ + 'ceph', 'orch', 'host', 'add', + remote.shortname + ]) + r = _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'host', 'ls', '--format=json'], + stdout=StringIO()) + hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())] + assert remote.shortname in hosts + + yield + + finally: + log.info('Cleaning up testdir ceph.* files...') + ctx.cluster.run(args=[ + 'rm', '-f', + '{}/seed.{}.conf'.format(testdir, cluster_name), + '{}/{}.pub'.format(testdir, cluster_name), + ]) + + log.info('Stopping all daemons...') + + # this doesn't block until they are all stopped... 
+ #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + + # stop the daemons we know + for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True): + cluster, type_, id_ = teuthology.split_role(role) + try: + ctx.daemons.get_daemon(type_, id_, cluster).stop() + except Exception: + log.exception(f'Failed to stop "{role}"') + raise + + # tear down anything left (but leave the logs behind) + ctx.cluster.run( + args=[ + 'sudo', + ctx.cephadm, + 'rm-cluster', + '--fsid', fsid, + '--force', + '--keep-logs', + ], + check_status=False, # may fail if upgrading from old cephadm + ) + + # clean up /etc/ceph + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + '/etc/ceph/{}.conf'.format(cluster_name), + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ]) + + +@contextlib.contextmanager +def ceph_mons(ctx, config): + """ + Deploy any additional mons + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + daemons = {} + if config.get('add_mons_via_daemon_add'): + # This is the old way of adding mons that works with the (early) octopus + # cephadm scheduler. + num_mons = 1 + for remote, roles in ctx.cluster.remotes.items(): + for mon in [r for r in roles + if teuthology.is_type('mon', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mon) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: + continue + log.info('Adding %s on %s' % (mon, remote.shortname)) + num_mons += 1 + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'daemon', 'add', 'mon', + remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_, + ]) + ctx.daemons.register_daemon( + remote, 'mon', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mon), + wait=False, + started=True, + ) + daemons[mon] = (remote, id_) + + with contextutil.safe_while(sleep=1, tries=180) as proceed: + while proceed(): + log.info('Waiting for %d mons in monmap...' % (num_mons)) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'mon', 'dump', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + if len(j['mons']) == num_mons: + break + else: + nodes = [] + for remote, roles in ctx.cluster.remotes.items(): + for mon in [r for r in roles + if teuthology.is_type('mon', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mon) + log.info('Adding %s on %s' % (mon, remote.shortname)) + nodes.append(remote.shortname + + ':' + ctx.ceph[cluster_name].mons[mon] + + '=' + id_) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: + continue + daemons[mon] = (remote, id_) + + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mon', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for mgr, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mon', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mon), + wait=False, + started=True, + ) + + with contextutil.safe_while(sleep=1, tries=180) as proceed: + while proceed(): + log.info('Waiting for %d mons in monmap...' 
% (len(nodes))) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'mon', 'dump', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + if len(j['mons']) == len(nodes): + break + + # refresh our (final) ceph.conf file + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + log.info('Generating final ceph.conf file...') + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=bootstrap_remote, + args=[ + 'ceph', 'config', 'generate-minimal-conf', + ], + stdout=StringIO(), + ) + ctx.ceph[cluster_name].config_file = r.stdout.getvalue() + + yield + + finally: + pass + + +@contextlib.contextmanager +def ceph_mgrs(ctx, config): + """ + Deploy any additional mgrs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for mgr in [r for r in roles + if teuthology.is_type('mgr', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mgr) + log.info('Adding %s on %s' % (mgr, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr: + continue + daemons[mgr] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mgr', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for mgr, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mgr', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mgr), + wait=False, + started=True, + ) + + yield + + finally: + pass + + +@contextlib.contextmanager +def ceph_osds(ctx, config): + """ + Deploy OSDs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + log.info('Deploying OSDs...') + + # provision OSDs in numeric order + id_to_remote = {} + devs_by_remote = {} + for remote, roles in ctx.cluster.remotes.items(): + devs_by_remote[remote] = teuthology.get_scratch_devices(remote) + for osd in [r for r in roles + if teuthology.is_type('osd', cluster_name)(r)]: + _, _, id_ = teuthology.split_role(osd) + id_to_remote[int(id_)] = (osd, remote) + + cur = 0 + for osd_id in sorted(id_to_remote.keys()): + osd, remote = id_to_remote[osd_id] + _, _, id_ = teuthology.split_role(osd) + assert int(id_) == cur + devs = devs_by_remote[remote] + assert devs ## FIXME ## + dev = devs.pop() + if all(_ in dev for _ in ('lv', 'vg')): + short_dev = dev.replace('/dev/', '') + else: + short_dev = dev + log.info('Deploying %s on %s with %s...' 
% ( + osd, remote.shortname, dev)) + _shell(ctx, cluster_name, remote, [ + 'ceph-volume', 'lvm', 'zap', dev]) + add_osd_args = ['ceph', 'orch', 'daemon', 'add', 'osd', + remote.shortname + ':' + short_dev] + osd_method = config.get('osd_method') + if osd_method: + add_osd_args.append(osd_method) + _shell(ctx, cluster_name, remote, add_osd_args) + ctx.daemons.register_daemon( + remote, 'osd', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(osd), + wait=False, + started=True, + ) + cur += 1 + + if cur == 0: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'osd', '--all-available-devices', + ]) + # expect the number of scratch devs + num_osds = sum(map(len, devs_by_remote.values())) + assert num_osds + else: + # expect the number of OSDs we created + num_osds = cur + + log.info(f'Waiting for {num_osds} OSDs to come up...') + with contextutil.safe_while(sleep=1, tries=120) as proceed: + while proceed(): + p = _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'osd', 'stat', '-f', 'json'], stdout=StringIO()) + j = json.loads(p.stdout.getvalue()) + if int(j.get('num_up_osds', 0)) == num_osds: + break; + + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.ceph[cluster_name].bootstrap_remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' + cluster_name), + cluster=cluster_name, + cephadm=True, + ) + + yield + finally: + pass + + +@contextlib.contextmanager +def ceph_mdss(ctx, config): + """ + Deploy MDSss + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('mds', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mds', + 'all', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mds', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + +@contextlib.contextmanager +def cephfs_setup(ctx, config): + mdss = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + + # If there are any MDSs, then create a filesystem for them to use + # Do this last because requires mon cluster to be up and running + if len(mdss) > 0: + log.info('Setting up CephFS filesystem(s)...') + cephfs_config = config.get('cephfs', {}) + fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) + set_allow_multifs = len(fs_configs) > 1 + + # wait for standbys to become available (slow due to valgrind, perhaps) + mdsc = MDSCluster(ctx) + with contextutil.safe_while(sleep=2,tries=150) as proceed: + while proceed(): + if len(mdsc.get_standby_daemons()) >= len(mdss): + break + + fss = [] + for fs_config in fs_configs: + assert isinstance(fs_config, dict) + name = fs_config.pop('name') + temp = deepcopy(cephfs_config) + teuthology.deep_merge(temp, fs_config) + subvols = config.get('subvols', None) + if subvols: + teuthology.deep_merge(temp, {'subvols': subvols}) + fs = Filesystem(ctx, fs_config=temp, name=name, create=True) + if set_allow_multifs: + fs.set_allow_multifs() + set_allow_multifs = False + fss.append(fs) + + yield + + for fs in fss: + fs.destroy() + else: + yield + 
+@contextlib.contextmanager +def ceph_monitoring(daemon_type, ctx, config): + """ + Deploy prometheus, node-exporter, etc. + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type(daemon_type, cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', daemon_type, + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, daemon_type, id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_rgw(ctx, config): + """ + Deploy rgw + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = {} + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('rgw', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + svc = '.'.join(id_.split('.')[0:2]) + if svc not in nodes: + nodes[svc] = [] + nodes[svc].append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + + for svc, nodes in nodes.items(): + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'rgw', svc, + '--placement', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'rgw', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_iscsi(ctx, config): + """ + Deploy iSCSIs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + ips = [] + + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('iscsi', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + ips.append(remote.ip_address) + trusted_ip_list = ','.join(ips) + if nodes: + poolname = 'datapool' + # ceph osd pool create datapool 3 3 replicated + _shell(ctx, cluster_name, remote, [ + 'ceph', 'osd', 'pool', 'create', + poolname, '3', '3', 'replicated'] + ) + + _shell(ctx, cluster_name, remote, [ + 'rbd', 'pool', 'init', poolname] + ) + + # ceph orch apply iscsi datapool (admin)user (admin)password + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'iscsi', + poolname, 'admin', 'admin', + '--trusted_ip_list', trusted_ip_list, + '--placement', str(len(nodes)) + ';' + ';'.join(nodes)] + ) + + # used by iscsi client to identify valid gateway ip's + conf_data = dedent(f""" + [config] + trusted_ip_list = {trusted_ip_list} + """) + distribute_iscsi_gateway_cfg(ctx, conf_data) + + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'iscsi', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_clients(ctx, config): + cluster_name = config['cluster'] + + log.info('Setting up client 
nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, + name) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'auth', + 'get-or-create', name, + 'mon', 'allow *', + 'osd', 'allow *', + 'mds', 'allow *', + 'mgr', 'allow *', + ], + stdout=StringIO(), + ) + keyring = r.stdout.getvalue() + remote.sudo_write_file(client_keyring, keyring, mode='0644') + yield + + +@contextlib.contextmanager +def ceph_initial(): + try: + yield + finally: + log.info('Teardown complete') + + +## public methods +@contextlib.contextmanager +def stop(ctx, config): + """ + Stop ceph daemons + + For example:: + tasks: + - ceph.stop: [mds.*] + + tasks: + - ceph.stop: [osd.0, osd.2] + + tasks: + - ceph.stop: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list( + config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + clusters.add(cluster) + +# for cluster in clusters: +# ctx.ceph[cluster].watchdog.stop() +# ctx.ceph[cluster].watchdog.join() + + yield + + +def shell(ctx, config): + """ + Execute (shell) commands + """ + cluster_name = config.get('cluster', 'ceph') + + args = [] + for k in config.pop('env', []): + args.extend(['-e', k + '=' + ctx.config.get(k, '')]) + for k in config.pop('volumes', []): + args.extend(['-v', k]) + + if 'all-roles' in config and len(config) == 1: + a = config['all-roles'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) + elif 'all-hosts' in config and len(config) == 1: + a = config['all-hosts'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if id_.startswith('host.')) + + for role, cmd in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + if isinstance(cmd, list): + for c in cmd: + _shell(ctx, cluster_name, remote, + ['bash', '-c', subst_vip(ctx, c)], + extra_cephadm_args=args) + else: + assert isinstance(cmd, str) + _shell(ctx, cluster_name, remote, + ['bash', '-ex', '-c', subst_vip(ctx, cmd)], + extra_cephadm_args=args) + + +def apply(ctx, config): + """ + Apply spec + + tasks: + - cephadm.apply: + specs: + - service_type: rgw + service_id: foo + spec: + rgw_frontend_port: 8000 + - service_type: rgw + service_id: bar + spec: + rgw_frontend_port: 9000 + zone: bar + realm: asdf + + """ + cluster_name = config.get('cluster', 'ceph') + + specs = config.get('specs', []) + y = subst_vip(ctx, yaml.dump_all(specs)) + + log.info(f'Applying spec(s):\n{y}') + _shell( + ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'orch', 'apply', '-i', '-'], + stdin=y, + ) + + +def wait_for_service(ctx, config): + """ + Wait for a service to be fully started + + tasks: + - cephadm.wait_for_service: + service: rgw.foo + timeout: 60 # defaults to 300 + + """ + cluster_name = config.get('cluster', 'ceph') + timeout = config.get('timeout', 300) + service = config.get('service') + assert service + + log.info( + f'Waiting 
for {cluster_name} service {service} to start (timeout {timeout})...' + ) + with contextutil.safe_while(sleep=1, tries=timeout) as proceed: + while proceed(): + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=ctx.ceph[cluster_name].bootstrap_remote, + args=[ + 'ceph', 'orch', 'ls', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + svc = None + for s in j: + if s['service_name'] == service: + svc = s + break + if svc: + log.info( + f"{service} has {s['status']['running']}/{s['status']['size']}" + ) + if s['status']['running'] == s['status']['size']: + break + + +@contextlib.contextmanager +def tweaked_option(ctx, config): + """ + set an option, and then restore it with its original value + + Note, due to the way how tasks are executed/nested, it's not suggested to + use this method as a standalone task. otherwise, it's likely that it will + restore the tweaked option at the /end/ of 'tasks' block. + """ + saved_options = {} + # we can complicate this when necessary + options = ['mon-health-to-clog'] + type_, id_ = 'mon', '*' + cluster = config.get('cluster', 'ceph') + manager = ctx.managers[cluster] + if id_ == '*': + get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_)) + else: + get_from = id_ + for option in options: + if option not in config: + continue + value = 'true' if config[option] else 'false' + option = option.replace('-', '_') + old_value = manager.get_config(type_, get_from, option) + if value != old_value: + saved_options[option] = old_value + manager.inject_args(type_, id_, option, value) + yield + for option, value in saved_options.items(): + manager.inject_args(type_, id_, option, value) + + +@contextlib.contextmanager +def restart(ctx, config): + """ + restart ceph daemons + + For example:: + tasks: + - ceph.restart: [all] + + For example:: + tasks: + - ceph.restart: [osd.0, mon.1, mds.*] + + or:: + + tasks: + - ceph.restart: + daemons: [osd.0, mon.1] + wait-for-healthy: false + wait-for-osds-up: true + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list( + config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + log.info('daemons %s' % daemons) + with tweaked_option(ctx, config): + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + d = ctx.daemons.get_daemon(type_, id_, cluster) + assert d, 'daemon %s does not exist' % role + d.stop() + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) + d.restart() + clusters.add(cluster) + + if config.get('wait-for-healthy', True): + for cluster in clusters: + healthy(ctx=ctx, config=dict(cluster=cluster)) + if config.get('wait-for-osds-up', False): + for cluster in clusters: + ctx.managers[cluster].wait_for_all_osds_up() + yield + + +@contextlib.contextmanager +def distribute_config_and_admin_keyring(ctx, config): + """ + Distribute a sufficient config and keyring for clients + """ + cluster_name = config['cluster'] + log.info('Distributing (final) config and client.admin keyring...') + for remote, roles in ctx.cluster.remotes.items(): + remote.write_file( + '/etc/ceph/{}.conf'.format(cluster_name), + ctx.ceph[cluster_name].config_file, + sudo=True) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring, + sudo=True) + try: + yield + finally: + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + 
'/etc/ceph/{}.conf'.format(cluster_name), + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ]) + + +@contextlib.contextmanager +def crush_setup(ctx, config): + cluster_name = config['cluster'] + + profile = config.get('crush_tunables', 'default') + log.info('Setting crush tunables to %s', profile) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['ceph', 'osd', 'crush', 'tunables', profile]) + yield + + +@contextlib.contextmanager +def create_rbd_pool(ctx, config): + if config.get('create_rbd_pool', False): + cluster_name = config['cluster'] + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=ctx.ceph[cluster_name].bootstrap_remote, + ceph_cluster=cluster_name, + ) + log.info('Creating RBD pool') + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'create', 'rbd', '8']) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ]) + yield + + +@contextlib.contextmanager +def _bypass(): + yield + + +@contextlib.contextmanager +def initialize_config(ctx, config): + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + + ctx.ceph[cluster_name].thrashers = [] + # fixme: setup watchdog, ala ceph.py + + ctx.ceph[cluster_name].roleless = False # see below + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + + # cephadm mode? + if 'cephadm_mode' not in config: + config['cephadm_mode'] = 'root' + assert config['cephadm_mode'] in ['root', 'cephadm-package'] + if config['cephadm_mode'] == 'root': + ctx.cephadm = testdir + '/cephadm' + else: + ctx.cephadm = 'cephadm' # in the path + + if first_ceph_cluster: + # FIXME: this is global for all clusters + ctx.daemons = DaemonGroup( + use_cephadm=ctx.cephadm) + + # uuid + fsid = str(uuid.uuid1()) + log.info('Cluster fsid is %s' % fsid) + ctx.ceph[cluster_name].fsid = fsid + + # mon ips + log.info('Choosing monitor IPs and ports...') + remotes_and_roles = ctx.cluster.remotes.items() + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + + if config.get('roleless', False): + # mons will be named after hosts + first_mon = None + max_mons = config.get('max_mons', 5) + for remote, _ in remotes_and_roles: + ctx.cluster.remotes[remote].append('mon.' + remote.shortname) + if not first_mon: + first_mon = remote.shortname + bootstrap_remote = remote + max_mons -= 1 + if not max_mons: + break + log.info('No mon roles; fabricating mons') + + roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()] + + ctx.ceph[cluster_name].mons = get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + ) + log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons) + + if config.get('roleless', False): + ctx.ceph[cluster_name].roleless = True + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = 'mon.' 
+ first_mon + else: + first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0] + _, _, first_mon = teuthology.split_role(first_mon_role) + (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys() + log.info('First mon is mon.%s on %s' % (first_mon, + bootstrap_remote.shortname)) + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = first_mon_role + + others = ctx.cluster.remotes[bootstrap_remote] + mgrs = sorted([r for r in others + if teuthology.is_type('mgr', cluster_name)(r)]) + if not mgrs: + raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon) + _, _, first_mgr = teuthology.split_role(mgrs[0]) + log.info('First mgr is %s' % (first_mgr)) + ctx.ceph[cluster_name].first_mgr = first_mgr + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy ceph cluster using cephadm + + For example, teuthology.yaml can contain the 'defaults' section: + + defaults: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + + Using overrides makes it possible to customize it per run. + The equivalent 'overrides' section looks like: + + overrides: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + registry-login: + url: registry-url + username: registry-user + password: registry-password + + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('cephadm', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.ceph: + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].bootstrapped = False + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + + containers = config.get('containers', {}) + container_image_name = containers.get('image', container_image_name) + + if not hasattr(ctx.ceph[cluster_name], 'image'): + ctx.ceph[cluster_name].image = config.get('image') + ref = ctx.config.get("branch", "main") + if not ctx.ceph[cluster_name].image: + if not container_image_name: + raise Exception("Configuration error occurred. " + "The 'image' value is undefined for 'cephadm' task. 
" + "Please provide corresponding options in the task's " + "config, task 'overrides', or teuthology 'defaults' " + "section.") + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + + if sha1: + if flavor == "crimson": + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + ref = sha1 + else: + # fall back to using the branch value + ctx.ceph[cluster_name].image = container_image_name + ':' + ref + log.info('Cluster image is %s' % ctx.ceph[cluster_name].image) + + + with contextutil.nested( + #if the cluster is already bootstrapped bypass corresponding methods + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else initialize_config(ctx=ctx, config=config), + lambda: ceph_initial(), + lambda: normalize_hostnames(ctx=ctx), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else download_cephadm(ctx=ctx, config=config, ref=ref), + lambda: ceph_log(ctx=ctx, config=config), + lambda: ceph_crash(ctx=ctx, config=config), + lambda: pull_image(ctx=ctx, config=config), + lambda: _bypass() if not (config.get('use-ca-signed-key', False)) \ + else setup_ca_signed_keys(ctx, config), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else ceph_bootstrap(ctx, config), + lambda: crush_setup(ctx=ctx, config=config), + lambda: ceph_mons(ctx=ctx, config=config), + lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config), + lambda: ceph_mgrs(ctx=ctx, config=config), + lambda: ceph_osds(ctx=ctx, config=config), + lambda: ceph_mdss(ctx=ctx, config=config), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: ceph_rgw(ctx=ctx, config=config), + lambda: ceph_iscsi(ctx=ctx, config=config), + lambda: ceph_monitoring('prometheus', ctx=ctx, config=config), + lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config), + lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config), + lambda: ceph_monitoring('grafana', ctx=ctx, config=config), + lambda: ceph_clients(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + ): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + + log.info('Setup complete, yielding') + yield + + finally: + log.info('Teardown begin') + |