diff options
Diffstat (limited to '')
-rw-r--r-- | qa/tasks/fs.py | 167 |
1 files changed, 167 insertions, 0 deletions
# Recovered from patch: diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py
# (new file mode 100644, index 000000000..7e62c8031, hunk @@ -0,0 +1,167 @@)
"""
CephFS sub-tasks.
"""

import logging
import re

from tasks.cephfs.filesystem import Filesystem, MDSCluster

log = logging.getLogger(__name__)

# MDSMap flag bits used by the upgrade checks below.
CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1 << 5)
CEPH_MDSMAP_NOT_JOINABLE = (1 << 0)
CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
# Everything up to (and including) CEPH_MDSMAP_ALLOW_STANDBY_REPLAY: only
# these low bits are saved before, and compared after, an upgrade.
UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST << 1) - 1)

# Client entity names handled by clients_evicted(): "client.<numeric id>".
_CLIENT_NAME_RE = re.compile(r"^client\.([0-9]+)$")


def pre_upgrade_save(ctx, config):
    """
    That the upgrade procedure doesn't clobber state: save state.

    Records, for every file system in the current status, its MDSMap epoch,
    max_mds and flags (masked with UPGRADE_FLAGS_MASK) and stores the result
    in ctx['mds-upgrade-state'], keyed by file system id, for
    post_upgrade_checks() to compare against.

    :param ctx: task context; used to build the MDSCluster and to hold the
                saved state.
    :param config: unused.
    """

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    state = {}
    ctx['mds-upgrade-state'] = state

    for fs in list(status.get_filesystems()):
        fscid = fs['id']
        mdsmap = fs['mdsmap']
        fs_state = {}
        fs_state['epoch'] = mdsmap['epoch']
        fs_state['max_mds'] = mdsmap['max_mds']
        fs_state['flags'] = mdsmap['flags'] & UPGRADE_FLAGS_MASK
        state[fscid] = fs_state
        log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")


def post_upgrade_checks(ctx, config):
    """
    That the upgrade procedure doesn't clobber state.

    For every file system, compares the current max_mds and masked flags
    with the values saved by pre_upgrade_save(), then walks the intermediate
    MDSMap epochs to confirm that the upgrade either failed the file system
    or reduced max_mds to 1 (when max_mds was > 1), and that
    allow_standby_replay was disabled at some point (when it was set before
    the upgrade).

    :param ctx: task context; must contain 'mds-upgrade-state' as written by
                pre_upgrade_save().
    :param config: unused.
    :raises AssertionError: if state was not restored, the epoch did not
                            advance, or an expected upgrade step was never
                            observed.
    :raises RuntimeError: if an intermediate epoch shows the file system
                          failed in a combination the checks reject.
    :raises KeyError: if a file system has no saved pre-upgrade state.
    """

    state = ctx['mds-upgrade-state']

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    for fs in list(status.get_filesystems()):
        fscid = fs['id']
        mdsmap = fs['mdsmap']
        fs_state = state[fscid]
        log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}")

        # check state was restored to previous values
        assert fs_state['max_mds'] == mdsmap['max_mds'], (
            f"fs {fscid}: max_mds is {mdsmap['max_mds']} after upgrade, "
            f"expected {fs_state['max_mds']}")
        current_flags = mdsmap['flags'] & UPGRADE_FLAGS_MASK
        assert fs_state['flags'] == current_flags, (
            f"fs {fscid}: flags are {current_flags:#x} after upgrade, "
            f"expected {fs_state['flags']:#x}")

        # now confirm that the upgrade procedure was followed
        epoch = mdsmap['epoch']
        pre_upgrade_epoch = fs_state['epoch']
        assert pre_upgrade_epoch < epoch, (
            f"fs {fscid}: epoch did not advance during upgrade "
            f"(before={pre_upgrade_epoch}, after={epoch})")
        multiple_max_mds = fs_state['max_mds'] > 1
        did_decrease_max_mds = False
        should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
        did_disable_allow_standby_replay = False
        did_fail_fs = False
        # Inspect only the epochs strictly between the saved and the current
        # one; both end points were already compared above.
        for i in range(pre_upgrade_epoch + 1, epoch):
            old_status = mdsc.status(epoch=i)
            old_fs = old_status.get_fsmap(fscid)
            old_mdsmap = old_fs['mdsmap']
            not_joinable = old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE
            if not multiple_max_mds and not_joinable:
                raise RuntimeError('mgr is failing fs when there is only one '
                                   f'rank in epoch {i}.')
            if multiple_max_mds and not_joinable \
                    and old_mdsmap['max_mds'] == 1:
                raise RuntimeError('mgr is failing fs as well the max_mds '
                                   f'is reduced in epoch {i}')
            if not_joinable:
                log.debug(f"max_mds not reduced in epoch {i} as fs was failed "
                          "for carrying out rapid multi-rank mds upgrade")
                did_fail_fs = True
            if multiple_max_mds and old_mdsmap['max_mds'] == 1:
                log.debug(f"max_mds reduced in epoch {i}")
                did_decrease_max_mds = True
            if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
                log.debug(f"allow_standby_replay disabled in epoch {i}")
                did_disable_allow_standby_replay = True
        assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds, (
            f"fs {fscid}: max_mds was {fs_state['max_mds']} but the fs was "
            f"neither failed nor reduced to max_mds=1 between epochs "
            f"{pre_upgrade_epoch} and {epoch}")
        assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay, (
            f"fs {fscid}: allow_standby_replay was never disabled between "
            f"epochs {pre_upgrade_epoch} and {epoch}")


def ready(ctx, config):
    """
    That the file system is ready for clients.

    Calls wait_for_daemons() on every file system in the current status.

    :param ctx: task context.
    :param config: None or a dict; 'timeout' (default 300) is passed to
                   Filesystem.wait_for_daemons().
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    timeout = config.get('timeout', 300)

    mdsc = MDSCluster(ctx)
    status = mdsc.status()

    for filesystem in status.get_filesystems():
        fs = Filesystem(ctx, fscid=filesystem['id'])
        fs.wait_for_daemons(timeout=timeout, status=status)


def clients_evicted(ctx, config):
    """
    Check clients are evicted, unmount (cleanup) if so.

    :param ctx: task context; ctx.mounts is looked up by the numeric client
                id taken from each client name.
    :param config: None or a dict; 'clients' maps "client.<id>" names to a
                   boolean saying whether that client is expected to be
                   evicted. When absent, every mount in ctx.mounts is
                   expected to be evicted.
    :raises RuntimeError: if a client name is not of the form "client.<id>",
                          if a client expected to be evicted still has an
                          MDS session, or if a client expected to be live
                          has no session on any rank.
    :raises AssertionError: if a client expected to be evicted is not
                            blocklisted.
    """

    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    clients = config.get('clients')

    if clients is None:
        clients = {("client."+client_id): True for client_id in ctx.mounts}

    log.info("clients is {}".format(str(clients)))

    fs = Filesystem(ctx)
    status = fs.status()

    has_session = set()
    mounts = {}
    for client in clients:
        match = _CLIENT_NAME_RE.match(client)
        if match is None:
            # Fail with the offending name instead of an AttributeError on
            # None further down.
            raise RuntimeError("invalid client name: {}".format(client))
        mounts[client] = ctx.mounts.get(match.group(1))

    # Global ids are looked up lazily and at most once per client, rather
    # than once per (rank, session, client) combination.
    # NOTE(review): assumes a mount's global id does not change while this
    # task runs -- confirm against the mount implementations.
    global_ids = {}
    for rank in fs.get_ranks(status=status):
        ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status)
        for session in ls:
            for client, evicted in clients.items():
                mount = mounts.get(client)
                if mount is None:
                    continue
                if client not in global_ids:
                    global_ids[client] = mount.get_global_id()
                if session['id'] != global_ids[client]:
                    continue
                if evicted:
                    raise RuntimeError("client still has session: {}".format(str(session)))
                log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank']))
                has_session.add(client)

    no_session = set(clients) - has_session
    should_assert = False
    for client, evicted in clients.items():
        mount = mounts.get(client)
        if mount is not None:
            if evicted:
                log.info("confirming client {} is blocklisted".format(client))
                assert fs.is_addr_blocklisted(mount.get_global_addr())
            elif client in no_session:
                log.info("client {} should not be evicted but has no session with an MDS".format(client))
                fs.is_addr_blocklisted(mount.get_global_addr()) # for debugging
                should_assert = True
    if should_assert:
        raise RuntimeError("some clients which should not be evicted have no session with an MDS?")