summary refs log tree commit diff stats
path: root/qa/tasks/fs.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- qa/tasks/fs.py 167
1 files changed, 167 insertions, 0 deletions
diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py
new file mode 100644
index 000000000..7e62c8031
--- /dev/null
+++ b/qa/tasks/fs.py
@@ -0,0 +1,167 @@
+"""
+CephFS sub-tasks.
+"""
+
+import logging
+import re
+
+from tasks.cephfs.filesystem import Filesystem, MDSCluster
+
+log = logging.getLogger(__name__)
+
+# MDSMap flag bits that the upgrade checks in this module look at.
+CEPH_MDSMAP_NOT_JOINABLE = 1 << 0
+CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = 1 << 5
+
+# Highest flag bit that is compared before/after an upgrade.
+CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
+
+# Mask selecting every flag bit up to and including CEPH_MDSMAP_LAST.
+UPGRADE_FLAGS_MASK = (CEPH_MDSMAP_LAST << 1) - 1
+
+
+def pre_upgrade_save(ctx, config):
+    """
+    That the upgrade procedure doesn't clobber state: save state.
+
+    For every file system in the current cluster status, record the mdsmap
+    epoch, max_mds and the flag bits selected by UPGRADE_FLAGS_MASK. The
+    records are stored in ctx['mds-upgrade-state'], keyed by file system id.
+
+    :param ctx: task context; receives the 'mds-upgrade-state' entry.
+    :param config: task configuration; not read by this task.
+    """
+
+    status = MDSCluster(ctx).status()
+
+    saved = {}
+    # The dict is published first and then filled in place below.
+    ctx['mds-upgrade-state'] = saved
+
+    filesystems = list(status.get_filesystems())
+    for entry in filesystems:
+        fscid = entry['id']
+        mdsmap = entry['mdsmap']
+        snapshot = {
+            'epoch': mdsmap['epoch'],
+            'max_mds': mdsmap['max_mds'],
+            # Only the flag bits covered by the mask are remembered.
+            'flags': mdsmap['flags'] & UPGRADE_FLAGS_MASK,
+        }
+        saved[fscid] = snapshot
+        log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {snapshot}")
+
+
+def post_upgrade_checks(ctx, config):
+    """
+    That the upgrade procedure doesn't clobber state.
+
+    Compares every file system in the current status against the record
+    stored in ctx['mds-upgrade-state'], then inspects each epoch strictly
+    between the recorded one and the current one.
+
+    :param ctx: task context holding the 'mds-upgrade-state' entry.
+    :param config: task configuration; not read by this task.
+    :raises AssertionError: if max_mds or the masked flags differ from the
+        record, if the epoch did not advance, if a file system recorded with
+        max_mds > 1 was never seen failed or with max_mds == 1, or if
+        allow_standby_replay was recorded as set but never seen cleared.
+    :raises RuntimeError: if an intermediate epoch has the NOT_JOINABLE flag
+        set while the recorded max_mds was not above 1, or has it set
+        together with max_mds == 1 while the recorded max_mds was above 1.
+    """
+
+    saved = ctx['mds-upgrade-state']
+
+    mdsc = MDSCluster(ctx)
+    status = mdsc.status()
+
+    for entry in list(status.get_filesystems()):
+        fscid = entry['id']
+        mdsmap = entry['mdsmap']
+        before = saved[fscid]
+        log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {before}")
+
+        # The settings themselves must be back at their recorded values.
+        assert before['max_mds'] == mdsmap['max_mds']
+        assert before['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK)
+
+        # The map must have moved on since the record was taken.
+        last_epoch = mdsmap['epoch']
+        first_epoch = before['epoch']
+        assert first_epoch < last_epoch
+
+        was_multi_rank = before['max_mds'] > 1
+        had_standby_replay = before['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY
+        saw_fs_failed = False
+        saw_max_mds_reduced = False
+        saw_standby_replay_off = False
+
+        # Walk the epochs strictly between the record and the current map.
+        for i in range(first_epoch + 1, last_epoch):
+            old_mdsmap = mdsc.status(epoch=i).get_fsmap(fscid)['mdsmap']
+            old_flags = old_mdsmap['flags']
+            not_joinable = old_flags & CEPH_MDSMAP_NOT_JOINABLE
+
+            if not_joinable and not was_multi_rank:
+                raise RuntimeError('mgr is failing fs when there is only one '
+                                   f'rank in epoch {i}.')
+            # max_mds is looked up lazily, only once the cheaper tests pass.
+            if was_multi_rank and not_joinable and old_mdsmap['max_mds'] == 1:
+                raise RuntimeError('mgr is failing fs as well the max_mds '
+                                   f'is reduced in epoch {i}')
+
+            if not_joinable:
+                log.debug(f"max_mds not reduced in epoch {i} as fs was failed "
+                          "for carrying out rapid multi-rank mds upgrade")
+                saw_fs_failed = True
+            if was_multi_rank and old_mdsmap['max_mds'] == 1:
+                log.debug(f"max_mds reduced in epoch {i}")
+                saw_max_mds_reduced = True
+            if had_standby_replay and not (old_flags & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY):
+                log.debug(f"allow_standby_replay disabled in epoch {i}")
+                saw_standby_replay_off = True
+
+        # A multi-rank file system must have been failed or shrunk to one rank.
+        assert not was_multi_rank or saw_fs_failed or saw_max_mds_reduced
+        # A recorded allow_standby_replay must have been cleared at some point.
+        assert not had_standby_replay or saw_standby_replay_off
+
+
+def ready(ctx, config):
+    """
+    That the file system is ready for clients.
+
+    Calls wait_for_daemons on every file system listed in the current
+    cluster status, passing that same status object to each call.
+
+    :param ctx: task context.
+    :param config: optional dict; 'timeout' (default 300) is forwarded to
+        wait_for_daemons.
+    """
+
+    config = {} if config is None else config
+    assert isinstance(config, dict), \
+        'task only accepts a dict for configuration'
+
+    wait_timeout = config.get('timeout', 300)
+    status = MDSCluster(ctx).status()
+
+    for entry in status.get_filesystems():
+        Filesystem(ctx, fscid=entry['id']).wait_for_daemons(
+            timeout=wait_timeout, status=status)
+
+
+def clients_evicted(ctx, config):
+    """
+    Check that clients are (or are not) evicted, as the configuration expects.
+
+    config['clients'] maps client names of the form "client.<id>" to a flag:
+    a true value means the client is expected to be evicted. When the key is
+    absent, every mount in ctx.mounts is expected to be evicted. Clients
+    without an entry in ctx.mounts are ignored.
+
+    For a client expected to be evicted, no MDS rank may list a session with
+    its global id, and its address must be blocklisted. A client that is not
+    expected to be evicted must have a session with at least one rank.
+
+    :param ctx: task context providing ``mounts``.
+    :param config: optional dict, see above.
+    :raises RuntimeError: on a malformed client name, when a client expected
+        to be evicted still has a session, or when a client expected to be
+        connected has no session.
+    :raises AssertionError: when config is not a dict, or when a client
+        expected to be evicted is not blocklisted.
+    """
+
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'task only accepts a dict for configuration'
+
+    clients = config.get('clients')
+
+    if clients is None:
+        clients = {("client."+client_id): True for client_id in ctx.mounts}
+
+    log.info("clients is {}".format(str(clients)))
+
+    fs = Filesystem(ctx)
+    status = fs.status()
+
+    mounts = {}
+    for client in clients:
+        # The dot is escaped and the whole name must match, so something like
+        # "clientX0" is rejected rather than silently treated as client 0.
+        name_match = re.fullmatch(r"client\.([0-9]+)", client)
+        if name_match is None:
+            raise RuntimeError(
+                "invalid client name (expected client.<id>): {}".format(client))
+        mounts[client] = ctx.mounts.get(name_match.group(1))
+
+    # Global ids are looked up at most once per client, and only when a
+    # session actually has to be compared against them.
+    # NOTE(review): assumes a mount's global id does not change while this
+    # check runs -- confirm.
+    global_ids = {}
+
+    def global_id_of(client):
+        if client not in global_ids:
+            global_ids[client] = mounts[client].get_global_id()
+        return global_ids[client]
+
+    has_session = set()
+    for rank in fs.get_ranks(status=status):
+        ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status)
+        for session in ls:
+            for client, evicted in clients.items():
+                if mounts[client] is None:
+                    continue
+                if session['id'] != global_id_of(client):
+                    continue
+                if evicted:
+                    raise RuntimeError("client still has session: {}".format(str(session)))
+                log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank']))
+                has_session.add(client)
+
+    should_assert = False
+    for client, evicted in clients.items():
+        mount = mounts[client]
+        if mount is None:
+            continue
+        if evicted:
+            log.info("confirming client {} is blocklisted".format(client))
+            assert fs.is_addr_blocklisted(mount.get_global_addr()), \
+                "client {} is not blocklisted".format(client)
+        elif client not in has_session:
+            log.info("client {} should not be evicted but has no session with an MDS".format(client))
+            # Result deliberately ignored; the call is kept for debugging only.
+            fs.is_addr_blocklisted(mount.get_global_addr())
+            should_assert = True
+    if should_assert:
+        raise RuntimeError("some clients which should not be evicted have no session with an MDS?")