summaryrefslogtreecommitdiffstats
path: root/qa/tasks/cephfs/kernel_mount.py
diff options
context:
space:
mode:
Diffstat (limited to 'qa/tasks/cephfs/kernel_mount.py')
-rw-r--r--qa/tasks/cephfs/kernel_mount.py260
1 files changed, 260 insertions, 0 deletions
diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py
new file mode 100644
index 00000000..d027bcfc
--- /dev/null
+++ b/qa/tasks/cephfs/kernel_mount.py
@@ -0,0 +1,260 @@
+import json
+import logging
+import time
+from textwrap import dedent
+from teuthology.orchestra.run import CommandFailedError
+from teuthology import misc
+
+from teuthology.orchestra import remote as orchestra_remote
+from teuthology.orchestra import run
+from teuthology.contextutil import MaxWhileTries
+from tasks.cephfs.mount import CephFSMount
+
+log = logging.getLogger(__name__)
+
+
+UMOUNT_TIMEOUT = 300
+
+
+class KernelMount(CephFSMount):
+ def __init__(self, ctx, test_dir, client_id, client_remote,
+ ipmi_user, ipmi_password, ipmi_domain):
+ super(KernelMount, self).__init__(ctx, test_dir, client_id, client_remote)
+
+ self.mounted = False
+ self.ipmi_user = ipmi_user
+ self.ipmi_password = ipmi_password
+ self.ipmi_domain = ipmi_domain
+
+ def mount(self, mount_path=None, mount_fs_name=None, mountpoint=None):
+ if mountpoint is not None:
+ self.mountpoint = mountpoint
+ self.setupfs(name=mount_fs_name)
+
+ log.info('Mounting kclient client.{id} at {remote} {mnt}...'.format(
+ id=self.client_id, remote=self.client_remote, mnt=self.mountpoint))
+
+ self.client_remote.run(args=['mkdir', '-p', self.mountpoint],
+ timeout=(5*60))
+
+ if mount_path is None:
+ mount_path = "/"
+
+ opts = 'name={id},norequire_active_mds,conf={conf}'.format(id=self.client_id,
+ conf=self.config_path)
+
+ if mount_fs_name is not None:
+ opts += ",mds_namespace={0}".format(mount_fs_name)
+
+ self.client_remote.run(
+ args=[
+ 'sudo',
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=self.test_dir),
+ '/bin/mount',
+ '-t',
+ 'ceph',
+ ':{mount_path}'.format(mount_path=mount_path),
+ self.mountpoint,
+ '-v',
+ '-o',
+ opts
+ ],
+ timeout=(30*60),
+ )
+
+ self.client_remote.run(
+ args=['sudo', 'chmod', '1777', self.mountpoint], timeout=(5*60))
+
+ self.mounted = True
+
+ def umount(self, force=False):
+ if not self.is_mounted():
+ return
+
+ log.debug('Unmounting client client.{id}...'.format(id=self.client_id))
+
+ cmd=['sudo', 'umount', self.mountpoint]
+ if force:
+ cmd.append('-f')
+
+ try:
+ self.client_remote.run(args=cmd, timeout=(15*60))
+ except Exception as e:
+ self.client_remote.run(args=[
+ 'sudo',
+ run.Raw('PATH=/usr/sbin:$PATH'),
+ 'lsof',
+ run.Raw(';'),
+ 'ps', 'auxf',
+ ], timeout=(15*60))
+ raise e
+
+ rproc = self.client_remote.run(
+ args=[
+ 'rmdir',
+ '--',
+ self.mountpoint,
+ ],
+ wait=False
+ )
+ run.wait([rproc], UMOUNT_TIMEOUT)
+ self.mounted = False
+
+ def cleanup(self):
+ pass
+
+ def umount_wait(self, force=False, require_clean=False, timeout=900):
+ """
+ Unlike the fuse client, the kernel client's umount is immediate
+ """
+ if not self.is_mounted():
+ return
+
+ try:
+ self.umount(force)
+ except (CommandFailedError, MaxWhileTries):
+ if not force:
+ raise
+
+ self.kill()
+ self.kill_cleanup()
+
+ self.mounted = False
+
+ def is_mounted(self):
+ return self.mounted
+
+ def wait_until_mounted(self):
+ """
+ Unlike the fuse client, the kernel client is up and running as soon
+ as the initial mount() function returns.
+ """
+ assert self.mounted
+
+ def teardown(self):
+ super(KernelMount, self).teardown()
+ if self.mounted:
+ self.umount()
+
+ def kill(self):
+ """
+ The Ceph kernel client doesn't have a mechanism to kill itself (doing
+ that in side the kernel would be weird anyway), so we reboot the whole node
+ to get the same effect.
+
+ We use IPMI to reboot, because we don't want the client to send any
+ releases of capabilities.
+ """
+
+ con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
+ self.ipmi_user,
+ self.ipmi_password,
+ self.ipmi_domain)
+ con.hard_reset(wait_for_login=False)
+
+ self.mounted = False
+
+ def kill_cleanup(self):
+ assert not self.mounted
+
+ # We need to do a sleep here because we don't know how long it will
+ # take for a hard_reset to be effected.
+ time.sleep(30)
+
+ try:
+ # Wait for node to come back up after reboot
+ misc.reconnect(None, 300, [self.client_remote])
+ except:
+ # attempt to get some useful debug output:
+ con = orchestra_remote.getRemoteConsole(self.client_remote.hostname,
+ self.ipmi_user,
+ self.ipmi_password,
+ self.ipmi_domain)
+ con.check_status(timeout=60)
+ raise
+
+ # Remove mount directory
+ self.client_remote.run(args=['uptime'], timeout=10)
+
+ # Remove mount directory
+ self.client_remote.run(
+ args=[
+ 'rmdir',
+ '--',
+ self.mountpoint,
+ ],
+ timeout=(5*60),
+ check_status=False,
+ )
+
+ def _find_debug_dir(self):
+ """
+ Find the debugfs folder for this mount
+ """
+ pyscript = dedent("""
+ import glob
+ import os
+ import json
+
+ def get_id_to_dir():
+ result = {}
+ for dir in glob.glob("/sys/kernel/debug/ceph/*"):
+ mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
+ client_id = mds_sessions_lines[1].split()[1].strip('"')
+
+ result[client_id] = dir
+ return result
+
+ print(json.dumps(get_id_to_dir()))
+ """)
+
+ output = self.client_remote.sh([
+ 'sudo', 'python3', '-c', pyscript
+ ], timeout=(5*60))
+ client_id_to_dir = json.loads(output)
+
+ try:
+ return client_id_to_dir[self.client_id]
+ except KeyError:
+ log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
+ self.client_id, ",".join(client_id_to_dir.keys())
+ ))
+ raise
+
+ def _read_debug_file(self, filename):
+ debug_dir = self._find_debug_dir()
+
+ pyscript = dedent("""
+ import os
+
+ print(open(os.path.join("{debug_dir}", "{filename}")).read())
+ """).format(debug_dir=debug_dir, filename=filename)
+
+ output = self.client_remote.sh([
+ 'sudo', 'python3', '-c', pyscript
+ ], timeout=(5*60))
+ return output
+
+ def get_global_id(self):
+ """
+ Look up the CephFS client ID for this mount, using debugfs.
+ """
+
+ assert self.mounted
+
+ mds_sessions = self._read_debug_file("mds_sessions")
+ lines = mds_sessions.split("\n")
+ return int(lines[0].split()[1])
+
+ def get_osd_epoch(self):
+ """
+ Return 2-tuple of osd_epoch, osd_epoch_barrier
+ """
+ osd_map = self._read_debug_file("osdmap")
+ lines = osd_map.split("\n")
+ first_line_tokens = lines[0].split()
+ epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])
+
+ return epoch, barrier