diff options
Diffstat (limited to 'qa/tasks/cephfs/kernel_mount.py')
-rw-r--r-- | qa/tasks/cephfs/kernel_mount.py | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py new file mode 100644 index 000000000..f4640e3fd --- /dev/null +++ b/qa/tasks/cephfs/kernel_mount.py @@ -0,0 +1,324 @@ +import errno +import json +import logging +import os +import re + +from io import StringIO +from textwrap import dedent + +from teuthology.orchestra.run import CommandFailedError +from teuthology.orchestra import run +from teuthology.contextutil import MaxWhileTries + +from tasks.cephfs.mount import CephFSMount + +log = logging.getLogger(__name__) + + +UMOUNT_TIMEOUT = 300 + + +class KernelMount(CephFSMount): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, hostfs_mntpt=None, + cephfs_name=None, cephfs_mntpt=None, brxnet=None, config={}): + super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + self.rbytes = config.get('rbytes', False) + self.inst = None + self.addr = None + + def mount(self, mntopts=[], createfs=True, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + # TODO: don't call setupfs() from within mount(), since it's + # absurd. The proper order should be: create FS first and then + # call mount(). + if createfs: + self.setupfs(name=self.cephfs_name) + if not self.cephfs_mntpt: + self.cephfs_mntpt = '/' + + stderr = StringIO() + try: + self.client_remote.run(args=['mkdir', '-p', self.hostfs_mntpt], + timeout=(5*60), stderr=stderr) + except CommandFailedError: + if 'file exists' not in stderr.getvalue().lower(): + raise + + retval = self._run_mount_cmd(mntopts, check_status) + if retval: + return retval + + stderr = StringIO() + try: + self.client_remote.run( + args=['sudo', 'chmod', '1777', self.hostfs_mntpt], + stderr=stderr, timeout=(5*60)) + except CommandFailedError: + # the client does not have write permissions in the caps it holds + # for the Ceph FS that was just mounted. + if 'permission denied' in stderr.getvalue().lower(): + pass + + + self.mounted = True + + def _run_mount_cmd(self, mntopts, check_status): + opts = 'norequire_active_mds' + if self.client_id: + opts += ',name=' + self.client_id + if self.client_keyring_path and self.client_id: + opts += ',secret=' + self.get_key_from_keyfile() + if self.config_path: + opts += ',conf=' + self.config_path + if self.cephfs_name: + opts += ",mds_namespace=" + self.cephfs_name + if self.rbytes: + opts += ",rbytes" + else: + opts += ",norbytes" + if mntopts: + opts += ',' + ','.join(mntopts) + + mount_dev = ':' + self.cephfs_mntpt + prefix = ['sudo', 'adjust-ulimits', 'ceph-coverage', + self.test_dir + '/archive/coverage', + 'nsenter', + '--net=/var/run/netns/{0}'.format(self.netns_name)] + cmdargs = prefix + ['/bin/mount', '-t', 'ceph', mount_dev, + self.hostfs_mntpt, '-v', '-o', opts] + + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + try: + self.client_remote.run(args=cmdargs, timeout=(30*60), + stdout=mountcmd_stdout, + stderr=mountcmd_stderr) + except CommandFailedError as e: + log.info('mount command failed') + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + log.info('mount command passed') + + def umount(self, force=False): + if not self.is_mounted(): + self.cleanup() + return + + log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) + + try: + cmd=['sudo', 'umount', self.hostfs_mntpt] + if force: + cmd.append('-f') + self.client_remote.run(args=cmd, timeout=(15*60), omit_sudo=False) + except Exception as e: + self.client_remote.run( + args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', + run.Raw(';'), 'ps', 'auxf'], + timeout=(15*60), omit_sudo=False) + raise e + + self.mounted = False + self.cleanup() + + def umount_wait(self, force=False, require_clean=False, timeout=900): + """ + Unlike the fuse client, the kernel client's umount is immediate + """ + if not self.is_mounted(): + self.cleanup() + return + + try: + self.umount(force) + except (CommandFailedError, MaxWhileTries): + if not force: + raise + + # force delete the netns and umount + self.client_remote.run(args=['sudo', 'umount', '-f', '-l', + self.mountpoint], + timeout=(15*60), omit_sudo=False) + + self.mounted = False + self.cleanup() + + def wait_until_mounted(self): + """ + Unlike the fuse client, the kernel client is up and running as soon + as the initial mount() function returns. + """ + assert self.mounted + + def teardown(self): + super(KernelMount, self).teardown() + if self.mounted: + self.umount() + + def _get_debug_dir(self): + """ + Get the debugfs folder for this mount + """ + + cluster_name = 'ceph' + fsid = self.ctx.ceph[cluster_name].fsid + + global_id = self._get_global_id() + + return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}") + + def read_debug_file(self, filename): + """ + Read the debug file "filename", return None if the file doesn't exist. + """ + + path = os.path.join(self._get_debug_dir(), filename) + + stdout = StringIO() + stderr = StringIO() + try: + self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60), + stdout=stdout, stderr=stderr) + return stdout.getvalue() + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower(): + return errno.ENOENT + elif 'not a directory' in stderr.getvalue().lower(): + return errno.ENOTDIR + elif 'permission denied' in stderr.getvalue().lower(): + return errno.EACCES + raise + + def _get_global_id(self): + try: + p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO()) + v = p.stdout.getvalue() + prefix = "client" + assert v.startswith(prefix) + return int(v[len(prefix):]) + except CommandFailedError: + # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available. + log.debug("Falling back to messy global_id lookup via /sys...") + + pyscript = dedent(""" + import glob + import os + import json + + def get_id_to_dir(): + result = {} + for dir in glob.glob("/sys/kernel/debug/ceph/*"): + mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() + global_id = mds_sessions_lines[0].split()[1].strip('"') + client_id = mds_sessions_lines[1].split()[1].strip('"') + result[client_id] = global_id + return result + print(json.dumps(get_id_to_dir())) + """) + + output = self.client_remote.sh([ + 'sudo', 'python3', '-c', pyscript + ], timeout=(5*60)) + client_id_to_global_id = json.loads(output) + + try: + return client_id_to_global_id[self.client_id] + except KeyError: + log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( + self.client_id, ",".join(client_id_to_global_id.keys()) + )) + raise + + def get_global_id(self): + """ + Look up the CephFS client ID for this mount, using debugfs. + """ + + assert self.mounted + + return self._get_global_id() + + @property + def _global_addr(self): + if self.addr is not None: + return self.addr + + # The first line of the "status" file's output will be something + # like: + # "instance: client.4297 (0)10.72.47.117:0/1148470933" + # What we need here is only the string "10.72.47.117:0/1148470933" + status = self.read_debug_file("status") + if status is None: + return None + + instance = re.findall(r'instance:.*', status)[0] + self.addr = instance.split()[2].split(')')[1] + return self.addr; + + @property + def _global_inst(self): + if self.inst is not None: + return self.inst + + client_gid = "client%d" % self.get_global_id() + self.inst = " ".join([client_gid, self._global_addr]) + return self.inst + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self._global_inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self._global_addr + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + osd_map = self.read_debug_file("osdmap") + assert osd_map + + lines = osd_map.split("\n") + first_line_tokens = lines[0].split() + epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3]) + + return epoch, barrier + + def get_op_read_count(self): + stdout = StringIO() + stderr = StringIO() + try: + path = os.path.join(self._get_debug_dir(), "metrics/size") + self.run_shell(f"sudo stat {path}", stdout=stdout, + stderr=stderr, cwd=None) + buf = self.read_debug_file("metrics/size") + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower() \ + or 'not a directory' in stderr.getvalue().lower(): + try: + path = os.path.join(self._get_debug_dir(), "metrics") + self.run_shell(f"sudo stat {path}", stdout=stdout, + stderr=stderr, cwd=None) + buf = self.read_debug_file("metrics") + except CommandFailedError: + return errno.ENOENT + else: + return 0 + return int(re.findall(r'read.*', buf)[0].split()[1]) |